spacr 0.3.38__py3-none-any.whl → 0.3.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/core.py +1 -1
- spacr/io.py +20 -13
- spacr/ml.py +33 -24
- spacr/plot.py +421 -37
- spacr/toxo.py +202 -16
- spacr/utils.py +4 -2
- {spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/METADATA +1 -1
- {spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/RECORD +12 -12
- {spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/LICENSE +0 -0
- {spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/WHEEL +0 -0
- {spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.38.dist-info → spacr-0.3.41.dist-info}/top_level.txt +0 -0
spacr/core.py
CHANGED
@@ -143,7 +143,7 @@ def preprocess_generate_masks(src, settings={}):
|
|
143
143
|
start = time.time()
|
144
144
|
if i+1 <= settings['examples_to_plot']:
|
145
145
|
file_path = os.path.join(merged_src, file)
|
146
|
-
plot_image_mask_overlay(file_path, settings['channels'], settings['cell_channel'], settings['nucleus_channel'], settings['pathogen_channel'], figuresize=10,
|
146
|
+
plot_image_mask_overlay(file_path, settings['channels'], settings['cell_channel'], settings['nucleus_channel'], settings['pathogen_channel'], figuresize=10, percentiles=(1,99), thickness=3, save_pdf=True)
|
147
147
|
stop = time.time()
|
148
148
|
duration = stop-start
|
149
149
|
time_ls.append(duration)
|
spacr/io.py
CHANGED
@@ -1686,11 +1686,16 @@ def preprocess_img_data(settings):
|
|
1686
1686
|
print(f'Found {extension_counts[most_common_extension]} {most_common_extension} files')
|
1687
1687
|
else:
|
1688
1688
|
print(f'Could not find any {valid_ext} files in {src} only found {extension_counts[0]}')
|
1689
|
-
|
1689
|
+
|
1690
|
+
|
1691
|
+
|
1692
|
+
|
1693
|
+
|
1694
|
+
if os.path.exists(os.path.join(src,'stack')):
|
1690
1695
|
print('Found existing stack folder.')
|
1691
|
-
if os.path.exists(src
|
1696
|
+
if os.path.exists(os.path.join(src,'channel_stack')):
|
1692
1697
|
print('Found existing channel_stack folder.')
|
1693
|
-
if os.path.exists(src
|
1698
|
+
if os.path.exists(os.path.join(src,'norm_channel_stack')):
|
1694
1699
|
print('Found existing norm_channel_stack folder. Skipping preprocessing')
|
1695
1700
|
return settings, src
|
1696
1701
|
|
@@ -1713,12 +1718,13 @@ def preprocess_img_data(settings):
|
|
1713
1718
|
|
1714
1719
|
src = _run_test_mode(settings['src'], regex, timelapse, test_images, random_test)
|
1715
1720
|
settings['src'] = src
|
1716
|
-
|
1721
|
+
|
1722
|
+
stack_path = os.path.join(src, 'stack')
|
1717
1723
|
if img_format == None:
|
1718
|
-
if not os.path.exists(
|
1724
|
+
if not os.path.exists(stack_path):
|
1719
1725
|
_merge_channels(src, plot=False)
|
1720
|
-
|
1721
|
-
if not os.path.exists(
|
1726
|
+
|
1727
|
+
if not os.path.exists(stack_path):
|
1722
1728
|
try:
|
1723
1729
|
if not img_format == None:
|
1724
1730
|
if timelapse:
|
@@ -1727,7 +1733,7 @@ def preprocess_img_data(settings):
|
|
1727
1733
|
_rename_and_organize_image_files(src, regex, batch_size, pick_slice, skip_mode, metadata_type, img_format)
|
1728
1734
|
|
1729
1735
|
#Make sure no batches will be of only one image
|
1730
|
-
all_imgs = len(
|
1736
|
+
all_imgs = len(stack_path)
|
1731
1737
|
full_batches = all_imgs // batch_size
|
1732
1738
|
last_batch_size = all_imgs % batch_size
|
1733
1739
|
|
@@ -1738,26 +1744,27 @@ def preprocess_img_data(settings):
|
|
1738
1744
|
raise ValueError("Only one batch of size 1 detected. Adjust the batch size.")
|
1739
1745
|
# If the last batch is of size 1, merge it with the second last batch
|
1740
1746
|
elif full_batches > 0:
|
1747
|
+
print(f"all images: {all_imgs}, full batch: {full_batches}, last batch: {last_batch_size}")
|
1741
1748
|
raise ValueError("Last batch of size 1 detected. Adjust the batch size.")
|
1742
1749
|
|
1743
1750
|
_merge_channels(src, plot=False)
|
1744
1751
|
|
1745
1752
|
if timelapse:
|
1746
|
-
_create_movies_from_npy_per_channel(
|
1753
|
+
_create_movies_from_npy_per_channel(stack_path, fps=2)
|
1747
1754
|
|
1748
1755
|
if plot:
|
1749
1756
|
print(f'plotting {nr} images from {src}/stack')
|
1750
|
-
plot_arrays(
|
1757
|
+
plot_arrays(stack_path, figuresize, cmap, nr=nr, normalize=normalize)
|
1751
1758
|
|
1752
1759
|
if all_to_mip:
|
1753
|
-
_mip_all(
|
1760
|
+
_mip_all(stack_path)
|
1754
1761
|
if plot:
|
1755
1762
|
print(f'plotting {nr} images from {src}/stack')
|
1756
|
-
plot_arrays(
|
1763
|
+
plot_arrays(stack_path, figuresize, cmap, nr=nr, normalize=normalize)
|
1757
1764
|
except Exception as e:
|
1758
1765
|
print(f"Error: {e}")
|
1759
1766
|
|
1760
|
-
concatenate_and_normalize(src=
|
1767
|
+
concatenate_and_normalize(src=stack_path,
|
1761
1768
|
channels=mask_channels,
|
1762
1769
|
save_dtype=np.float32,
|
1763
1770
|
settings=settings)
|
spacr/ml.py
CHANGED
@@ -134,7 +134,7 @@ def scale_variables(X, y):
|
|
134
134
|
|
135
135
|
return X_scaled, y_scaled
|
136
136
|
|
137
|
-
def process_model_coefficients(model, regression_type, X, y,
|
137
|
+
def process_model_coefficients(model, regression_type, X, y, nc, pc, controls):
|
138
138
|
"""Return DataFrame of model coefficients and p-values."""
|
139
139
|
if regression_type in ['ols', 'gls', 'wls', 'rlm', 'glm', 'mixed', 'quantile', 'logit', 'probit', 'poisson']:
|
140
140
|
coefs = model.params
|
@@ -169,8 +169,8 @@ def process_model_coefficients(model, regression_type, X, y, highlight):
|
|
169
169
|
coef_df['p_value'] = np.nan # Placeholder since sklearn doesn't provide p-values
|
170
170
|
|
171
171
|
coef_df['-log10(p_value)'] = -np.log10(coef_df['p_value'])
|
172
|
-
coef_df['
|
173
|
-
|
172
|
+
coef_df['grna'] = coef_df['feature'].str.extract(r'\[(.*?)\]')[0]
|
173
|
+
coef_df['condition'] = coef_df.apply(lambda row: 'nc' if nc in row['feature'] else 'pc' if pc in row['feature'] else ('control' if row['grna'] in controls else 'other'),axis=1)
|
174
174
|
return coef_df[~coef_df['feature'].str.contains('row|column')]
|
175
175
|
|
176
176
|
def prepare_formula(dependent_variable, random_row_column_effects=False):
|
@@ -284,15 +284,13 @@ def check_and_clean_data(df, dependent_variable):
|
|
284
284
|
df_cleaned['row'] = df['row']
|
285
285
|
df_cleaned['column'] = df['column']
|
286
286
|
|
287
|
-
#display(df_cleaned)
|
288
|
-
|
289
287
|
# Create a new column 'gene_fraction' that sums the fractions by gene within the same well
|
290
288
|
df_cleaned['gene_fraction'] = df_cleaned.groupby(['prc', 'gene'])['fraction'].transform('sum')
|
291
289
|
|
292
290
|
print("Data is ready for model fitting.")
|
293
291
|
return df_cleaned
|
294
292
|
|
295
|
-
def regression(df, csv_path, dependent_variable='predictions', regression_type=None, alpha=1.0, random_row_column_effects=False,
|
293
|
+
def regression(df, csv_path, dependent_variable='predictions', regression_type=None, alpha=1.0, random_row_column_effects=False, nc='233460', pc='220950', controls=[''], dst=None, cov_type=None, plot=False):
|
296
294
|
from .plot import volcano_plot, plot_histogram
|
297
295
|
|
298
296
|
# Generate the volcano filename
|
@@ -312,9 +310,7 @@ def regression(df, csv_path, dependent_variable='predictions', regression_type=N
|
|
312
310
|
if regression_type is None:
|
313
311
|
regression_type = 'ols' if is_normal else 'glm'
|
314
312
|
|
315
|
-
#display('before check_and_clean_data:',df)
|
316
313
|
df = check_and_clean_data(df, dependent_variable)
|
317
|
-
#display('after check_and_clean_data:',df)
|
318
314
|
|
319
315
|
# Handle mixed effects if row/column effect is treated as random
|
320
316
|
if random_row_column_effects:
|
@@ -340,10 +336,10 @@ def regression(df, csv_path, dependent_variable='predictions', regression_type=N
|
|
340
336
|
model = regression_model(X, y, regression_type=regression_type, groups=groups, alpha=alpha, cov_type=cov_type)
|
341
337
|
|
342
338
|
# Process the model coefficients
|
343
|
-
coef_df = process_model_coefficients(model, regression_type, X, y,
|
344
|
-
|
345
|
-
|
346
|
-
|
339
|
+
coef_df = process_model_coefficients(model, regression_type, X, y, nc, pc, controls)
|
340
|
+
|
341
|
+
if plot:
|
342
|
+
volcano_plot(coef_df, volcano_path)
|
347
343
|
|
348
344
|
return model, coef_df
|
349
345
|
|
@@ -487,19 +483,28 @@ def perform_regression(settings):
|
|
487
483
|
if settings['transform'] is None:
|
488
484
|
_ = plot_plates(score_data_df, variable=dependent_variable, grouping='mean', min_max='allq', cmap='viridis', min_count=settings['min_cell_count'], dst = res_folder)
|
489
485
|
|
490
|
-
model, coef_df = regression(merged_df, csv_path, dependent_variable, settings['regression_type'], settings['alpha'], settings['random_row_column_effects'],
|
486
|
+
model, coef_df = regression(merged_df, csv_path, dependent_variable, settings['regression_type'], settings['alpha'], settings['random_row_column_effects'], nc=settings['negative_control'], pc=settings['positive_control'], controls=settings['controls'], dst=res_folder, cov_type=settings['cov_type'])
|
491
487
|
|
492
488
|
coef_df['grna'] = coef_df['feature'].apply(lambda x: re.search(r'grna\[(.*?)\]', x).group(1) if 'grna' in x else None)
|
493
489
|
coef_df['gene'] = coef_df['feature'].apply(lambda x: re.search(r'gene\[(.*?)\]', x).group(1) if 'gene' in x else None)
|
494
490
|
coef_df = coef_df.merge(n_grna, how='left', on='grna')
|
495
491
|
coef_df = coef_df.merge(n_gene, how='left', on='gene')
|
496
|
-
display(coef_df)
|
497
492
|
|
498
493
|
gene_coef_df = coef_df[coef_df['n_gene'] != None]
|
499
494
|
grna_coef_df = coef_df[coef_df['n_grna'] != None]
|
500
495
|
gene_coef_df = gene_coef_df.dropna(subset=['n_gene'])
|
501
496
|
grna_coef_df = grna_coef_df.dropna(subset=['n_grna'])
|
502
497
|
|
498
|
+
if settings['controls'] is not None:
|
499
|
+
control_coef_df = grna_coef_df[grna_coef_df['grna'].isin(settings['controls'])]
|
500
|
+
mean_coef = control_coef_df['coefficient'].mean()
|
501
|
+
variance_coef = control_coef_df['coefficient'].var()
|
502
|
+
std_coef = control_coef_df['coefficient'].std()
|
503
|
+
reg_threshold = mean_coef + (3 * std_coef)
|
504
|
+
|
505
|
+
print('coef_df')
|
506
|
+
display(coef_df)
|
507
|
+
|
503
508
|
coef_df.to_csv(results_path, index=False)
|
504
509
|
gene_coef_df.to_csv(results_path_gene, index=False)
|
505
510
|
grna_coef_df.to_csv(results_path_grna, index=False)
|
@@ -509,7 +514,10 @@ def perform_regression(settings):
|
|
509
514
|
|
510
515
|
else:
|
511
516
|
significant = coef_df[coef_df['p_value']<= 0.05]
|
512
|
-
|
517
|
+
if settings['controls'] is not None:
|
518
|
+
significant_high = significant[significant['coefficient'] >= reg_threshold]
|
519
|
+
significant_low = significant[significant['coefficient'] <= reg_threshold]
|
520
|
+
significant = pd.concat([significant_high, significant_low])
|
513
521
|
significant.sort_values(by='coefficient', ascending=False, inplace=True)
|
514
522
|
significant = significant[~significant['feature'].str.contains('row|column')]
|
515
523
|
|
@@ -530,22 +538,24 @@ def perform_regression(settings):
|
|
530
538
|
grna_merged_df = merge_regression_res_with_metadata(results_path_grna, metadata_file, name=filename)
|
531
539
|
|
532
540
|
if settings['toxo']:
|
533
|
-
|
534
541
|
data_path = merged_df
|
535
542
|
data_path_gene = gene_merged_df
|
536
543
|
data_path_grna = grna_merged_df
|
537
544
|
base_dir = os.path.dirname(os.path.abspath(__file__))
|
538
545
|
metadata_path = os.path.join(base_dir, 'resources', 'data', 'lopit.csv')
|
539
|
-
|
540
|
-
custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', string_list=[settings['highlight']], point_size=50, figsize=20)
|
541
|
-
custom_volcano_plot(data_path_gene, metadata_path, metadata_column='tagm_location', string_list=[settings['highlight']], point_size=50, figsize=20)
|
542
|
-
custom_volcano_plot(data_path_grna, metadata_path, metadata_column='tagm_location', string_list=[settings['highlight']], point_size=50, figsize=20)
|
543
546
|
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
+
custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=200, figsize=20, threshold=reg_threshold, split_axis_lims=settings['split_axis_lims'])
|
548
|
+
#custom_volcano_plot(data_path_gene, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=reg_threshold)
|
549
|
+
#custom_volcano_plot(data_path_grna, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=reg_threshold)
|
550
|
+
|
551
|
+
#if len(significant) > 2:
|
552
|
+
# metadata_path = os.path.join(base_dir, 'resources', 'data', 'toxoplasma_metadata.csv')
|
553
|
+
# go_term_enrichment_by_column(significant, metadata_path)
|
547
554
|
|
548
555
|
print('Significant Genes')
|
556
|
+
grnas = significant['grna'].unique().tolist()
|
557
|
+
genes = significant['gene'].unique().tolist()
|
558
|
+
print(f"Found p<0.05 coedfficients for {len(grnas)} gRNAs and {len(genes)} genes")
|
549
559
|
display(significant)
|
550
560
|
|
551
561
|
output = {'results':coef_df,
|
@@ -763,7 +773,6 @@ def generate_ml_scores(settings):
|
|
763
773
|
raise ValueError("The 'png_list_df' DataFrame must contain 'prcfo' and 'test' columns.")
|
764
774
|
annotated_df = png_list_df[['prcfo', settings['annotation_column']]].set_index('prcfo')
|
765
775
|
df = annotated_df.merge(df, left_index=True, right_index=True)
|
766
|
-
#display(df)
|
767
776
|
unique_values = df[settings['annotation_column']].dropna().unique()
|
768
777
|
if len(unique_values) == 1:
|
769
778
|
unannotated_rows = df[df[settings['annotation_column']].isna()].index
|
spacr/plot.py
CHANGED
@@ -13,10 +13,11 @@ from IPython.display import display
|
|
13
13
|
from skimage.segmentation import find_boundaries
|
14
14
|
from skimage import measure
|
15
15
|
from skimage.measure import find_contours, label, regionprops
|
16
|
+
import tifffile as tiff
|
16
17
|
|
17
18
|
from scipy.stats import normaltest, ttest_ind, mannwhitneyu, f_oneway, kruskal
|
18
19
|
from statsmodels.stats.multicomp import pairwise_tukeyhsd
|
19
|
-
from scipy.stats import ttest_ind, mannwhitneyu, levene, wilcoxon, kruskal
|
20
|
+
from scipy.stats import ttest_ind, mannwhitneyu, levene, wilcoxon, kruskal, normaltest, shapiro
|
20
21
|
import itertools
|
21
22
|
import pingouin as pg
|
22
23
|
|
@@ -25,13 +26,26 @@ from IPython.display import Image as ipyimage
|
|
25
26
|
|
26
27
|
import matplotlib.patches as patches
|
27
28
|
from collections import defaultdict
|
29
|
+
from matplotlib.gridspec import GridSpec
|
28
30
|
|
29
|
-
def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10,
|
31
|
+
def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, pathogen_channel, figuresize=10, percentiles=(2,98), thickness=3, save_pdf=True, mode='outlines', export_tiffs=False):
|
30
32
|
"""Plot image and mask overlays."""
|
31
33
|
|
32
|
-
def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness):
|
34
|
+
def _plot_merged_plot(image, outlines, outline_colors, figuresize, thickness, percentiles, mode='outlines'):
|
33
35
|
"""Plot the merged plot with overlay, image channels, and masks."""
|
34
36
|
|
37
|
+
def _generate_colored_mask(mask, alpha):
|
38
|
+
""" Generate a colored mask with transparency using the given colormap. """
|
39
|
+
cmap = generate_mask_random_cmap(mask)
|
40
|
+
rgba_mask = cmap(mask / mask.max()) # Normalize mask and map to colormap (RGBA)
|
41
|
+
rgba_mask[..., 3] = np.where(mask > 0, alpha, 0) # Apply transparency only where mask is present
|
42
|
+
return rgba_mask
|
43
|
+
|
44
|
+
def _overlay_mask(image, mask):
|
45
|
+
"""Overlay the colored mask onto the original image."""
|
46
|
+
combined = np.clip(image + mask[..., :3] * mask[..., 3:4], 0, 1) # Ensure pixel values stay in [0, 1]
|
47
|
+
return combined
|
48
|
+
|
35
49
|
def _normalize_image(image, percentiles=(2, 98)):
|
36
50
|
"""Normalize the image to the given percentiles."""
|
37
51
|
v_min, v_max = np.percentile(image, percentiles)
|
@@ -61,11 +75,15 @@ def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, patho
|
|
61
75
|
# Plot each channel with its corresponding outlines
|
62
76
|
for v in range(num_channels):
|
63
77
|
channel_image = image[..., v]
|
64
|
-
channel_image_normalized = _normalize_image(channel_image)
|
78
|
+
channel_image_normalized = _normalize_image(channel_image, percentiles)
|
65
79
|
channel_image_rgb = np.dstack((channel_image_normalized, channel_image_normalized, channel_image_normalized))
|
66
80
|
|
67
81
|
for outline, color in zip(outlines, outline_colors):
|
68
|
-
|
82
|
+
if mode == 'outlines':
|
83
|
+
channel_image_rgb = _apply_contours(channel_image_rgb, outline, color, thickness)
|
84
|
+
else:
|
85
|
+
mask = _generate_colored_mask(outline, alpha=0.5)
|
86
|
+
channel_image_rgb = _overlay_mask(channel_image_rgb, mask)
|
69
87
|
|
70
88
|
ax[v].imshow(channel_image_rgb)
|
71
89
|
ax[v].set_title(f'Image - Channel {v}')
|
@@ -75,11 +93,15 @@ def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, patho
|
|
75
93
|
rgb_channels = min(3, num_channels)
|
76
94
|
for i in range(rgb_channels):
|
77
95
|
channel_image = image[..., i]
|
78
|
-
channel_image_normalized = _normalize_image(channel_image)
|
96
|
+
channel_image_normalized = _normalize_image(channel_image, percentiles)
|
79
97
|
rgb_image[..., i] = channel_image_normalized
|
80
98
|
|
81
99
|
for outline, color in zip(outlines, outline_colors):
|
82
|
-
|
100
|
+
if mode == 'outlines':
|
101
|
+
rgb_image = _apply_contours(rgb_image, outline, color, thickness)
|
102
|
+
else:
|
103
|
+
mask = _generate_colored_mask(outline, alpha=0.5)
|
104
|
+
rgb_image = _overlay_mask(rgb_image, mask)
|
83
105
|
|
84
106
|
ax[-1].imshow(rgb_image)
|
85
107
|
ax[-1].set_title('Combined RGB Image')
|
@@ -96,8 +118,22 @@ def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, patho
|
|
96
118
|
plt.show()
|
97
119
|
return fig
|
98
120
|
|
121
|
+
def _save_channels_as_tiff(stack, save_dir, filename):
|
122
|
+
"""Save each channel in the stack as a grayscale TIFF."""
|
123
|
+
os.makedirs(save_dir, exist_ok=True)
|
124
|
+
for i in range(stack.shape[-1]):
|
125
|
+
channel = stack[..., i]
|
126
|
+
tiff_path = os.path.join(save_dir, f"{filename}_channel_{i}.tiff")
|
127
|
+
tiff.imwrite(tiff_path, channel, photometric='minisblack')
|
128
|
+
print(f"Saved {tiff_path}")
|
129
|
+
|
99
130
|
stack = np.load(file)
|
100
131
|
|
132
|
+
if export_tiffs:
|
133
|
+
save_dir = os.path.join(os.path.dirname(os.path.dirname(file)), 'results', os.path.splitext(os.path.basename(file))[0], 'tiff')
|
134
|
+
filename = os.path.splitext(os.path.basename(file))[0]
|
135
|
+
_save_channels_as_tiff(stack, save_dir, filename)
|
136
|
+
|
101
137
|
# Convert to float for normalization and ensure correct handling of both 8-bit and 16-bit arrays
|
102
138
|
if stack.dtype == np.uint16:
|
103
139
|
stack = stack.astype(np.float32)
|
@@ -128,7 +164,7 @@ def plot_image_mask_overlay(file, channels, cell_channel, nucleus_channel, patho
|
|
128
164
|
outlines.append(np.take(stack, cell_mask_dim, axis=2))
|
129
165
|
outline_colors.append('red')
|
130
166
|
|
131
|
-
fig = _plot_merged_plot(image=image, outlines=outlines, outline_colors=outline_colors, figuresize=figuresize, thickness=thickness)
|
167
|
+
fig = _plot_merged_plot(image=image, outlines=outlines, outline_colors=outline_colors, figuresize=figuresize, thickness=thickness, percentiles=percentiles, mode=mode)
|
132
168
|
|
133
169
|
return fig
|
134
170
|
|
@@ -1691,17 +1727,25 @@ def plot_object_outlines(src, objects=['nucleus','cell','pathogen'], channels=[0
|
|
1691
1727
|
overlay=True,
|
1692
1728
|
max_nr=10,
|
1693
1729
|
randomize=True)
|
1694
|
-
|
1730
|
+
|
1695
1731
|
def volcano_plot(coef_df, filename='volcano_plot.pdf'):
|
1732
|
+
palette = {
|
1733
|
+
'pc': 'red',
|
1734
|
+
'nc': 'green',
|
1735
|
+
'control': 'blue',
|
1736
|
+
'other': 'gray'
|
1737
|
+
}
|
1738
|
+
|
1696
1739
|
# Create the volcano plot
|
1697
1740
|
plt.figure(figsize=(10, 6))
|
1698
1741
|
sns.scatterplot(
|
1699
1742
|
data=coef_df,
|
1700
1743
|
x='coefficient',
|
1701
1744
|
y='-log10(p_value)',
|
1702
|
-
hue='
|
1703
|
-
palette=
|
1745
|
+
hue='condition',
|
1746
|
+
palette=palette
|
1704
1747
|
)
|
1748
|
+
|
1705
1749
|
plt.title('Volcano Plot of Coefficients')
|
1706
1750
|
plt.xlabel('Coefficient')
|
1707
1751
|
plt.ylabel('-log10(p-value)')
|
@@ -2098,7 +2142,7 @@ class spacrGraph:
|
|
2098
2142
|
def __init__(self, df, grouping_column, data_column, graph_type='bar', summary_func='mean',
|
2099
2143
|
order=None, colors=None, output_dir='./output', save=False, y_lim=None,
|
2100
2144
|
error_bar_type='std', remove_outliers=False, theme='pastel', representation='object',
|
2101
|
-
paired=False, all_to_all=True, compare_group=None):
|
2145
|
+
paired=False, all_to_all=True, compare_group=None, graph_name=None):
|
2102
2146
|
|
2103
2147
|
"""
|
2104
2148
|
Class for creating grouped plots with optional statistical tests and data preprocessing.
|
@@ -2121,11 +2165,14 @@ class spacrGraph:
|
|
2121
2165
|
self.all_to_all = all_to_all
|
2122
2166
|
self.compare_group = compare_group
|
2123
2167
|
self.y_lim = y_lim
|
2168
|
+
self.graph_name = graph_name
|
2169
|
+
|
2170
|
+
|
2124
2171
|
self.results_df = pd.DataFrame()
|
2125
2172
|
self.sns_palette = None
|
2126
2173
|
self.fig = None
|
2127
2174
|
|
2128
|
-
self.results_name = str(self.data_column[0])+'_'+str(self.grouping_column)+'_'+str(self.graph_type)
|
2175
|
+
self.results_name = str(self.graph_name)+'_'+str(self.data_column[0])+'_'+str(self.grouping_column)+'_'+str(self.graph_type)
|
2129
2176
|
|
2130
2177
|
self._set_theme()
|
2131
2178
|
self.raw_df = self.df.copy()
|
@@ -2134,10 +2181,10 @@ class spacrGraph:
|
|
2134
2181
|
def _set_theme(self):
|
2135
2182
|
"""Set the Seaborn theme and reorder colors if necessary."""
|
2136
2183
|
integer_list = list(range(1, 81))
|
2137
|
-
color_order = [
|
2184
|
+
color_order = [7,9,4,0,3,6,2] + integer_list
|
2138
2185
|
self.sns_palette = self._set_reordered_theme(self.theme, color_order, 100)
|
2139
2186
|
|
2140
|
-
def _set_reordered_theme(self, theme='
|
2187
|
+
def _set_reordered_theme(self, theme='deep', order=None, n_colors=100, show_theme=False):
|
2141
2188
|
"""Set and reorder the Seaborn color palette."""
|
2142
2189
|
palette = sns.color_palette(theme, n_colors)
|
2143
2190
|
if order:
|
@@ -2182,20 +2229,36 @@ class spacrGraph:
|
|
2182
2229
|
"""Perform normality tests for each group and each data column."""
|
2183
2230
|
unique_groups = self.df[self.grouping_column].unique()
|
2184
2231
|
normality_results = []
|
2232
|
+
|
2185
2233
|
for column in self.data_column:
|
2186
|
-
|
2187
|
-
|
2188
|
-
|
2189
|
-
|
2190
|
-
|
2234
|
+
# Iterate over each group and its corresponding data
|
2235
|
+
for group in unique_groups:
|
2236
|
+
data = self.df.loc[self.df[self.grouping_column] == group, column]
|
2237
|
+
n_samples = len(data)
|
2238
|
+
|
2239
|
+
if n_samples >= 8:
|
2240
|
+
# Use D'Agostino-Pearson test for larger samples
|
2241
|
+
stat, p_value = normaltest(data)
|
2242
|
+
test_name = "D'Agostino-Pearson test"
|
2243
|
+
else:
|
2244
|
+
# Use Shapiro-Wilk test for smaller samples
|
2245
|
+
stat, p_value = shapiro(data)
|
2246
|
+
test_name = "Shapiro-Wilk test"
|
2247
|
+
|
2248
|
+
# Store the result for this group and column
|
2191
2249
|
normality_results.append({
|
2192
2250
|
'Comparison': f'Normality test for {group} on {column}',
|
2193
2251
|
'Test Statistic': stat,
|
2194
2252
|
'p-value': p_value,
|
2195
|
-
'Test Name':
|
2253
|
+
'Test Name': test_name,
|
2196
2254
|
'Column': column,
|
2197
|
-
'n':
|
2255
|
+
'n': n_samples # Sample size
|
2198
2256
|
})
|
2257
|
+
|
2258
|
+
# Check if all groups are normally distributed (p > 0.05)
|
2259
|
+
normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column]
|
2260
|
+
is_normal = all(p > 0.05 for p in normal_p_values)
|
2261
|
+
|
2199
2262
|
return is_normal, normality_results
|
2200
2263
|
|
2201
2264
|
def perform_levene_test(self, unique_groups):
|
@@ -2339,17 +2402,21 @@ class spacrGraph:
|
|
2339
2402
|
ax.text(x_pos, y_pos, text, ha='center', va='center', fontsize=12)
|
2340
2403
|
|
2341
2404
|
def _get_positions(self, ax):
|
2342
|
-
if self.graph_type
|
2405
|
+
if self.graph_type in ['bar','jitter_bar']:
|
2343
2406
|
x_positions = [np.mean(bar.get_paths()[0].vertices[:, 0]) for bar in ax.collections if hasattr(bar, 'get_paths')]
|
2344
2407
|
|
2345
2408
|
elif self.graph_type == 'violin':
|
2346
2409
|
x_positions = [np.mean(violin.get_paths()[0].vertices[:, 0]) for violin in ax.collections if hasattr(violin, 'get_paths')]
|
2347
2410
|
|
2348
|
-
elif self.graph_type
|
2411
|
+
elif self.graph_type in ['box', 'jitter_box']:
|
2349
2412
|
x_positions = list(set(line.get_xdata().mean() for line in ax.lines if line.get_linestyle() == '-'))
|
2350
2413
|
|
2351
2414
|
elif self.graph_type == 'jitter':
|
2352
2415
|
x_positions = [np.mean(collection.get_offsets()[:, 0]) for collection in ax.collections if collection.get_offsets().size > 0]
|
2416
|
+
|
2417
|
+
elif self.graph_type in ['line', 'line_std']:
|
2418
|
+
x_positions = []
|
2419
|
+
|
2353
2420
|
return x_positions
|
2354
2421
|
|
2355
2422
|
def _draw_comparison_lines(ax, x_positions):
|
@@ -2367,7 +2434,7 @@ class spacrGraph:
|
|
2367
2434
|
|
2368
2435
|
# Determine significance marker
|
2369
2436
|
if p_value <= 0.001:
|
2370
|
-
|
2437
|
+
signiresults_namecance = '***'
|
2371
2438
|
elif p_value <= 0.01:
|
2372
2439
|
significance = '**'
|
2373
2440
|
elif p_value <= 0.05:
|
@@ -2408,6 +2475,9 @@ class spacrGraph:
|
|
2408
2475
|
self.fig_width = (num_groups * self.bar_width) + (spacing_between_groups * num_groups)
|
2409
2476
|
self.fig_height = self.fig_width/2
|
2410
2477
|
|
2478
|
+
if self.graph_type in ['line','line_std']:
|
2479
|
+
self.fig_height, self.fig_width = 10, 10
|
2480
|
+
|
2411
2481
|
if ax is None:
|
2412
2482
|
self.fig, ax = plt.subplots(figsize=(self.fig_height, self.fig_width))
|
2413
2483
|
else:
|
@@ -2429,6 +2499,14 @@ class spacrGraph:
|
|
2429
2499
|
self._create_box_plot(ax)
|
2430
2500
|
elif self.graph_type == 'violin':
|
2431
2501
|
self._create_violin_plot(ax)
|
2502
|
+
elif self.graph_type == 'jitter_box':
|
2503
|
+
self._create_jitter_box_plot(ax)
|
2504
|
+
elif self.graph_type == 'jitter_bar':
|
2505
|
+
self._create_jitter_bar_plot(ax)
|
2506
|
+
elif self.graph_type == 'line':
|
2507
|
+
self._create_line_graph(ax)
|
2508
|
+
elif self.graph_type == 'line_std':
|
2509
|
+
self._create_line_with_std_area(ax)
|
2432
2510
|
else:
|
2433
2511
|
raise ValueError(f"Unknown graph type: {self.graph_type}")
|
2434
2512
|
|
@@ -2441,14 +2519,17 @@ class spacrGraph:
|
|
2441
2519
|
|
2442
2520
|
sns.despine(ax=ax, top=True, right=True)
|
2443
2521
|
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Data Column') # Move the legend outside the plot
|
2444
|
-
|
2522
|
+
|
2523
|
+
if not self.graph_type in ['line','line_std']:
|
2524
|
+
ax.set_xlabel('')
|
2525
|
+
|
2445
2526
|
x_positions = _get_positions(self, ax)
|
2446
2527
|
|
2447
|
-
if len(self.data_column) == 1:
|
2528
|
+
if len(self.data_column) == 1 and not self.graph_type in ['line','line_std']:
|
2448
2529
|
ax.legend().remove()
|
2449
2530
|
ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
|
2450
2531
|
|
2451
|
-
elif len(self.data_column) > 1:
|
2532
|
+
elif len(self.data_column) > 1 and not self.graph_type in ['line','line_std']:
|
2452
2533
|
ax.set_xticks([])
|
2453
2534
|
ax.tick_params(bottom=False)
|
2454
2535
|
ax.set_xticklabels([])
|
@@ -2524,7 +2605,54 @@ class spacrGraph:
|
|
2524
2605
|
handles, labels = ax.get_legend_handles_labels()
|
2525
2606
|
unique_labels = dict(zip(labels, handles))
|
2526
2607
|
ax.legend(unique_labels.values(), unique_labels.keys(), loc='best')
|
2527
|
-
|
2608
|
+
|
2609
|
+
def _create_line_graph(self, ax):
|
2610
|
+
"""Helper method to create a line graph with one line per group based on epochs and accuracy."""
|
2611
|
+
#display(self.df)
|
2612
|
+
# Ensure epoch is used on the x-axis and accuracy on the y-axis
|
2613
|
+
x_axis_column = self.data_column[0]
|
2614
|
+
y_axis_column = self.data_column[1]
|
2615
|
+
|
2616
|
+
# Set hue to the grouping column to get one line per group
|
2617
|
+
hue = self.grouping_column
|
2618
|
+
|
2619
|
+
# Check if the required columns exist in the DataFrame
|
2620
|
+
required_columns = [x_axis_column, y_axis_column, self.grouping_column]
|
2621
|
+
for col in required_columns:
|
2622
|
+
if col not in self.df.columns:
|
2623
|
+
raise ValueError(f"Column '{col}' not found in DataFrame.")
|
2624
|
+
|
2625
|
+
# Create the line graph with one line per group
|
2626
|
+
sns.lineplot(data=self.df,x=x_axis_column,y=y_axis_column,hue=hue,palette=self.sns_palette,ax=ax,marker='o',linewidth=1,markersize=6)
|
2627
|
+
|
2628
|
+
# Adjust axis labels
|
2629
|
+
ax.set_xlabel(f"{x_axis_column}")
|
2630
|
+
ax.set_ylabel(f"{y_axis_column}")
|
2631
|
+
|
2632
|
+
def _create_line_with_std_area(self, ax):
|
2633
|
+
"""Helper method to create a line graph with shaded area representing standard deviation."""
|
2634
|
+
|
2635
|
+
x_axis_column = self.data_column[0]
|
2636
|
+
y_axis_column = self.data_column[1]
|
2637
|
+
y_axis_column_mean = f"mean_{y_axis_column}"
|
2638
|
+
y_axis_column_std = f"std_{y_axis_column_mean}"
|
2639
|
+
|
2640
|
+
# Pivot the DataFrame to get mean and std for each epoch across plates
|
2641
|
+
summary_df = self.df.pivot_table(index=x_axis_column,values=y_axis_column,aggfunc=['mean', 'std']).reset_index()
|
2642
|
+
|
2643
|
+
# Flatten MultiIndex columns (result of pivoting)
|
2644
|
+
summary_df.columns = [x_axis_column, y_axis_column_mean, y_axis_column_std]
|
2645
|
+
|
2646
|
+
# Plot the mean accuracy as a line
|
2647
|
+
sns.lineplot(data=summary_df,x=x_axis_column,y=y_axis_column_mean,ax=ax,marker='o',linewidth=1,markersize=0,color='blue',label=y_axis_column_mean)
|
2648
|
+
|
2649
|
+
# Fill the area representing the standard deviation
|
2650
|
+
ax.fill_between(summary_df[x_axis_column],summary_df[y_axis_column_mean] - summary_df[y_axis_column_std],summary_df[y_axis_column_mean] + summary_df[y_axis_column_std],color='blue', alpha=0.1 )
|
2651
|
+
|
2652
|
+
# Adjust axis labels
|
2653
|
+
ax.set_xlabel(f"{x_axis_column}")
|
2654
|
+
ax.set_ylabel(f"{y_axis_column}")
|
2655
|
+
|
2528
2656
|
def _create_box_plot(self, ax):
|
2529
2657
|
"""Helper method to create a box plot with consistent spacing."""
|
2530
2658
|
# Combine grouping column and data column if needed
|
@@ -2574,6 +2702,68 @@ class spacrGraph:
|
|
2574
2702
|
unique_labels = dict(zip(labels, handles))
|
2575
2703
|
ax.legend(unique_labels.values(), unique_labels.keys(), loc='best')
|
2576
2704
|
|
2705
|
+
def _create_jitter_bar_plot(self, ax):
    """Draw a bar plot of the values per group with the individual observations overlaid as jittered points.

    Reads ``self.df_melted`` (columns ``'Value'``, ``'Data Column'`` and the grouping column).
    When more than one data column is configured, a ``'Combined Group'`` column
    ("<group> - <data column>") is added to ``self.df_melted`` and used as the x axis.
    Error bars are drawn only when ``self.error_bar_type`` is ``'std'`` or ``'sem'``.

    Args:
        ax: Matplotlib axes to draw on.
    """
    multiple_columns = len(self.data_column) > 1

    # Flatten DataFrame: combine grouping column and data column into one group if needed
    if multiple_columns:
        self.df_melted['Combined Group'] = (self.df_melted[self.grouping_column].astype(str) + " - " + self.df_melted['Data Column'].astype(str))
        x_axis_column = 'Combined Group'
        ax.set_ylabel('Value')
    else:
        x_axis_column = self.grouping_column
        ax.set_ylabel(self.data_column[0])

    sns.barplot(data=self.df_melted, x=x_axis_column, y='Value', hue=self.hue, palette=self.sns_palette, ax=ax, dodge=self.jitter_bar_dodge, ci=None)
    sns.stripplot(data=self.df_melted, x=x_axis_column, y='Value', hue=self.hue, palette=self.sns_palette, dodge=self.jitter_bar_dodge, jitter=self.bar_width, ax=ax, alpha=0.6, edgecolor='white', linewidth=1, size=16)

    # Adjust the bar width manually
    if multiple_columns:
        target_width = self.bar_width * 2
        for bar in ax.patches:
            if not isinstance(bar, plt.Rectangle):
                continue
            # get_x() is the LEFT edge of the rectangle, so remember the centre before
            # resizing and re-anchor around it; otherwise the bar drifts left of its points.
            center = bar.get_x() + bar.get_width() / 2
            bar.set_width(target_width)
            bar.set_x(center - target_width / 2)

    # Error bars are only defined for the statistics computed below; any other
    # error_bar_type means "no error bars" instead of a KeyError.
    if self.error_bar_type in ('std', 'sem'):
        summary_df = self.df_melted.groupby([x_axis_column]).agg(mean=('Value', 'mean'), std=('Value', 'std'), sem=('Value', 'sem')).reset_index()
        bars = [bar for bar in ax.patches if isinstance(bar, plt.Rectangle)]
        # NOTE(review): bars and summary rows are paired by position. This presumes seaborn draws
        # the groups in the same (sorted) order that groupby returns and that hue dodging adds
        # no extra bars -- confirm against the callers' data.
        for bar, (_, row) in zip(bars, summary_df.iterrows()):
            x_bar = bar.get_x() + bar.get_width() / 2
            err = row[self.error_bar_type]
            ax.errorbar(x=x_bar, y=bar.get_height(), yerr=err, fmt='none', c='black', capsize=5, lw=2)

    # Set legend and labels
    ax.set_xlabel(self.grouping_column)
|
2741
|
+
|
2742
|
+
def _create_jitter_box_plot(self, ax):
    """Draw a box plot per group and overlay the individual observations as jittered points.

    With more than one configured data column, a ``'Combined Group'`` column
    ("<group> - <data column>") is written to ``self.df_melted`` and used as the x axis;
    otherwise the grouping column itself is the x axis.

    Args:
        ax: Matplotlib axes to draw on.
    """
    if len(self.data_column) > 1:
        melted = self.df_melted
        melted['Combined Group'] = (melted[self.grouping_column].astype(str) + " - " + melted['Data Column'].astype(str))
        x_axis_column = 'Combined Group'
        y_label = 'Value'
    else:
        x_axis_column = self.grouping_column
        y_label = self.data_column[0]
    # The y label is set before plotting, exactly as in the previous implementation
    ax.set_ylabel(y_label)

    # Boxes first, then the raw points on top of them
    sns.boxplot(
        data=self.df_melted,
        x=x_axis_column,
        y='Value',
        hue=self.hue,
        palette=self.sns_palette,
        ax=ax,
    )
    sns.stripplot(
        data=self.df_melted,
        x=x_axis_column,
        y='Value',
        hue=self.hue,
        palette=self.sns_palette,
        dodge=self.jitter_bar_dodge,
        jitter=self.bar_width,
        ax=ax,
        alpha=0.6,
        edgecolor='white',
        linewidth=1,
        size=12,
    )

    ax.set_xlabel(self.grouping_column)

    # Both layers register legend entries; keep a single handle per label (the last one seen)
    handles, labels = ax.get_legend_handles_labels()
    handle_by_label = {}
    for label, handle in zip(labels, handles):
        handle_by_label[label] = handle
    ax.legend(handle_by_label.values(), handle_by_label.keys(), loc='best')
|
2766
|
+
|
2577
2767
|
def _save_results(self):
|
2578
2768
|
"""Helper method to save the plot and results."""
|
2579
2769
|
os.makedirs(self.output_dir, exist_ok=True)
|
@@ -2594,14 +2784,14 @@ class spacrGraph:
|
|
2594
2784
|
|
2595
2785
|
def plot_data_from_db(settings):
|
2596
2786
|
from .io import _read_db, _read_and_merge_data
|
2597
|
-
from .utils import annotate_conditions
|
2787
|
+
from .utils import annotate_conditions, save_settings
|
2598
2788
|
"""
|
2599
2789
|
Extracts the specified table from the SQLite database and plots a specified column.
|
2600
2790
|
|
2601
2791
|
Args:
|
2602
2792
|
db_path (str): The path to the SQLite database.
|
2603
2793
|
table_names (str): The name of the table to extract.
|
2604
|
-
|
2794
|
+
data_column (str): The column to plot from the table.
|
2605
2795
|
|
2606
2796
|
Returns:
|
2607
2797
|
df (pd.DataFrame): The extracted table as a DataFrame.
|
@@ -2616,6 +2806,8 @@ def plot_data_from_db(settings):
|
|
2616
2806
|
else:
|
2617
2807
|
raise ValueError("src must be a string or a list of strings.")
|
2618
2808
|
|
2809
|
+
save_settings(settings, name=f"{settings['graph_name']}_plot_settings_db", show=True)
|
2810
|
+
|
2619
2811
|
dfs = []
|
2620
2812
|
for i, src in enumerate(srcs):
|
2621
2813
|
|
@@ -2643,6 +2835,7 @@ def plot_data_from_db(settings):
|
|
2643
2835
|
df = pd.concat(dfs, axis=0)
|
2644
2836
|
df['prc'] = df['plate'].astype(str) + '_' + df['row'].astype(str) + '_' + df['col'].astype(str)
|
2645
2837
|
df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
|
2838
|
+
df['recruitment'] = df['pathogen_channel_1_mean_intensity'] / df['cytoplasm_channel_1_mean_intensity']
|
2646
2839
|
|
2647
2840
|
if settings['cell_plate_metadata'] != None:
|
2648
2841
|
df = df.dropna(subset='host_cell')
|
@@ -2653,24 +2846,91 @@ def plot_data_from_db(settings):
|
|
2653
2846
|
if settings['treatment_plate_metadata'] != None:
|
2654
2847
|
df = df.dropna(subset='treatment')
|
2655
2848
|
|
2656
|
-
df = df.dropna(subset=settings['
|
2849
|
+
df = df.dropna(subset=settings['data_column'])
|
2657
2850
|
df = df.dropna(subset=settings['grouping_column'])
|
2658
2851
|
|
2852
|
+
#df['class'] = df['png_path'].apply(lambda x: 'class_1' if 'class_1' in x else ('class_0' if 'class_0' in x else None))
|
2853
|
+
src = srcs[0]
|
2854
|
+
dst = os.path.join(src, 'results', settings['graph_name'])
|
2855
|
+
os.makedirs(dst, exist_ok=True)
|
2856
|
+
|
2857
|
+
spacr_graph = spacrGraph(
|
2858
|
+
df=df, # Your DataFrame
|
2859
|
+
grouping_column=settings['grouping_column'], # Column for grouping the data (x-axis)
|
2860
|
+
data_column=settings['data_column'], # Column for the data (y-axis)
|
2861
|
+
graph_type=settings['graph_type'], # Type of plot ('bar', 'box', 'violin', 'jitter')
|
2862
|
+
graph_name=settings['graph_name'], # Name of the plot
|
2863
|
+
summary_func='mean', # Function to summarize data (e.g., 'mean', 'median')
|
2864
|
+
colors=None, # Custom colors for the plot (optional)
|
2865
|
+
output_dir=dst, # Directory to save the plot and results
|
2866
|
+
save=settings['save'], # Whether to save the plot and results
|
2867
|
+
y_lim=settings['y_lim'], # Starting point for y-axis (optional)
|
2868
|
+
error_bar_type='std', # Type of error bar ('std' or 'sem')
|
2869
|
+
representation=settings['representation'],
|
2870
|
+
theme=settings['theme'], # Seaborn color palette theme (e.g., 'pastel', 'muted')
|
2871
|
+
)
|
2872
|
+
|
2873
|
+
# Create the plot
|
2874
|
+
spacr_graph.create_plot()
|
2875
|
+
|
2876
|
+
# Get the figure object if needed
|
2877
|
+
fig = spacr_graph.get_figure()
|
2878
|
+
plt.show()
|
2879
|
+
|
2880
|
+
# Optional: Get the results DataFrame containing statistical test results
|
2881
|
+
results_df = spacr_graph.get_results()
|
2882
|
+
return fig, results_df
|
2883
|
+
|
2884
|
+
def plot_data_from_csv(settings):
|
2885
|
+
from .io import _read_db, _read_and_merge_data
|
2886
|
+
from .utils import annotate_conditions, save_settings
|
2887
|
+
"""
|
2888
|
+
Extracts the specified table from the SQLite database and plots a specified column.
|
2889
|
+
|
2890
|
+
Args:
|
2891
|
+
db_path (str): The path to the SQLite database.
|
2892
|
+
table_names (str): The name of the table to extract.
|
2893
|
+
data_column (str): The column to plot from the table.
|
2894
|
+
|
2895
|
+
Returns:
|
2896
|
+
df (pd.DataFrame): The extracted table as a DataFrame.
|
2897
|
+
"""
|
2659
2898
|
|
2899
|
+
if isinstance(settings['src'], str):
|
2900
|
+
srcs = [settings['src']]
|
2901
|
+
elif isinstance(settings['src'], list):
|
2902
|
+
srcs = settings['src']
|
2903
|
+
else:
|
2904
|
+
raise ValueError("src must be a string or a list of strings.")
|
2905
|
+
|
2906
|
+
#save_settings(settings, name=f"{settings['graph_name']}_plot_settings_csv", show=True)
|
2660
2907
|
|
2908
|
+
dfs = []
|
2909
|
+
for i, src in enumerate(srcs):
|
2661
2910
|
|
2911
|
+
dft = pd.read_csv(src)
|
2912
|
+
if 'plate' not in dft.columns:
|
2913
|
+
dft['plate'] = f"plate{i+1}"
|
2914
|
+
dfs.append(dft)
|
2915
|
+
|
2916
|
+
df = pd.concat(dfs, axis=0)
|
2662
2917
|
#display(df)
|
2663
2918
|
|
2664
|
-
|
2919
|
+
df = df.dropna(subset=settings['data_column'])
|
2920
|
+
df = df.dropna(subset=settings['grouping_column'])
|
2921
|
+
src = srcs[0]
|
2922
|
+
dst = os.path.join(os.path.dirname(src), 'results', settings['graph_name'])
|
2923
|
+
os.makedirs(dst, exist_ok=True)
|
2665
2924
|
|
2666
2925
|
spacr_graph = spacrGraph(
|
2667
2926
|
df=df, # Your DataFrame
|
2668
2927
|
grouping_column=settings['grouping_column'], # Column for grouping the data (x-axis)
|
2669
|
-
data_column=settings['
|
2928
|
+
data_column=settings['data_column'], # Column for the data (y-axis)
|
2670
2929
|
graph_type=settings['graph_type'], # Type of plot ('bar', 'box', 'violin', 'jitter')
|
2930
|
+
graph_name=settings['graph_name'], # Name of the plot
|
2671
2931
|
summary_func='mean', # Function to summarize data (e.g., 'mean', 'median')
|
2672
2932
|
colors=None, # Custom colors for the plot (optional)
|
2673
|
-
output_dir=
|
2933
|
+
output_dir=dst, # Directory to save the plot and results
|
2674
2934
|
save=settings['save'], # Whether to save the plot and results
|
2675
2935
|
y_lim=settings['y_lim'], # Starting point for y-axis (optional)
|
2676
2936
|
error_bar_type='std', # Type of error bar ('std' or 'sem')
|
@@ -2687,5 +2947,129 @@ def plot_data_from_db(settings):
|
|
2687
2947
|
|
2688
2948
|
# Optional: Get the results DataFrame containing statistical test results
|
2689
2949
|
results_df = spacr_graph.get_results()
|
2690
|
-
|
2691
|
-
|
2950
|
+
return fig, results_df
|
2951
|
+
|
2952
|
+
def plot_region(settings):
    """Plot one field of view with its mask overlay, its object PNGs and its activation PNGs.

    Three figures are built and each is saved as a PDF under
    ``<src>/results/<name>/`` where ``<name>`` is ``settings['name']`` without extension.

    Args:
        settings (dict): Keys read here are ``src``, ``name``, ``activation_mode``,
            ``activation_db``, ``percentiles``, ``channels``, ``cell_channel``,
            ``nucleus_channel``, ``pathogen_channel``, ``mode`` and ``export_tiffs``.

    Returns:
        tuple: ``(fig_1, fig_2, fig_3)`` -- the mask overlay figure, the PNG grid figure
        and the activation grid figure.
    """

    def _sort_paths_by_basename(paths):
        """Return the paths ordered by file name, ignoring their directories."""
        return sorted(paths, key=lambda path: os.path.basename(path))

    def save_figure_as_pdf(fig, path):
        """Save ``fig`` to ``path`` as a PDF, creating the parent directory first."""
        os.makedirs(os.path.dirname(path), exist_ok=True)  # Create directory if it doesn't exist
        fig.savefig(path, format='pdf', dpi=600, bbox_inches='tight')
        print(f"Saved {path}")

    def _paths_matching(df, stem):
        """Return the sorted ``png_path`` values that contain ``stem`` as a literal substring."""
        # regex=False: the stem is a file name, not a pattern, so characters such as
        # '.', '(' or '+' must be matched literally.
        selected = df[df['png_path'].str.contains(stem, na=False, regex=False)]
        return _sort_paths_by_basename(selected['png_path'].tolist())

    from .io import _read_db

    fov_path = os.path.join(settings['src'], 'merged', settings['name'])
    name = os.path.splitext(settings['name'])[0]

    # Object crops recorded in the measurements database
    db_path = os.path.join(settings['src'], 'measurements', 'measurements.db')
    paths_df = _read_db(db_path, tables=['png_list'])[0]
    png_paths = _paths_matching(paths_df, name)

    # Activation images recorded in the "<activation_mode>_list" table of the activation database
    activation_mode = f"{settings['activation_mode']}_list"
    activation_db_path = os.path.join(settings['src'], 'measurements', settings['activation_db'])
    activation_paths_df = _read_db(activation_db_path, tables=[activation_mode])[0]
    activation_paths = _paths_matching(activation_paths_df, name)

    fig_3 = plot_image_grid(image_paths=activation_paths, percentiles=settings['percentiles'])
    fig_2 = plot_image_grid(image_paths=png_paths, percentiles=settings['percentiles'])
    fig_1 = plot_image_mask_overlay(file=fov_path,
                                    channels=settings['channels'],
                                    cell_channel=settings['cell_channel'],
                                    nucleus_channel=settings['nucleus_channel'],
                                    pathogen_channel=settings['pathogen_channel'],
                                    figuresize=10,
                                    percentiles=settings['percentiles'],
                                    thickness=3,
                                    save_pdf=False,
                                    mode=settings['mode'],
                                    export_tiffs=settings['export_tiffs'])

    dst = os.path.join(settings['src'], 'results', name)
    save_figure_as_pdf(fig_1, os.path.join(dst, f"{name}_mask_overlay.pdf"))
    save_figure_as_pdf(fig_2, os.path.join(dst, f"{name}_png_grid.pdf"))
    save_figure_as_pdf(fig_3, os.path.join(dst, f"{name}_activation_grid.pdf"))

    return fig_1, fig_2, fig_3
|
2998
|
+
|
2999
|
+
def plot_image_grid(image_paths, percentiles):
    """
    Plots a square grid of images from a list of image paths.
    Unused subplots are filled with black, and padding is minimized.

    Parameters:
    - image_paths: List of paths to images to be displayed. An empty list yields a
      single black panel instead of raising.
    - percentiles: (low, high) percentiles used to stretch the contrast of each channel.

    Returns:
    - fig: The generated matplotlib figure.
    """

    from PIL import Image
    import matplotlib.pyplot as plt
    import math

    def _rescale_channel(channel, percentiles):
        """Stretch one 2D channel to [0, 1] between the given percentiles."""
        values = np.asarray(channel)
        v_min, v_max = np.percentile(values, percentiles)
        if v_max > v_min:
            return np.clip((values - v_min) / (v_max - v_min), 0, 1)
        # Both percentiles coincide (e.g. a constant alpha channel): a stretch would divide
        # by zero, so fall back to the full range of the dtype instead of producing NaN/inf.
        if np.issubdtype(values.dtype, np.integer):
            return np.clip(values / np.iinfo(values.dtype).max, 0, 1)
        return np.clip(values.astype(np.float32), 0, 1)

    def _normalize_image(image, percentiles=(2, 98)):
        """ Normalize the image to the given percentiles for each channel independently, preserving the input type (either PIL.Image or numpy.ndarray)."""

        # Check if the input is a PIL image and convert it to a NumPy array
        is_pil_image = isinstance(image, Image.Image)
        if is_pil_image:
            image = np.array(image)

        if image.ndim == 2:
            # Single-channel image: normalize directly
            normalized_image = _rescale_channel(image, percentiles)
        else:
            # Multi-channel image: normalize each channel independently
            normalized_image = np.zeros_like(image, dtype=np.float32)
            for c in range(image.shape[-1]):
                normalized_image[..., c] = _rescale_channel(image[..., c], percentiles)

        # If the input was a PIL image, convert the result back to PIL format
        if is_pil_image:
            # Ensure the image is converted back to 8-bit range (0-255) for PIL
            normalized_image = (normalized_image * 255).astype(np.uint8)
            return Image.fromarray(normalized_image)

        return normalized_image

    n_images = len(image_paths)
    # Smallest square grid that fits all images; at least 1x1 so an empty list still works
    grid_size = max(1, math.ceil(math.sqrt(n_images)))

    # Create the square grid of subplots with a black background.
    # squeeze=False always returns a 2D array of axes, including for a 1x1 grid.
    fig, axs = plt.subplots(
        grid_size, grid_size,
        figsize=(grid_size * 2, grid_size * 2),
        facecolor='black',  # Set figure background to black
        squeeze=False
    )
    axs = axs.flatten()

    for ax, img_path in zip(axs, image_paths):
        # The pixel data is read inside the block, so the file handle can be closed right after
        with Image.open(img_path) as img:
            img = _normalize_image(img, percentiles)

        ax.imshow(img)
        ax.axis('off')  # Hide axes

    # Fill any unused subplots with black
    for ax in axs[n_images:]:
        ax.imshow([[0, 0, 0]], cmap='gray')  # Black square
        ax.axis('off')  # Hide axes

    # Adjust layout to minimize white space
    fig.subplots_adjust(wspace=0, hspace=0, left=0, right=1, top=1, bottom=0)

    return fig
|
spacr/toxo.py
CHANGED
@@ -4,8 +4,9 @@ import numpy as np
|
|
4
4
|
from adjustText import adjust_text
|
5
5
|
import pandas as pd
|
6
6
|
from scipy.stats import fisher_exact
|
7
|
+
from IPython.display import display
|
7
8
|
|
8
|
-
def
|
9
|
+
def custom_volcano_plot_v1(data_path, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=0):
|
9
10
|
"""
|
10
11
|
Create a volcano plot with the ability to control the shape of points based on a categorical column,
|
11
12
|
color points based on a string list, annotate specific points based on p-value and coefficient thresholds,
|
@@ -19,7 +20,8 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
|
|
19
20
|
- point_size: Fixed value to control the size of points.
|
20
21
|
- figsize: Width of the plot (height is half the width).
|
21
22
|
"""
|
22
|
-
|
23
|
+
|
24
|
+
|
23
25
|
filename = 'volcano_plot.pdf'
|
24
26
|
|
25
27
|
# Load the data
|
@@ -42,46 +44,65 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
|
|
42
44
|
metadata['gene_nr'] = metadata['gene_nr'].astype(str)
|
43
45
|
data['gene_nr'] = data['gene_nr'].astype(str)
|
44
46
|
|
47
|
+
|
45
48
|
# Merge data and metadata on 'gene_nr'
|
46
49
|
merged_data = pd.merge(data, metadata[['gene_nr', 'tagm_location']], on='gene_nr', how='left')
|
47
|
-
|
48
|
-
# Controls handling
|
49
|
-
controls = ['000000', '000001', '000002', '000003', '000004', '000005', '000006', '000007', '000008', '000009', '000010', '000011']
|
50
|
-
merged_data.loc[merged_data['gene_nr'].isin(controls), metadata_column] = 'control'
|
50
|
+
|
51
51
|
merged_data.loc[merged_data['gene_nr'].str.startswith('4'), metadata_column] = 'GT1_gene'
|
52
52
|
merged_data.loc[merged_data['gene_nr'] == 'Intercept', metadata_column] = 'Intercept'
|
53
53
|
|
54
|
-
# Create a 'highlight_color' column based on the string_list
|
55
|
-
merged_data['highlight_color'] = merged_data['gene_nr'].apply(lambda x: 'red' if any(s in x for s in string_list) else 'blue')
|
56
|
-
|
57
54
|
# Create the volcano plot
|
58
55
|
figsize_2 = figsize / 2
|
59
56
|
plt.figure(figsize=(figsize_2, figsize))
|
57
|
+
|
58
|
+
palette = {
|
59
|
+
'pc': 'red',
|
60
|
+
'nc': 'green',
|
61
|
+
'control': 'black',
|
62
|
+
'other': 'gray'
|
63
|
+
}
|
64
|
+
|
65
|
+
merged_data['condition'] = pd.Categorical(
|
66
|
+
merged_data['condition'],
|
67
|
+
categories=['pc', 'nc', 'control', 'other'],
|
68
|
+
ordered=True
|
69
|
+
)
|
60
70
|
|
71
|
+
display(merged_data)
|
72
|
+
|
61
73
|
# Create the scatter plot with fixed point size
|
62
74
|
sns.scatterplot(
|
63
75
|
data=merged_data,
|
64
76
|
x='coefficient',
|
65
77
|
y='-log10(p_value)',
|
66
|
-
hue='
|
67
|
-
style=metadata_column if metadata_column else None, #
|
78
|
+
hue='condition', # Controls color
|
79
|
+
style=metadata_column if metadata_column else None, # Controls point shape
|
68
80
|
s=point_size, # Fixed size for all points
|
69
|
-
palette=
|
81
|
+
palette=palette, # Color palette
|
82
|
+
alpha=1.0 # Transparency
|
70
83
|
)
|
71
84
|
|
72
85
|
# Set the plot title and labels
|
73
86
|
plt.title('Custom Volcano Plot of Coefficients')
|
74
87
|
plt.xlabel('Coefficient')
|
75
88
|
plt.ylabel('-log10(p-value)')
|
89
|
+
|
90
|
+
if threshold > 0:
|
91
|
+
plt.gca().axvline(x=-abs(threshold), linestyle='--', color='black')
|
92
|
+
plt.gca().axvline(x=abs(threshold), linestyle='--', color='black')
|
76
93
|
|
77
94
|
# Horizontal line at p-value threshold (0.05)
|
78
|
-
plt.axhline(y=-np.log10(0.05), color='
|
95
|
+
plt.axhline(y=-np.log10(0.05), color='black', linestyle='--')
|
79
96
|
|
80
|
-
# Annotate points where p_value <= 0.05 and coefficient >= 0.25
|
81
97
|
texts = []
|
82
98
|
for i, row in merged_data.iterrows():
|
83
|
-
if row['p_value'] <= 0.05 and row['coefficient'] >=
|
84
|
-
texts.append(plt.text(
|
99
|
+
if row['p_value'] <= 0.05 and abs(row['coefficient']) >= abs(threshold):
|
100
|
+
texts.append(plt.text(
|
101
|
+
row['coefficient'],
|
102
|
+
-np.log10(row['p_value']),
|
103
|
+
row['variable'],
|
104
|
+
fontsize=8
|
105
|
+
))
|
85
106
|
|
86
107
|
# Adjust text positions to avoid overlap
|
87
108
|
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='black'))
|
@@ -96,6 +117,171 @@ def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location
|
|
96
117
|
# Show the plot
|
97
118
|
plt.show()
|
98
119
|
|
120
|
+
def custom_volcano_plot(data_path, metadata_path, metadata_column='tagm_location', point_size=50, figsize=20, threshold=0, split_axis_lims = [10, None, None, 10]):
    """
    Create a volcano plot with a broken y-axis: coefficient on x, -log10(p-value) on y.

    Points are colored by the 'condition' column, shaped by `metadata_column`, and points with
    p_value <= 0.05 and |coefficient| >= |threshold| are annotated with their 'variable' name.
    The figure is saved as 'volcano_plot.pdf' in the working directory and then shown.

    Parameters:
    - data_path: Path to a CSV file, or a DataFrame, with the columns 'feature', 'coefficient',
      'p_value', '-log10(p_value)' and 'condition'. A DataFrame passed in is not modified.
    - metadata_path: Path to a CSV file, or a DataFrame, with the columns 'gene_nr' and
      'tagm_location'. A DataFrame passed in is not modified.
    - metadata_column: Column that controls the point shape (no style mapping if falsy).
    - point_size: Fixed value to control the size of points.
    - figsize: Height of the figure (the width is half the height).
    - threshold: If > 0, vertical dashed lines are drawn at +/- threshold.
    - split_axis_lims: [upper panel min, upper panel max, lower panel min, lower panel max].
      None for the upper max means "data maximum + 5"; None for the lower min means 0.
      Any value that is not a 4-element list/tuple falls back to an unbroken default.
      The default list is only read, never mutated.
    """

    filename = 'volcano_plot.pdf'

    # Load the data (copy DataFrames so the caller's object does not gain helper columns)
    if isinstance(data_path, pd.DataFrame):
        data = data_path.copy()
    else:
        data = pd.read_csv(data_path)

    # 'variable' is the text inside [...] of the feature name, or the whole feature name
    data['variable'] = data['feature'].str.extract(r'\[(.*?)\]')
    # Assign the result back: in-place fillna on a column selection is chained assignment
    data['variable'] = data['variable'].fillna(data['feature'])
    split_columns = data['variable'].str.split('_', expand=True)
    data['gene_nr'] = split_columns[0]

    # Load metadata
    if isinstance(metadata_path, pd.DataFrame):
        metadata = metadata_path.copy()
    else:
        metadata = pd.read_csv(metadata_path)

    metadata['gene_nr'] = metadata['gene_nr'].astype(str)
    data['gene_nr'] = data['gene_nr'].astype(str)

    # Merge data and metadata on 'gene_nr'
    # NOTE(review): the merge always takes 'tagm_location', even when metadata_column names
    # another column -- confirm whether that is intended.
    merged_data = pd.merge(data, metadata[['gene_nr', 'tagm_location']], on='gene_nr', how='left')

    merged_data.loc[merged_data['gene_nr'].str.startswith('4'), metadata_column] = 'GT1_gene'
    merged_data.loc[merged_data['gene_nr'] == 'Intercept', metadata_column] = 'Intercept'
    merged_data.loc[merged_data['condition'] == 'control', metadata_column] = 'control'

    # Categorize condition for coloring
    merged_data['condition'] = pd.Categorical(
        merged_data['condition'],
        categories=['other', 'pc', 'nc', 'control'],
        ordered=True)

    # Resolve the limits of both panels up front so every code path defines all four values
    default_top = merged_data['-log10(p_value)'].max() + 5
    if isinstance(split_axis_lims, (list, tuple)) and len(split_axis_lims) == 4:
        ylim_min_ax1, ylim_max_ax1, ylim_min_ax2, ylim_max_ax2 = split_axis_lims
        if ylim_max_ax1 is None:
            ylim_max_ax1 = default_top
    else:
        ylim_min_ax1, ylim_max_ax1, ylim_min_ax2, ylim_max_ax2 = None, default_top, 0, None
    if ylim_min_ax2 is None:
        ylim_min_ax2 = 0
    # Points above the break go on the upper panel; 10 is the historical cutoff
    upper_cutoff = 10 if ylim_min_ax1 is None else ylim_min_ax1

    # Create subplots with a broken y-axis
    figsize_2 = figsize / 2
    fig, (ax1, ax2) = plt.subplots(
        2, 1, figsize=(figsize_2, figsize),
        sharex=True, gridspec_kw={'height_ratios': [1, 3]}
    )

    # Define color palette
    palette = {
        'pc': 'red',
        'nc': 'green',
        'control': 'white',
        'other': 'gray'}

    scatter_kwargs = dict(
        x='coefficient',
        y='-log10(p_value)',
        hue='condition',
        style=metadata_column if metadata_column else None,
        s=point_size,
        edgecolor='black',
        palette=palette,
        alpha=0.8,
    )

    # Lower panel: all points (the axis limits clip what lies above the break)
    sns.scatterplot(data=merged_data, ax=ax2, **scatter_kwargs)

    # Upper panel: only the points above the break
    sns.scatterplot(
        data=merged_data[merged_data['-log10(p_value)'] > upper_cutoff],
        ax=ax1,
        **scatter_kwargs
    )

    # Set axis limits and hide unnecessary parts
    ax1.set_ylim(ylim_min_ax1, ylim_max_ax1)
    ax2.set_ylim(ylim_min_ax2, ylim_max_ax2)
    ax1.spines['bottom'].set_visible(False)
    ax2.spines['top'].set_visible(False)
    ax1.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

    # The upper panel has no legend when it received no points, so only remove one that exists
    upper_legend = ax1.get_legend()
    if upper_legend is not None:
        upper_legend.remove()

    # Add vertical threshold lines to both plots
    if threshold > 0:
        for ax in (ax1, ax2):
            ax.axvline(x=-abs(threshold), linestyle='--', color='black')
            ax.axvline(x=abs(threshold), linestyle='--', color='black')

    # Add a horizontal line at p-value threshold (0.05)
    ax2.axhline(y=-np.log10(0.05), color='black', linestyle='--')

    # Annotate significant points on both axes
    texts_ax1 = []
    texts_ax2 = []
    upper_panel_bottom = ax1.get_ylim()[0]

    for _, row in merged_data.iterrows():
        if row['p_value'] <= 0.05 and abs(row['coefficient']) >= abs(threshold):
            # Annotate on the panel that actually displays the point
            on_upper_panel = row['-log10(p_value)'] >= upper_panel_bottom
            target_ax = ax1 if on_upper_panel else ax2

            text = target_ax.text(
                row['coefficient'],
                -np.log10(row['p_value']),
                row['variable'],
                fontsize=8,
                ha='center',
                va='bottom',
            )

            if on_upper_panel:
                texts_ax1.append(text)
            else:
                texts_ax2.append(text)

    # Adjust text positions to avoid overlap for both axes (nothing to do for an empty list)
    if texts_ax1:
        adjust_text(texts_ax1, arrowprops=dict(arrowstyle='-', color='black'), ax=ax1)
    if texts_ax2:
        adjust_text(texts_ax2, arrowprops=dict(arrowstyle='-', color='black'), ax=ax2)

    # Move the legend outside the lower plot
    ax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

    # Adjust the spacing between subplots and move the title
    fig.subplots_adjust(hspace=0.05)
    fig.suptitle('Custom Volcano Plot of Coefficients', y=1.02, fontsize=16)  # Title above the top plot

    # Save the plot as PDF
    fig.savefig(filename, format='pdf', bbox_inches='tight')
    print(f'Saved Volcano plot: {filename}')

    # Show the plot
    plt.show()
|
284
|
+
|
99
285
|
def go_term_enrichment_by_column(significant_df, metadata_path, go_term_columns=['Computed GO Processes', 'Curated GO Components', 'Curated GO Functions', 'Curated GO Processes']):
|
100
286
|
"""
|
101
287
|
Perform GO term enrichment analysis for each GO term column and generate plots.
|
spacr/utils.py
CHANGED
@@ -326,6 +326,8 @@ def save_settings(settings, name='settings', show=False):
|
|
326
326
|
|
327
327
|
if isinstance(settings['src'], list):
|
328
328
|
src = settings['src'][0]
|
329
|
+
#if os.path.exists(src):
|
330
|
+
|
329
331
|
name = f"{name}_list"
|
330
332
|
else:
|
331
333
|
src = settings['src']
|
@@ -4712,10 +4714,10 @@ def merge_regression_res_with_metadata(results_file, metadata_file, name='_metad
|
|
4712
4714
|
df_metadata['gene'] = df_metadata['Gene ID'].apply(lambda x: x.split('_')[1] if '_' in x else None)
|
4713
4715
|
|
4714
4716
|
# Drop rows where gene extraction failed
|
4715
|
-
df_results = df_results.dropna(subset=['gene'])
|
4717
|
+
#df_results = df_results.dropna(subset=['gene'])
|
4716
4718
|
|
4717
4719
|
# Merge the two dataframes on the gene column
|
4718
|
-
merged_df = pd.merge(df_results, df_metadata, on='gene')
|
4720
|
+
merged_df = pd.merge(df_results, df_metadata, on='gene', how='left')
|
4719
4721
|
|
4720
4722
|
# Generate the new file name
|
4721
4723
|
base, ext = os.path.splitext(results_file)
|
@@ -8,26 +8,26 @@ spacr/app_measure.py,sha256=_K7APYIeOKpV6e_LcqabBjvEi7mfq9Fch8175x1x0k8,162
|
|
8
8
|
spacr/app_sequencing.py,sha256=DjG26jy4cpddnV8WOOAIiExtOe9MleVMY4MFa5uTo5w,157
|
9
9
|
spacr/app_umap.py,sha256=ZWAmf_OsIKbYvolYuWPMYhdlVe-n2CADoJulAizMiEo,153
|
10
10
|
spacr/cellpose.py,sha256=zv4BzhaP2O-mtQ-pUfYvpOyxgn1ke_bDWgdHD5UWm9I,13942
|
11
|
-
spacr/core.py,sha256=
|
11
|
+
spacr/core.py,sha256=dW9RrAKFLfVsFhX0-kaVMc2T7b47Ky0pTXK-CEVOeWQ,48235
|
12
12
|
spacr/deep_spacr.py,sha256=HdOcNU8cHcE_19nP7_5uTz-ih3E169ffr2Hm--NvMvA,43255
|
13
13
|
spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
|
14
14
|
spacr/gui_core.py,sha256=LV_HX5zreu3Bye6sQFDbOuk8Dfj4StMoohy6hsrDEXA,41363
|
15
15
|
spacr/gui_elements.py,sha256=w-S1MZdyxt5O3DsNAHNNXy_WGfwBPg0NhwQtCsJeiao,137071
|
16
16
|
spacr/gui_utils.py,sha256=7e9DsZIuV7-jh97kEf7v1In_cFzlFueV4SGcGYGpTxw,45454
|
17
|
-
spacr/io.py,sha256=
|
17
|
+
spacr/io.py,sha256=LN_gJq_oqjbf8y-lBtLLZtJi8DLbNdyoGEcBYyOjbhQ,143606
|
18
18
|
spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
|
19
19
|
spacr/measure.py,sha256=BThn_sALgKrwGKnLOGpT4FyoJeRVoTZoP9SXbCtCMRw,54857
|
20
20
|
spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
|
21
|
-
spacr/ml.py,sha256=
|
21
|
+
spacr/ml.py,sha256=e6nUQaiKBPwcDN_aZZKsbZG6qEa5k9B42wtuL8ipv3Q,50287
|
22
22
|
spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
|
23
|
-
spacr/plot.py,sha256=
|
23
|
+
spacr/plot.py,sha256=TDGMwiIHjvk6v94WFlIvemU-6JfEik_GmSez51vyvCc,135869
|
24
24
|
spacr/sequencing.py,sha256=t18mgpK6rhWuB1LtFOsPxqgpFXxuUmrD06ecsaVQ0Gw,19655
|
25
25
|
spacr/settings.py,sha256=AzP9NGiXI1MqT69bHObxwDSCUk0kdstBVvl1JpcD_-w,75960
|
26
26
|
spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
|
27
27
|
spacr/submodules.py,sha256=AB7s6-cULsaqz-haAaCtXfGEIi8uPZGT4xoCslUJC3Y,18391
|
28
28
|
spacr/timelapse.py,sha256=FSYpUtAVy6xc3lwprRYgyDTT9ysUhfRQ4zrP9_h2mvg,39465
|
29
|
-
spacr/toxo.py,sha256=
|
30
|
-
spacr/utils.py,sha256=
|
29
|
+
spacr/toxo.py,sha256=7dUJe5_HSvDCP16OIXtbYLyshh9LXb2JQ80Vtn-XdPk,15979
|
30
|
+
spacr/utils.py,sha256=_8OxwGVCZaMNBiweB4_YOxBkqQX1LR9YstPSIFmeQKA,216420
|
31
31
|
spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
|
32
32
|
spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
|
33
33
|
spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
|
@@ -150,9 +150,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
|
|
150
150
|
spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
|
151
151
|
spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
|
152
152
|
spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
|
153
|
-
spacr-0.3.
|
154
|
-
spacr-0.3.
|
155
|
-
spacr-0.3.
|
156
|
-
spacr-0.3.
|
157
|
-
spacr-0.3.
|
158
|
-
spacr-0.3.
|
153
|
+
spacr-0.3.41.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
|
154
|
+
spacr-0.3.41.dist-info/METADATA,sha256=zGZO-9iZjljHTjnVg9kAxQJjr2vqpobl7S7ZSQlgxP8,5949
|
155
|
+
spacr-0.3.41.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
156
|
+
spacr-0.3.41.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
|
157
|
+
spacr-0.3.41.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
|
158
|
+
spacr-0.3.41.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|