spacr 0.3.64__py3-none-any.whl → 0.3.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spacr/__init__.py +2 -0
- spacr/plot.py +78 -2
- spacr/stats.py +221 -0
- spacr/submodules.py +96 -119
- spacr/utils.py +0 -23
- {spacr-0.3.64.dist-info → spacr-0.3.65.dist-info}/METADATA +1 -1
- {spacr-0.3.64.dist-info → spacr-0.3.65.dist-info}/RECORD +11 -10
- {spacr-0.3.64.dist-info → spacr-0.3.65.dist-info}/LICENSE +0 -0
- {spacr-0.3.64.dist-info → spacr-0.3.65.dist-info}/WHEEL +0 -0
- {spacr-0.3.64.dist-info → spacr-0.3.65.dist-info}/entry_points.txt +0 -0
- {spacr-0.3.64.dist-info → spacr-0.3.65.dist-info}/top_level.txt +0 -0
spacr/__init__.py
CHANGED
@@ -27,6 +27,7 @@ from . import openai
|
|
27
27
|
from . import ml
|
28
28
|
from . import toxo
|
29
29
|
from . import cellpose
|
30
|
+
from . import stats
|
30
31
|
from . import logger
|
31
32
|
|
32
33
|
__all__ = [
|
@@ -57,6 +58,7 @@ __all__ = [
|
|
57
58
|
"ml",
|
58
59
|
"toxo",
|
59
60
|
"cellpose",
|
61
|
+
"stats",
|
60
62
|
"logger"
|
61
63
|
]
|
62
64
|
|
spacr/plot.py
CHANGED
@@ -17,7 +17,7 @@ from skimage.measure import find_contours, label, regionprops
|
|
17
17
|
from skimage.segmentation import mark_boundaries
|
18
18
|
from skimage.transform import resize as sk_resize
|
19
19
|
import scikit_posthocs as sp
|
20
|
-
|
20
|
+
from scipy.stats import chi2_contingency
|
21
21
|
import tifffile as tiff
|
22
22
|
|
23
23
|
from scipy.stats import normaltest, ttest_ind, mannwhitneyu, f_oneway, kruskal
|
@@ -2609,7 +2609,7 @@ class spacrGraph:
|
|
2609
2609
|
def perform_posthoc_tests(self, is_normal, unique_groups):
|
2610
2610
|
"""Perform post-hoc tests for multiple groups based on all_to_all flag."""
|
2611
2611
|
|
2612
|
-
from .
|
2612
|
+
from .stats import choose_p_adjust_method
|
2613
2613
|
|
2614
2614
|
posthoc_results = []
|
2615
2615
|
if is_normal and len(unique_groups) > 2 and self.all_to_all:
|
@@ -3736,3 +3736,79 @@ def graph_importance(settings):
|
|
3736
3736
|
# Get the figure object if needed
|
3737
3737
|
fig = spacr_graph.get_figure()
|
3738
3738
|
plt.show()
|
3739
|
+
|
3740
|
+
def plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_column='prc', level='object'):
|
3741
|
+
"""
|
3742
|
+
Generate a stacked bar plot for proportions and perform chi-squared and pairwise tests.
|
3743
|
+
|
3744
|
+
Parameters:
|
3745
|
+
- settings (dict): Analysis settings.
|
3746
|
+
- df (DataFrame): Input data.
|
3747
|
+
- group_column (str): Column indicating the groups.
|
3748
|
+
- bin_column (str): Column indicating the categories.
|
3749
|
+
- prc_column (str): Optional; column for additional stratification.
|
3750
|
+
- level (str): Level of aggregation ('well' or 'object').
|
3751
|
+
|
3752
|
+
Returns:
|
3753
|
+
- chi2 (float): Chi-squared statistic for the overall test.
|
3754
|
+
- p (float): p-value for the overall chi-squared test.
|
3755
|
+
- dof (int): Degrees of freedom for the overall chi-squared test.
|
3756
|
+
- expected (ndarray): Expected frequencies for the overall chi-squared test.
|
3757
|
+
- raw_counts (DataFrame): Contingency table of observed counts.
|
3758
|
+
- fig (Figure): The generated plot.
|
3759
|
+
- pairwise_results (list): Pairwise test results from `chi_pairwise`.
|
3760
|
+
"""
|
3761
|
+
|
3762
|
+
from .stats import chi_pairwise
|
3763
|
+
|
3764
|
+
# Calculate contingency table for overall chi-squared test
|
3765
|
+
raw_counts = df.groupby([group_column, bin_column]).size().unstack(fill_value=0)
|
3766
|
+
chi2, p, dof, expected = chi2_contingency(raw_counts)
|
3767
|
+
print(f"Chi-squared test statistic (raw data): {chi2:.4f}")
|
3768
|
+
print(f"p-value (raw data): {p:.4e}")
|
3769
|
+
|
3770
|
+
# Perform pairwise comparisons
|
3771
|
+
pairwise_results = chi_pairwise(raw_counts, verbose=settings.get('verbose', False))
|
3772
|
+
|
3773
|
+
# Plot based on level setting
|
3774
|
+
if level == 'well':
|
3775
|
+
# Aggregate by well for mean ± SD visualization
|
3776
|
+
well_proportions = (
|
3777
|
+
df.groupby([group_column, prc_column, bin_column])
|
3778
|
+
.size()
|
3779
|
+
.groupby(level=[0, 1])
|
3780
|
+
.apply(lambda x: x / x.sum())
|
3781
|
+
.unstack(fill_value=0)
|
3782
|
+
)
|
3783
|
+
mean_proportions = well_proportions.groupby(group_column).mean()
|
3784
|
+
std_proportions = well_proportions.groupby(group_column).std()
|
3785
|
+
|
3786
|
+
ax = mean_proportions.plot(
|
3787
|
+
kind='bar', stacked=True, yerr=std_proportions, capsize=5, colormap='viridis', figsize=(12, 8)
|
3788
|
+
)
|
3789
|
+
plt.title('Proportion of Volume Bins by Group (Mean ± SD across wells)')
|
3790
|
+
else:
|
3791
|
+
# Object-level plotting without aggregation
|
3792
|
+
group_counts = df.groupby([group_column, bin_column]).size()
|
3793
|
+
group_totals = group_counts.groupby(level=0).sum()
|
3794
|
+
proportions = group_counts / group_totals
|
3795
|
+
proportion_df = proportions.unstack(fill_value=0)
|
3796
|
+
|
3797
|
+
ax = proportion_df.plot(kind='bar', stacked=True, colormap='viridis', figsize=(12, 8))
|
3798
|
+
plt.title('Proportion of Volume Bins by Group')
|
3799
|
+
|
3800
|
+
plt.xlabel('Group')
|
3801
|
+
plt.ylabel('Proportion')
|
3802
|
+
|
3803
|
+
# Update legend with formatted labels, maintaining correct order
|
3804
|
+
plt.legend(title=f'Classes', bbox_to_anchor=(1.05, 1), loc='upper left')
|
3805
|
+
plt.ylim(0, 1)
|
3806
|
+
fig = plt.gcf()
|
3807
|
+
|
3808
|
+
results_df = pd.DataFrame({
|
3809
|
+
'chi_squared_stat': [chi2],
|
3810
|
+
'p_value': [p],
|
3811
|
+
'degrees_of_freedom': [dof]
|
3812
|
+
})
|
3813
|
+
|
3814
|
+
return results_df, pairwise_results, fig
|
spacr/stats.py
ADDED
@@ -0,0 +1,221 @@
|
|
1
|
+
from scipy.stats import shapiro, normaltest, levene, ttest_ind, mannwhitneyu, kruskal, f_oneway
|
2
|
+
from statsmodels.stats.multicomp import pairwise_tukeyhsd
|
3
|
+
import scikit_posthocs as sp
|
4
|
+
import numpy as np
|
5
|
+
import pandas as pd
|
6
|
+
from scipy.stats import chi2_contingency, fisher_exact
|
7
|
+
import itertools
|
8
|
+
from statsmodels.stats.multitest import multipletests
|
9
|
+
|
10
|
+
|
11
|
+
def choose_p_adjust_method(num_groups, num_data_points):
|
12
|
+
"""
|
13
|
+
Selects the most appropriate p-value adjustment method based on data characteristics.
|
14
|
+
|
15
|
+
Parameters:
|
16
|
+
- num_groups: Number of unique groups being compared
|
17
|
+
- num_data_points: Number of data points per group (assuming balanced groups)
|
18
|
+
|
19
|
+
Returns:
|
20
|
+
- A string representing the recommended p-adjustment method
|
21
|
+
"""
|
22
|
+
num_comparisons = (num_groups * (num_groups - 1)) // 2 # Number of pairwise comparisons
|
23
|
+
|
24
|
+
# Decision logic for choosing the adjustment method
|
25
|
+
if num_comparisons <= 10 and num_data_points > 5:
|
26
|
+
return 'holm' # Balanced between power and Type I error control
|
27
|
+
elif num_comparisons > 10 and num_data_points <= 5:
|
28
|
+
return 'fdr_bh' # FDR control for large number of comparisons and small sample size
|
29
|
+
elif num_comparisons <= 10:
|
30
|
+
return 'sidak' # Less conservative than Bonferroni, good for independent comparisons
|
31
|
+
else:
|
32
|
+
return 'bonferroni' # Very conservative, use for strict control of Type I errors
|
33
|
+
|
34
|
+
def perform_normality_tests(df, grouping_column, data_columns):
|
35
|
+
"""Perform normality tests for each group and data column."""
|
36
|
+
unique_groups = df[grouping_column].unique()
|
37
|
+
normality_results = []
|
38
|
+
|
39
|
+
for column in data_columns:
|
40
|
+
for group in unique_groups:
|
41
|
+
data = df.loc[df[grouping_column] == group, column].dropna()
|
42
|
+
n_samples = len(data)
|
43
|
+
|
44
|
+
if n_samples < 3:
|
45
|
+
# Skip test if there aren't enough data points
|
46
|
+
print(f"Skipping normality test for group '{group}' on column '{column}' - Not enough data.")
|
47
|
+
normality_results.append({
|
48
|
+
'Comparison': f'Normality test for {group} on {column}',
|
49
|
+
'Test Statistic': None,
|
50
|
+
'p-value': None,
|
51
|
+
'Test Name': 'Skipped',
|
52
|
+
'Column': column,
|
53
|
+
'n': n_samples
|
54
|
+
})
|
55
|
+
continue
|
56
|
+
|
57
|
+
# Choose the appropriate normality test based on the sample size
|
58
|
+
if n_samples >= 8:
|
59
|
+
stat, p_value = normaltest(data)
|
60
|
+
test_name = "D'Agostino-Pearson test"
|
61
|
+
else:
|
62
|
+
stat, p_value = shapiro(data)
|
63
|
+
test_name = "Shapiro-Wilk test"
|
64
|
+
|
65
|
+
normality_results.append({
|
66
|
+
'Comparison': f'Normality test for {group} on {column}',
|
67
|
+
'Test Statistic': stat,
|
68
|
+
'p-value': p_value,
|
69
|
+
'Test Name': test_name,
|
70
|
+
'Column': column,
|
71
|
+
'n': n_samples
|
72
|
+
})
|
73
|
+
|
74
|
+
# Check if all groups are normally distributed (p > 0.05)
|
75
|
+
normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column and result['p-value'] is not None]
|
76
|
+
is_normal = all(p > 0.05 for p in normal_p_values)
|
77
|
+
|
78
|
+
return is_normal, normality_results
|
79
|
+
|
80
|
+
|
81
|
+
def perform_levene_test(df, grouping_column, data_column):
|
82
|
+
"""Perform Levene's test for equal variance."""
|
83
|
+
unique_groups = df[grouping_column].unique()
|
84
|
+
grouped_data = [df.loc[df[grouping_column] == group, data_column].dropna() for group in unique_groups]
|
85
|
+
stat, p_value = levene(*grouped_data)
|
86
|
+
return stat, p_value
|
87
|
+
|
88
|
+
def perform_statistical_tests(df, grouping_column, data_columns, paired=False):
|
89
|
+
"""Perform statistical tests for each data column."""
|
90
|
+
unique_groups = df[grouping_column].unique()
|
91
|
+
test_results = []
|
92
|
+
|
93
|
+
for column in data_columns:
|
94
|
+
grouped_data = [df.loc[df[grouping_column] == group, column].dropna() for group in unique_groups]
|
95
|
+
if len(unique_groups) == 2: # For two groups
|
96
|
+
if paired:
|
97
|
+
print("Performing paired tests (not implemented in this template).")
|
98
|
+
continue # Extend as needed
|
99
|
+
else:
|
100
|
+
# Check normality for two groups
|
101
|
+
is_normal, _ = perform_normality_tests(df, grouping_column, [column])
|
102
|
+
if is_normal:
|
103
|
+
stat, p = ttest_ind(grouped_data[0], grouped_data[1])
|
104
|
+
test_name = 'T-test'
|
105
|
+
else:
|
106
|
+
stat, p = mannwhitneyu(grouped_data[0], grouped_data[1])
|
107
|
+
test_name = 'Mann-Whitney U test'
|
108
|
+
else:
|
109
|
+
# Check normality for multiple groups
|
110
|
+
is_normal, _ = perform_normality_tests(df, grouping_column, [column])
|
111
|
+
if is_normal:
|
112
|
+
stat, p = f_oneway(*grouped_data)
|
113
|
+
test_name = 'One-way ANOVA'
|
114
|
+
else:
|
115
|
+
stat, p = kruskal(*grouped_data)
|
116
|
+
test_name = 'Kruskal-Wallis test'
|
117
|
+
|
118
|
+
test_results.append({
|
119
|
+
'Column': column,
|
120
|
+
'Test Name': test_name,
|
121
|
+
'Test Statistic': stat,
|
122
|
+
'p-value': p,
|
123
|
+
'Groups': len(unique_groups)
|
124
|
+
})
|
125
|
+
|
126
|
+
return test_results
|
127
|
+
|
128
|
+
|
129
|
+
def perform_posthoc_tests(df, grouping_column, data_column, is_normal):
|
130
|
+
"""Perform post-hoc tests for multiple groups with both original and adjusted p-values."""
|
131
|
+
unique_groups = df[grouping_column].unique()
|
132
|
+
posthoc_results = []
|
133
|
+
|
134
|
+
if len(unique_groups) > 2:
|
135
|
+
num_groups = len(unique_groups)
|
136
|
+
num_data_points = len(df[data_column].dropna()) // num_groups # Assuming roughly equal data points per group
|
137
|
+
p_adjust_method = choose_p_adjust_method(num_groups, num_data_points)
|
138
|
+
|
139
|
+
if is_normal:
|
140
|
+
# Tukey's HSD automatically adjusts p-values
|
141
|
+
tukey_result = pairwise_tukeyhsd(df[data_column], df[grouping_column], alpha=0.05)
|
142
|
+
for comparison, p_value in zip(tukey_result._results_table.data[1:], tukey_result.pvalues):
|
143
|
+
posthoc_results.append({
|
144
|
+
'Comparison': f"{comparison[0]} vs {comparison[1]}",
|
145
|
+
'Original p-value': None, # Tukey HSD does not provide raw p-values
|
146
|
+
'Adjusted p-value': p_value,
|
147
|
+
'Adjusted Method': 'Tukey HSD',
|
148
|
+
'Test Name': 'Tukey HSD'
|
149
|
+
})
|
150
|
+
else:
|
151
|
+
# Dunn's test with p-value adjustment
|
152
|
+
raw_dunn_result = sp.posthoc_dunn(df, val_col=data_column, group_col=grouping_column, p_adjust=None)
|
153
|
+
adjusted_dunn_result = sp.posthoc_dunn(df, val_col=data_column, group_col=grouping_column, p_adjust=p_adjust_method)
|
154
|
+
for i, group_a in enumerate(adjusted_dunn_result.index):
|
155
|
+
for j, group_b in enumerate(adjusted_dunn_result.columns):
|
156
|
+
if i < j: # Only consider unique pairs
|
157
|
+
posthoc_results.append({
|
158
|
+
'Comparison': f"{group_a} vs {group_b}",
|
159
|
+
'Original p-value': raw_dunn_result.iloc[i, j],
|
160
|
+
'Adjusted p-value': adjusted_dunn_result.iloc[i, j],
|
161
|
+
'Adjusted Method': p_adjust_method,
|
162
|
+
'Test Name': "Dunn's Post-hoc"
|
163
|
+
})
|
164
|
+
|
165
|
+
return posthoc_results
|
166
|
+
|
167
|
+
def chi_pairwise(raw_counts, verbose=False):
|
168
|
+
"""
|
169
|
+
Perform pairwise chi-square or Fisher's exact tests between all unique group pairs
|
170
|
+
and apply p-value correction.
|
171
|
+
|
172
|
+
Parameters:
|
173
|
+
- raw_counts (DataFrame): Contingency table with group-wise counts.
|
174
|
+
- verbose (bool): Whether to print results for each pair.
|
175
|
+
|
176
|
+
Returns:
|
177
|
+
- pairwise_df (DataFrame): DataFrame with pairwise test results, including corrected p-values.
|
178
|
+
"""
|
179
|
+
pairwise_results = []
|
180
|
+
groups = raw_counts.index.unique() # Use index from raw_counts for group pairs
|
181
|
+
raw_p_values = [] # Store raw p-values for correction later
|
182
|
+
|
183
|
+
# Calculate the number of groups and average number of data points per group
|
184
|
+
num_groups = len(groups)
|
185
|
+
num_data_points = raw_counts.sum(axis=1).mean() # Average total data points per group
|
186
|
+
p_adjust_method = choose_p_adjust_method(num_groups, num_data_points)
|
187
|
+
|
188
|
+
for group1, group2 in itertools.combinations(groups, 2):
|
189
|
+
contingency_table = raw_counts.loc[[group1, group2]].values
|
190
|
+
if contingency_table.shape[1] == 2: # Fisher's Exact Test for 2x2 tables
|
191
|
+
oddsratio, p_value = fisher_exact(contingency_table)
|
192
|
+
test_name = "Fisher's Exact Test"
|
193
|
+
else: # Chi-Square Test for larger tables
|
194
|
+
chi2_stat, p_value, _, _ = chi2_contingency(contingency_table)
|
195
|
+
test_name = 'Pairwise Chi-Square Test'
|
196
|
+
|
197
|
+
pairwise_results.append({
|
198
|
+
'Group 1': group1,
|
199
|
+
'Group 2': group2,
|
200
|
+
'Test Name': test_name,
|
201
|
+
'p-value': p_value
|
202
|
+
})
|
203
|
+
raw_p_values.append(p_value)
|
204
|
+
|
205
|
+
# Apply p-value correction
|
206
|
+
corrected_p_values = multipletests(raw_p_values, method=p_adjust_method)[1]
|
207
|
+
|
208
|
+
# Add corrected p-values to results
|
209
|
+
for i, result in enumerate(pairwise_results):
|
210
|
+
result['p-value_adj'] = corrected_p_values[i]
|
211
|
+
|
212
|
+
pairwise_df = pd.DataFrame(pairwise_results)
|
213
|
+
|
214
|
+
pairwise_df['adj'] = p_adjust_method
|
215
|
+
|
216
|
+
if verbose:
|
217
|
+
# Print pairwise results
|
218
|
+
print("\nPairwise Frequency Analysis Results:")
|
219
|
+
print(pairwise_df.to_string(index=False))
|
220
|
+
|
221
|
+
return pairwise_df
|
spacr/submodules.py
CHANGED
@@ -847,11 +847,61 @@ def interperate_vision_model(settings={}):
|
|
847
847
|
|
848
848
|
return output
|
849
849
|
|
850
|
+
def _plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_column='prc', level='object'):
|
851
|
+
# Always calculate chi-squared on raw data
|
852
|
+
raw_counts = df.groupby([group_column, bin_column]).size().unstack(fill_value=0)
|
853
|
+
chi2, p, dof, expected = chi2_contingency(raw_counts)
|
854
|
+
print(f"Chi-squared test statistic (raw data): {chi2:.4f}")
|
855
|
+
print(f"p-value (raw data): {p:.4e}")
|
856
|
+
|
857
|
+
# Extract bin labels and indices for formatting the legend in the correct order
|
858
|
+
bin_labels = df[bin_column].cat.categories if pd.api.types.is_categorical_dtype(df[bin_column]) else sorted(df[bin_column].unique())
|
859
|
+
bin_indices = range(1, len(bin_labels) + 1)
|
860
|
+
legend_labels = [f"{index}: {label}" for index, label in zip(bin_indices, bin_labels)]
|
861
|
+
|
862
|
+
# Plot based on level setting
|
863
|
+
if level == 'well':
|
864
|
+
# Aggregate by well for mean ± SD visualization
|
865
|
+
well_proportions = (
|
866
|
+
df.groupby([group_column, prc_column, bin_column])
|
867
|
+
.size()
|
868
|
+
.groupby(level=[0, 1])
|
869
|
+
.apply(lambda x: x / x.sum())
|
870
|
+
.unstack(fill_value=0)
|
871
|
+
)
|
872
|
+
mean_proportions = well_proportions.groupby(group_column).mean()
|
873
|
+
std_proportions = well_proportions.groupby(group_column).std()
|
874
|
+
|
875
|
+
ax = mean_proportions.plot(
|
876
|
+
kind='bar', stacked=True, yerr=std_proportions, capsize=5, colormap='viridis', figsize=(12, 8)
|
877
|
+
)
|
878
|
+
plt.title('Proportion of Volume Bins by Group (Mean ± SD across wells)')
|
879
|
+
else:
|
880
|
+
# Object-level plotting without aggregation
|
881
|
+
group_counts = df.groupby([group_column, bin_column]).size()
|
882
|
+
group_totals = group_counts.groupby(level=0).sum()
|
883
|
+
proportions = group_counts / group_totals
|
884
|
+
proportion_df = proportions.unstack(fill_value=0)
|
885
|
+
|
886
|
+
ax = proportion_df.plot(kind='bar', stacked=True, colormap='viridis', figsize=(12, 8))
|
887
|
+
plt.title('Proportion of Volume Bins by Group')
|
888
|
+
|
889
|
+
plt.xlabel('Group')
|
890
|
+
plt.ylabel('Proportion')
|
891
|
+
|
892
|
+
# Update legend with formatted labels, maintaining correct order
|
893
|
+
volume_unit = "px³" if settings['um_per_px'] is None else "µm³"
|
894
|
+
plt.legend(legend_labels, title=f'Volume Range ({volume_unit})', bbox_to_anchor=(1.05, 1), loc='upper left')
|
895
|
+
plt.ylim(0, 1)
|
896
|
+
fig = plt.gcf()
|
897
|
+
return chi2, p, dof, expected, raw_counts, fig
|
898
|
+
|
850
899
|
def analyze_endodyogeny(settings):
|
851
900
|
|
852
901
|
from .utils import annotate_conditions, save_settings
|
853
902
|
from .io import _read_and_merge_data
|
854
903
|
from .settings import set_analyze_endodyogeny_defaults
|
904
|
+
from .plot import plot_proportion_stacked_bars
|
855
905
|
|
856
906
|
def _calculate_volume_bins(df, compartment='pathogen', min_area_bin=500, max_bins=None, verbose=False):
|
857
907
|
area_column = f'{compartment}_area'
|
@@ -890,55 +940,6 @@ def analyze_endodyogeny(settings):
|
|
890
940
|
|
891
941
|
return df
|
892
942
|
|
893
|
-
def _plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_column='prc', level='object'):
|
894
|
-
# Always calculate chi-squared on raw data
|
895
|
-
raw_counts = df.groupby([group_column, bin_column]).size().unstack(fill_value=0)
|
896
|
-
chi2, p, dof, expected = chi2_contingency(raw_counts)
|
897
|
-
print(f"Chi-squared test statistic (raw data): {chi2:.4f}")
|
898
|
-
print(f"p-value (raw data): {p:.4e}")
|
899
|
-
|
900
|
-
# Extract bin labels and indices for formatting the legend in the correct order
|
901
|
-
bin_labels = df[bin_column].cat.categories if pd.api.types.is_categorical_dtype(df[bin_column]) else sorted(df[bin_column].unique())
|
902
|
-
bin_indices = range(1, len(bin_labels) + 1)
|
903
|
-
legend_labels = [f"{index}: {label}" for index, label in zip(bin_indices, bin_labels)]
|
904
|
-
|
905
|
-
# Plot based on level setting
|
906
|
-
if level == 'well':
|
907
|
-
# Aggregate by well for mean ± SD visualization
|
908
|
-
well_proportions = (
|
909
|
-
df.groupby([group_column, prc_column, bin_column])
|
910
|
-
.size()
|
911
|
-
.groupby(level=[0, 1])
|
912
|
-
.apply(lambda x: x / x.sum())
|
913
|
-
.unstack(fill_value=0)
|
914
|
-
)
|
915
|
-
mean_proportions = well_proportions.groupby(group_column).mean()
|
916
|
-
std_proportions = well_proportions.groupby(group_column).std()
|
917
|
-
|
918
|
-
ax = mean_proportions.plot(
|
919
|
-
kind='bar', stacked=True, yerr=std_proportions, capsize=5, colormap='viridis', figsize=(12, 8)
|
920
|
-
)
|
921
|
-
plt.title('Proportion of Volume Bins by Group (Mean ± SD across wells)')
|
922
|
-
else:
|
923
|
-
# Object-level plotting without aggregation
|
924
|
-
group_counts = df.groupby([group_column, bin_column]).size()
|
925
|
-
group_totals = group_counts.groupby(level=0).sum()
|
926
|
-
proportions = group_counts / group_totals
|
927
|
-
proportion_df = proportions.unstack(fill_value=0)
|
928
|
-
|
929
|
-
ax = proportion_df.plot(kind='bar', stacked=True, colormap='viridis', figsize=(12, 8))
|
930
|
-
plt.title('Proportion of Volume Bins by Group')
|
931
|
-
|
932
|
-
plt.xlabel('Group')
|
933
|
-
plt.ylabel('Proportion')
|
934
|
-
|
935
|
-
# Update legend with formatted labels, maintaining correct order
|
936
|
-
volume_unit = "px³" if settings['um_per_px'] is None else "µm³"
|
937
|
-
plt.legend(legend_labels, title=f'Volume Range ({volume_unit})', bbox_to_anchor=(1.05, 1), loc='upper left')
|
938
|
-
plt.ylim(0, 1)
|
939
|
-
fig = plt.gcf()
|
940
|
-
return chi2, p, dof, expected, raw_counts, fig
|
941
|
-
|
942
943
|
settings = set_analyze_endodyogeny_defaults(settings)
|
943
944
|
save_settings(settings, name='analyze_endodyogeny', show=True)
|
944
945
|
output = {}
|
@@ -985,31 +986,30 @@ def analyze_endodyogeny(settings):
|
|
985
986
|
df = _calculate_volume_bins(df, settings['compartment'], settings['min_area_bin'], settings['max_bins'], settings['verbose'])
|
986
987
|
output['data'] = df
|
987
988
|
# Perform chi-squared test and plot
|
988
|
-
|
989
|
-
|
990
|
-
|
991
|
-
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
expected_flat = expected_df.stack().reset_index()
|
1001
|
-
expected_flat.columns = [settings['group_column'], f"{settings['compartment']}_volume_bin", 'expected_count']
|
1002
|
-
results_df = results_df.merge(expected_flat, how="cross")
|
989
|
+
results_df, pairwise_results_df, fig = plot_proportion_stacked_bars(settings, df, settings['group_column'], bin_column=f"{settings['compartment']}_volume_bin", level=settings['level'])
|
990
|
+
|
991
|
+
# Extract bin labels and indices for formatting the legend in the correct order
|
992
|
+
bin_labels = df[f"{settings['compartment']}_volume_bin"].cat.categories if pd.api.types.is_categorical_dtype(df[f"{settings['compartment']}_volume_bin"]) else sorted(df[f"{settings['compartment']}_volume_bin"].unique())
|
993
|
+
bin_indices = range(1, len(bin_labels) + 1)
|
994
|
+
legend_labels = [f"{index}: {label}" for index, label in zip(bin_indices, bin_labels)]
|
995
|
+
|
996
|
+
# Update legend with formatted labels, maintaining correct order
|
997
|
+
volume_unit = "px³" if settings['um_per_px'] is None else "µm³"
|
998
|
+
plt.legend(legend_labels, title=f'Volume Range ({volume_unit})', bbox_to_anchor=(1.05, 1), loc='upper left')
|
999
|
+
plt.ylim(0, 1)
|
1000
|
+
|
1003
1001
|
output['chi_squared'] = results_df
|
1004
1002
|
|
1005
1003
|
if settings['save']:
|
1006
1004
|
# Save DataFrame to CSV
|
1007
|
-
output_dir = os.path.join(settings['src'][0], 'results')
|
1005
|
+
output_dir = os.path.join(settings['src'][0], 'results', 'analyze_endodyogeny')
|
1008
1006
|
os.makedirs(output_dir, exist_ok=True)
|
1009
1007
|
output_path = os.path.join(output_dir, 'chi_squared_results.csv')
|
1008
|
+
output_path_pairwise = os.path.join(output_dir, 'chi_squared_results.csv')
|
1010
1009
|
output_path_fig = os.path.join(output_dir, 'chi_squared_results.pdf')
|
1011
1010
|
fig.savefig(output_path_fig, dpi=300, bbox_inches='tight')
|
1012
1011
|
results_df.to_csv(output_path, index=False)
|
1012
|
+
pairwise_results_df.to_csv(output_path_pairwise, index=False)
|
1013
1013
|
print(f"Chi-squared results saved to {output_path}")
|
1014
1014
|
|
1015
1015
|
plt.show()
|
@@ -1021,51 +1021,8 @@ def analyze_class_proportion(settings):
|
|
1021
1021
|
from .utils import annotate_conditions, save_settings
|
1022
1022
|
from .io import _read_and_merge_data
|
1023
1023
|
from .settings import set_analyze_class_proportion_defaults
|
1024
|
-
from .plot import plot_plates
|
1025
|
-
|
1026
|
-
|
1027
|
-
def _plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_column='prc', level='object'):
|
1028
|
-
# Always calculate chi-squared on raw data
|
1029
|
-
raw_counts = df.groupby([group_column, bin_column]).size().unstack(fill_value=0)
|
1030
|
-
chi2, p, dof, expected = chi2_contingency(raw_counts)
|
1031
|
-
print(f"Chi-squared test statistic (raw data): {chi2:.4f}")
|
1032
|
-
print(f"p-value (raw data): {p:.4e}")
|
1033
|
-
|
1034
|
-
# Plot based on level setting
|
1035
|
-
if level == 'well':
|
1036
|
-
# Aggregate by well for mean ± SD visualization
|
1037
|
-
well_proportions = (
|
1038
|
-
df.groupby([group_column, prc_column, bin_column])
|
1039
|
-
.size()
|
1040
|
-
.groupby(level=[0, 1])
|
1041
|
-
.apply(lambda x: x / x.sum())
|
1042
|
-
.unstack(fill_value=0)
|
1043
|
-
)
|
1044
|
-
mean_proportions = well_proportions.groupby(group_column).mean()
|
1045
|
-
std_proportions = well_proportions.groupby(group_column).std()
|
1046
|
-
|
1047
|
-
ax = mean_proportions.plot(
|
1048
|
-
kind='bar', stacked=True, yerr=std_proportions, capsize=5, colormap='viridis', figsize=(12, 8)
|
1049
|
-
)
|
1050
|
-
plt.title('Proportion of Volume Bins by Group (Mean ± SD across wells)')
|
1051
|
-
else:
|
1052
|
-
# Object-level plotting without aggregation
|
1053
|
-
group_counts = df.groupby([group_column, bin_column]).size()
|
1054
|
-
group_totals = group_counts.groupby(level=0).sum()
|
1055
|
-
proportions = group_counts / group_totals
|
1056
|
-
proportion_df = proportions.unstack(fill_value=0)
|
1057
|
-
|
1058
|
-
ax = proportion_df.plot(kind='bar', stacked=True, colormap='viridis', figsize=(12, 8))
|
1059
|
-
plt.title('Proportion of Volume Bins by Group')
|
1060
|
-
|
1061
|
-
plt.xlabel('Group')
|
1062
|
-
plt.ylabel('Proportion')
|
1063
|
-
|
1064
|
-
# Update legend with formatted labels, maintaining correct order
|
1065
|
-
plt.legend(title=f'Classes', bbox_to_anchor=(1.05, 1), loc='upper left')
|
1066
|
-
plt.ylim(0, 1)
|
1067
|
-
fig = plt.gcf()
|
1068
|
-
return chi2, p, dof, expected, raw_counts, fig
|
1024
|
+
from .plot import plot_plates, plot_proportion_stacked_bars
|
1025
|
+
from .stats import perform_normality_tests, perform_levene_test, perform_statistical_tests, perform_posthoc_tests
|
1069
1026
|
|
1070
1027
|
settings = set_analyze_class_proportion_defaults(settings)
|
1071
1028
|
save_settings(settings, name='analyze_class_proportion', show=True)
|
@@ -1110,25 +1067,20 @@ def analyze_class_proportion(settings):
|
|
1110
1067
|
output['data'] = df
|
1111
1068
|
|
1112
1069
|
# Perform chi-squared test and plot
|
1113
|
-
|
1114
|
-
|
1115
|
-
# Create a DataFrame with chi-squared test results and raw counts
|
1116
|
-
results_df = pd.DataFrame({
|
1117
|
-
'chi_squared_stat': [chi2],
|
1118
|
-
'p_value': [p],
|
1119
|
-
'degrees_of_freedom': [dof]
|
1120
|
-
})
|
1070
|
+
results_df, pairwise_results, fig = plot_proportion_stacked_bars(settings, df, settings['group_column'], bin_column=settings['class_column'], level=settings['level'])
|
1121
1071
|
|
1122
1072
|
output['chi_squared'] = results_df
|
1123
1073
|
|
1124
1074
|
if settings['save']:
|
1125
|
-
output_dir = os.path.join(settings['src'][0], 'results')
|
1075
|
+
output_dir = os.path.join(settings['src'][0], 'results', 'analyze_class_proportion')
|
1126
1076
|
os.makedirs(output_dir, exist_ok=True)
|
1127
1077
|
output_path_chi = os.path.join(output_dir, 'class_chi_squared_results.csv')
|
1078
|
+
output_path_chi_pairwise = os.path.join(output_dir, 'class_frequency_test.csv')
|
1128
1079
|
output_path_data = os.path.join(output_dir, 'class_chi_squared_data.csv')
|
1129
1080
|
output_path_fig = os.path.join(output_dir, 'class_chi_squared.pdf')
|
1130
1081
|
fig.savefig(output_path_fig, dpi=300, bbox_inches='tight')
|
1131
1082
|
results_df.to_csv(output_path_chi, index=False)
|
1083
|
+
pairwise_results.to_csv(output_path_chi_pairwise, index=False)
|
1132
1084
|
df.to_csv(output_path_data, index=False)
|
1133
1085
|
print(f"Chi-squared results saved to {output_path_chi}")
|
1134
1086
|
print(f"Annotated data saved to {output_path_data}")
|
@@ -1141,4 +1093,29 @@ def analyze_class_proportion(settings):
|
|
1141
1093
|
fig2.savefig(output_path_fig2, dpi=300, bbox_inches='tight')
|
1142
1094
|
|
1143
1095
|
plt.show()
|
1096
|
+
|
1097
|
+
# Perform normality, variance, and statistical tests
|
1098
|
+
is_normal, normality_results = perform_normality_tests(df, settings['group_column'], [settings['class_column']])
|
1099
|
+
variance_stat, variance_p = perform_levene_test(df, settings['group_column'], settings['class_column'])
|
1100
|
+
|
1101
|
+
print(f"Levene's test statistic: {variance_stat:.4f}, p-value: {variance_p:.4e}")
|
1102
|
+
variance_results = {
|
1103
|
+
'Test Statistic': variance_stat,
|
1104
|
+
'p-value': variance_p,
|
1105
|
+
'Test Name': "Levene's Test"
|
1106
|
+
}
|
1107
|
+
|
1108
|
+
test_results = perform_statistical_tests(df, settings['group_column'], [settings['class_column']])
|
1109
|
+
posthoc_results = perform_posthoc_tests(
|
1110
|
+
df, settings['group_column'], settings['class_column'], is_normal=is_normal
|
1111
|
+
)
|
1112
|
+
|
1113
|
+
# Save additional results
|
1114
|
+
if settings['save']:
|
1115
|
+
pd.DataFrame(normality_results).to_csv(os.path.join(output_dir, 'normality_results.csv'), index=False)
|
1116
|
+
pd.DataFrame([variance_results]).to_csv(os.path.join(output_dir, 'variance_results.csv'), index=False)
|
1117
|
+
pd.DataFrame(test_results).to_csv(os.path.join(output_dir, 'statistical_test_results.csv'), index=False)
|
1118
|
+
pd.DataFrame(posthoc_results).to_csv(os.path.join(output_dir, 'posthoc_results.csv'), index=False)
|
1119
|
+
print("Statistical analysis results saved.")
|
1120
|
+
|
1144
1121
|
return output
|
spacr/utils.py
CHANGED
@@ -5156,29 +5156,6 @@ def control_filelist(folder, mode='column', values=['01','02']):
|
|
5156
5156
|
if mode is 'row_name':
|
5157
5157
|
filtered_files = [file for file in files if file.split('_')[1][:1] in values]
|
5158
5158
|
return filtered_files
|
5159
|
-
|
5160
|
-
def choose_p_adjust_method(num_groups, num_data_points):
|
5161
|
-
"""
|
5162
|
-
Selects the most appropriate p-value adjustment method based on data characteristics.
|
5163
|
-
|
5164
|
-
Parameters:
|
5165
|
-
- num_groups: Number of unique groups being compared
|
5166
|
-
- num_data_points: Number of data points per group (assuming balanced groups)
|
5167
|
-
|
5168
|
-
Returns:
|
5169
|
-
- A string representing the recommended p-adjustment method
|
5170
|
-
"""
|
5171
|
-
num_comparisons = (num_groups * (num_groups - 1)) // 2 # Number of pairwise comparisons
|
5172
|
-
|
5173
|
-
# Decision logic for choosing the adjustment method
|
5174
|
-
if num_comparisons <= 10 and num_data_points > 5:
|
5175
|
-
return 'holm' # Balanced between power and Type I error control
|
5176
|
-
elif num_comparisons > 10 and num_data_points <= 5:
|
5177
|
-
return 'fdr_bh' # FDR control for large number of comparisons and small sample size
|
5178
|
-
elif num_comparisons <= 10:
|
5179
|
-
return 'sidak' # Less conservative than Bonferroni, good for independent comparisons
|
5180
|
-
else:
|
5181
|
-
return 'bonferroni' # Very conservative, use for strict control of Type I errors
|
5182
5159
|
|
5183
5160
|
def rename_columns_in_db(db_path):
|
5184
5161
|
with sqlite3.connect(db_path) as conn:
|
@@ -1,4 +1,4 @@
|
|
1
|
-
spacr/__init__.py,sha256=
|
1
|
+
spacr/__init__.py,sha256=fvk5JfLpOqUA1W0yPcsVZnS9qbpXFOceFk09LKolVfw,1627
|
2
2
|
spacr/__main__.py,sha256=bkAJJD2kjIqOP-u1kLvct9jQQCeUXzlEjdgitwi1Lm8,75
|
3
3
|
spacr/app_annotate.py,sha256=W9eLPa_LZIvXsXx_-0iDFEU938LBDvRy6prXo0qF4KQ,2533
|
4
4
|
spacr/app_classify.py,sha256=urTP_wlZ58hSyM5a19slYlBxN0PdC-9-ga0hvq8CGWc,165
|
@@ -21,14 +21,15 @@ spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
|
|
21
21
|
spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
|
22
22
|
spacr/ml.py,sha256=GOQJH8jdTrJQwiLlDrcc9-yCxLFaMx4YD4OJs0-R5YI,77947
|
23
23
|
spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
|
24
|
-
spacr/plot.py,sha256=
|
24
|
+
spacr/plot.py,sha256=LApfosnN9gaF6eGRrPGt3uZIwSwAT7kgRbMnUDuxx0Y,165160
|
25
25
|
spacr/sequencing.py,sha256=ClUfwPPK6rNUbUuiEkzcwakzVyDKKUMv9ricrxT8qQY,25227
|
26
26
|
spacr/settings.py,sha256=LSoDNuz1m7rySh7MWXEL1xlUU4rFiCRVlGvZCSCOqzU,80085
|
27
27
|
spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
|
28
|
-
spacr/
|
28
|
+
spacr/stats.py,sha256=mbhwsyIqt5upsSD346qGjdCw7CFBa0tIS7zHU9e0jNI,9536
|
29
|
+
spacr/submodules.py,sha256=3hgY8MWQTfajJbUIYmHMzYNd42d80L_0aN6bpoTUnu0,55059
|
29
30
|
spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
|
30
31
|
spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
|
31
|
-
spacr/utils.py,sha256=
|
32
|
+
spacr/utils.py,sha256=zojZlZtGwwDVDY0fgRt5XViVuJLuxadRO1IYctWm_SQ,221885
|
32
33
|
spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
|
33
34
|
spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
|
34
35
|
spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
|
@@ -151,9 +152,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
|
|
151
152
|
spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
|
152
153
|
spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
|
153
154
|
spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
|
154
|
-
spacr-0.3.
|
155
|
-
spacr-0.3.
|
156
|
-
spacr-0.3.
|
157
|
-
spacr-0.3.
|
158
|
-
spacr-0.3.
|
159
|
-
spacr-0.3.
|
155
|
+
spacr-0.3.65.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
|
156
|
+
spacr-0.3.65.dist-info/METADATA,sha256=FHAKN1FrIXWI6vqz43lT8VPSPzBpEwRIC54aQaL0Mr8,6032
|
157
|
+
spacr-0.3.65.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
|
158
|
+
spacr-0.3.65.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
|
159
|
+
spacr-0.3.65.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
|
160
|
+
spacr-0.3.65.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|