spacr 0.3.64__py3-none-any.whl → 0.3.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/__init__.py CHANGED
@@ -27,6 +27,7 @@ from . import openai
27
27
  from . import ml
28
28
  from . import toxo
29
29
  from . import cellpose
30
+ from . import stats
30
31
  from . import logger
31
32
 
32
33
  __all__ = [
@@ -57,6 +58,7 @@ __all__ = [
57
58
  "ml",
58
59
  "toxo",
59
60
  "cellpose",
61
+ "stats",
60
62
  "logger"
61
63
  ]
62
64
 
spacr/io.py CHANGED
@@ -2445,7 +2445,7 @@ def _read_and_merge_data_v1(locs, tables, verbose=False, nuclei_limit=False, pat
2445
2445
 
2446
2446
  return merged_df, obj_df_ls
2447
2447
 
2448
- def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_limit=10):
2448
+ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_limit=10, change_plate=False):
2449
2449
  from .io import _read_db
2450
2450
  from .utils import _split_data
2451
2451
 
@@ -2453,8 +2453,10 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
2453
2453
  data_dict = {table: [] for table in tables}
2454
2454
 
2455
2455
  # Extract plate DataFrames
2456
- for loc in locs:
2456
+ for idx, loc in enumerate(locs):
2457
2457
  db_dfs = _read_db(loc, tables)
2458
+ if change_plate:
2459
+ db_dfs['plate'] = f'plate{idx}'
2458
2460
  for table, df in zip(tables, db_dfs):
2459
2461
  data_dict[table].append(df)
2460
2462
 
spacr/plot.py CHANGED
@@ -17,7 +17,7 @@ from skimage.measure import find_contours, label, regionprops
17
17
  from skimage.segmentation import mark_boundaries
18
18
  from skimage.transform import resize as sk_resize
19
19
  import scikit_posthocs as sp
20
-
20
+ from scipy.stats import chi2_contingency
21
21
  import tifffile as tiff
22
22
 
23
23
  from scipy.stats import normaltest, ttest_ind, mannwhitneyu, f_oneway, kruskal
@@ -2609,7 +2609,7 @@ class spacrGraph:
2609
2609
  def perform_posthoc_tests(self, is_normal, unique_groups):
2610
2610
  """Perform post-hoc tests for multiple groups based on all_to_all flag."""
2611
2611
 
2612
- from .utils import choose_p_adjust_method
2612
+ from .stats import choose_p_adjust_method
2613
2613
 
2614
2614
  posthoc_results = []
2615
2615
  if is_normal and len(unique_groups) > 2 and self.all_to_all:
@@ -3736,3 +3736,79 @@ def graph_importance(settings):
3736
3736
  # Get the figure object if needed
3737
3737
  fig = spacr_graph.get_figure()
3738
3738
  plt.show()
3739
+
3740
def plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_column='prc', level='object', cmap='viridis'):
    """
    Plot category proportions per group as stacked bars and test them for association.

    The overall chi-squared test and the pairwise follow-up tests are always
    computed on the raw row counts, regardless of ``level``.

    Parameters:
    - settings (dict): Analysis settings; only ``settings.get('verbose', False)`` is read here.
    - df (DataFrame): Input data; rows are counted per group/category.
    - group_column (str): Column indicating the groups (one bar per group).
    - bin_column (str): Column indicating the categories (one stack segment per category).
    - prc_column (str): Column identifying the replicate unit; only used when ``level == 'well'``.
    - level (str): ``'well'`` plots mean ± SD of per-replicate proportions; any other
      value plots proportions pooled over all rows of a group.
    - cmap (str): Colormap name passed to the pandas plotting call.

    Returns:
    - results_df (DataFrame): One row with 'chi_squared_stat', 'p_value' and 'degrees_of_freedom'.
    - pairwise_results (DataFrame): Pairwise test results from `chi_pairwise`.
    - fig (Figure): The generated plot.
    """

    from .stats import chi_pairwise

    # Calculate contingency table for overall chi-squared test
    raw_counts = df.groupby([group_column, bin_column]).size().unstack(fill_value=0)
    chi2, p, dof, _ = chi2_contingency(raw_counts)
    print(f"Chi-squared test statistic (raw data): {chi2:.4f}")
    print(f"p-value (raw data): {p:.4e}")

    # Perform pairwise comparisons
    pairwise_results = chi_pairwise(raw_counts, verbose=settings.get('verbose', False))

    # Plot based on level setting
    if level == 'well':
        # Aggregate by well for mean ± SD visualization.
        # transform('sum') keeps the original three-level index. The former
        # groupby(...).apply(lambda x: x / x.sum()) prepends the group keys
        # again on pandas >= 2.0, which duplicates index levels and breaks the
        # groupby(group_column) calls below.
        well_counts = df.groupby([group_column, prc_column, bin_column]).size()
        well_totals = well_counts.groupby(level=[0, 1]).transform('sum')
        well_proportions = (well_counts / well_totals).unstack(fill_value=0)

        mean_proportions = well_proportions.groupby(group_column).mean()
        std_proportions = well_proportions.groupby(group_column).std()

        mean_proportions.plot(
            kind='bar', stacked=True, yerr=std_proportions, capsize=5, colormap=cmap, figsize=(12, 8)
        )
        plt.title('Proportion of Volume Bins by Group (Mean ± SD across wells)')
    else:
        # Object-level plotting without aggregation
        group_counts = df.groupby([group_column, bin_column]).size()
        group_totals = group_counts.groupby(level=0).sum()
        proportions = group_counts / group_totals
        proportion_df = proportions.unstack(fill_value=0)

        proportion_df.plot(kind='bar', stacked=True, colormap=cmap, figsize=(12, 8))
        plt.title('Proportion of Volume Bins by Group')

    plt.xlabel('Group')
    plt.ylabel('Proportion')

    # Place the legend outside the axes so it does not cover the bars
    plt.legend(title='Classes', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.ylim(0, 1)
    fig = plt.gcf()

    results_df = pd.DataFrame({
        'chi_squared_stat': [chi2],
        'p_value': [p],
        'degrees_of_freedom': [dof]
    })

    return results_df, pairwise_results, fig
spacr/settings.py CHANGED
@@ -1415,6 +1415,8 @@ def set_analyze_endodyogeny_defaults(settings):
1415
1415
  settings.setdefault('um_per_px',0.1)
1416
1416
  settings.setdefault('max_bins',None)
1417
1417
  settings.setdefault('save',False)
1418
+ settings.setdefault('change_plate',False)
1419
+ settings.setdefault('cmap','viridis')
1418
1420
  settings.setdefault('verbose',False)
1419
1421
  return settings
1420
1422
 
spacr/stats.py ADDED
@@ -0,0 +1,221 @@
1
+ from scipy.stats import shapiro, normaltest, levene, ttest_ind, mannwhitneyu, kruskal, f_oneway
2
+ from statsmodels.stats.multicomp import pairwise_tukeyhsd
3
+ import scikit_posthocs as sp
4
+ import numpy as np
5
+ import pandas as pd
6
+ from scipy.stats import chi2_contingency, fisher_exact
7
+ import itertools
8
+ from statsmodels.stats.multitest import multipletests
9
+
10
+
11
def choose_p_adjust_method(num_groups, num_data_points):
    """
    Pick a p-value adjustment method from the comparison count and sample size.

    Parameters:
    - num_groups: Number of unique groups being compared
    - num_data_points: Number of data points per group (assuming balanced groups)

    Returns:
    - One of 'holm', 'fdr_bh', 'sidak' or 'bonferroni'
    """
    # Every unordered pair of groups is one comparison: n * (n - 1) / 2
    pair_count = (num_groups * (num_groups - 1)) // 2
    few_comparisons = pair_count <= 10
    many_comparisons = pair_count > 10

    if few_comparisons and num_data_points > 5:
        # Balanced between power and Type I error control
        return 'holm'
    if many_comparisons and num_data_points <= 5:
        # FDR control for many comparisons on small samples
        return 'fdr_bh'
    if few_comparisons:
        # Less conservative than Bonferroni, good for independent comparisons
        return 'sidak'
    # Very conservative, use for strict control of Type I errors
    return 'bonferroni'
33
+
34
def perform_normality_tests(df, grouping_column, data_columns):
    """
    Perform a normality test for each group and data column.

    Groups with fewer than 3 non-null values are recorded as 'Skipped'.
    Groups with 8 or more values use the D'Agostino-Pearson test; smaller
    groups use Shapiro-Wilk.

    Parameters:
    - df (DataFrame): Input data.
    - grouping_column (str): Column holding the group labels.
    - data_columns (list): Columns to test.

    Returns:
    - is_normal (bool): True when every tested (non-skipped) group of the LAST
      column in ``data_columns`` has p > 0.05. True when ``data_columns`` is
      empty or every group was skipped.
    - normality_results (list of dict): One record per (column, group).
    """
    unique_groups = df[grouping_column].unique()
    normality_results = []
    # Bound up front so an empty ``data_columns`` returns a value instead of
    # failing on an undefined local at the return statement.
    is_normal = True

    for column in data_columns:
        column_p_values = []

        for group in unique_groups:
            data = df.loc[df[grouping_column] == group, column].dropna()
            n_samples = len(data)

            if n_samples < 3:
                # Skip test if there aren't enough data points
                print(f"Skipping normality test for group '{group}' on column '{column}' - Not enough data.")
                normality_results.append({
                    'Comparison': f'Normality test for {group} on {column}',
                    'Test Statistic': None,
                    'p-value': None,
                    'Test Name': 'Skipped',
                    'Column': column,
                    'n': n_samples
                })
                continue

            # Choose the appropriate normality test based on the sample size
            if n_samples >= 8:
                stat, p_value = normaltest(data)
                test_name = "D'Agostino-Pearson test"
            else:
                stat, p_value = shapiro(data)
                test_name = "Shapiro-Wilk test"

            normality_results.append({
                'Comparison': f'Normality test for {group} on {column}',
                'Test Statistic': stat,
                'p-value': p_value,
                'Test Name': test_name,
                'Column': column,
                'n': n_samples
            })
            column_p_values.append(p_value)

        # Check if all tested groups of this column are normally distributed (p > 0.05)
        is_normal = all(p > 0.05 for p in column_p_values)

    return is_normal, normality_results
79
+
80
+
81
def perform_levene_test(df, grouping_column, data_column):
    """
    Run Levene's test for equal variance of ``data_column`` across groups.

    Returns:
    - (stat, p_value): Test statistic and p-value reported by ``levene``.
    """
    labels = df[grouping_column]

    # One sample per group, with missing values removed
    samples = []
    for label in labels.unique():
        samples.append(df.loc[labels == label, data_column].dropna())

    statistic, p_value = levene(*samples)
    return statistic, p_value
87
+
88
def perform_statistical_tests(df, grouping_column, data_columns, paired=False):
    """
    Compare groups on each data column with a test chosen by normality.

    Two groups: T-test when the column passes the normality check, otherwise
    Mann-Whitney U. Any other number of groups: One-way ANOVA when normal,
    otherwise Kruskal-Wallis. Paired two-group comparisons are not implemented;
    such columns are skipped and produce no record.

    Returns:
    - list of dict: One record per tested column.
    """
    group_labels = df[grouping_column].unique()
    n_groups = len(group_labels)
    two_groups = n_groups == 2
    collected = []

    for column in data_columns:
        samples = [
            df.loc[df[grouping_column] == label, column].dropna()
            for label in group_labels
        ]

        if two_groups and paired:
            print("Performing paired tests (not implemented in this template).")
            continue  # Extend as needed

        # The normality of this column decides between parametric and rank-based tests
        is_normal, _ = perform_normality_tests(df, grouping_column, [column])

        if two_groups:
            if is_normal:
                test_name, test_fn = 'T-test', ttest_ind
            else:
                test_name, test_fn = 'Mann-Whitney U test', mannwhitneyu
            stat, p = test_fn(samples[0], samples[1])
        else:
            if is_normal:
                test_name, test_fn = 'One-way ANOVA', f_oneway
            else:
                test_name, test_fn = 'Kruskal-Wallis test', kruskal
            stat, p = test_fn(*samples)

        collected.append({
            'Column': column,
            'Test Name': test_name,
            'Test Statistic': stat,
            'p-value': p,
            'Groups': n_groups
        })

    return collected
127
+
128
+
129
def perform_posthoc_tests(df, grouping_column, data_column, is_normal):
    """
    Run pairwise post-hoc comparisons when more than two groups are present.

    Tukey's HSD is used when ``is_normal`` is truthy; otherwise Dunn's test is
    run twice (unadjusted and adjusted) so both p-values can be reported.

    Returns:
    - list of dict: One record per group pair; empty for two or fewer groups.
    """
    n_groups = len(df[grouping_column].unique())
    if not n_groups > 2:
        return []

    # Assuming roughly equal data points per group
    per_group_n = len(df[data_column].dropna()) // n_groups
    adjust_method = choose_p_adjust_method(n_groups, per_group_n)

    if is_normal:
        # Tukey's HSD automatically adjusts p-values
        tukey = pairwise_tukeyhsd(df[data_column], df[grouping_column], alpha=0.05)
        table_rows = tukey._results_table.data[1:]  # first row is the header
        return [
            {
                'Comparison': f"{row[0]} vs {row[1]}",
                'Original p-value': None,  # Tukey HSD does not provide raw p-values
                'Adjusted p-value': adjusted_p,
                'Adjusted Method': 'Tukey HSD',
                'Test Name': 'Tukey HSD'
            }
            for row, adjusted_p in zip(table_rows, tukey.pvalues)
        ]

    # Dunn's test with p-value adjustment
    raw_matrix = sp.posthoc_dunn(df, val_col=data_column, group_col=grouping_column, p_adjust=None)
    adj_matrix = sp.posthoc_dunn(df, val_col=data_column, group_col=grouping_column, p_adjust=adjust_method)

    records = []
    row_labels = list(adj_matrix.index)
    col_labels = list(adj_matrix.columns)
    for i, group_a in enumerate(row_labels):
        # Only cells right of the diagonal, so each pair is reported once
        for j in range(i + 1, len(col_labels)):
            records.append({
                'Comparison': f"{group_a} vs {col_labels[j]}",
                'Original p-value': raw_matrix.iloc[i, j],
                'Adjusted p-value': adj_matrix.iloc[i, j],
                'Adjusted Method': adjust_method,
                'Test Name': "Dunn's Post-hoc"
            })

    return records
166
+
167
def chi_pairwise(raw_counts, verbose=False):
    """
    Perform pairwise chi-square or Fisher's exact tests between all unique group pairs
    and apply p-value correction.

    Parameters:
    - raw_counts (DataFrame): Contingency table with one row per group and one
      column per category.
    - verbose (bool): Whether to print the result table.

    Returns:
    - pairwise_df (DataFrame): One row per group pair with columns 'Group 1',
      'Group 2', 'Test Name', 'p-value', 'p-value_adj' and 'adj' (the name of
      the correction method). Empty, with the same columns, when fewer than
      two groups are present.
    """
    result_columns = ['Group 1', 'Group 2', 'Test Name', 'p-value', 'p-value_adj', 'adj']
    groups = raw_counts.index.unique()  # Use index from raw_counts for group pairs
    num_groups = len(groups)

    if num_groups < 2:
        # No pairs exist, so there is nothing to test and no p-values to
        # correct; return an empty table instead of passing an empty list to
        # the multiple-testing correction.
        pairwise_df = pd.DataFrame(columns=result_columns)
        if verbose:
            print("\nPairwise Frequency Analysis Results:")
            print(pairwise_df.to_string(index=False))
        return pairwise_df

    # Average total data points per group drives the choice of correction
    num_data_points = raw_counts.sum(axis=1).mean()
    p_adjust_method = choose_p_adjust_method(num_groups, num_data_points)

    pairwise_results = []
    raw_p_values = []  # Store raw p-values for correction later

    for group1, group2 in itertools.combinations(groups, 2):
        contingency_table = raw_counts.loc[[group1, group2]].values
        if contingency_table.shape[1] == 2:  # Fisher's Exact Test for 2x2 tables
            _, p_value = fisher_exact(contingency_table)
            test_name = "Fisher's Exact Test"
        else:  # Chi-Square Test for larger tables
            # A category seen in neither group of this pair yields a zero
            # expected frequency, which chi2_contingency rejects with
            # ValueError. Such columns carry no information for the pair.
            observed = contingency_table[:, contingency_table.sum(axis=0) > 0]
            _, p_value, _, _ = chi2_contingency(observed)
            test_name = 'Pairwise Chi-Square Test'

        pairwise_results.append({
            'Group 1': group1,
            'Group 2': group2,
            'Test Name': test_name,
            'p-value': p_value
        })
        raw_p_values.append(p_value)

    # Apply p-value correction
    corrected_p_values = multipletests(raw_p_values, method=p_adjust_method)[1]

    # Add corrected p-values to results
    for result, corrected in zip(pairwise_results, corrected_p_values):
        result['p-value_adj'] = corrected

    pairwise_df = pd.DataFrame(pairwise_results)
    pairwise_df['adj'] = p_adjust_method

    if verbose:
        # Print pairwise results
        print("\nPairwise Frequency Analysis Results:")
        print(pairwise_df.to_string(index=False))

    return pairwise_df
spacr/submodules.py CHANGED
@@ -847,11 +847,61 @@ def interperate_vision_model(settings={}):
847
847
 
848
848
  return output
849
849
 
850
def _plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_column='prc', level='object'):
    """
    Plot volume-bin proportions per group as stacked bars and run a chi-squared test.

    Parameters:
    - settings (dict): Analysis settings; ``settings['um_per_px']`` selects the
      unit shown in the legend title (px³ when it is None, µm³ otherwise).
    - df (DataFrame): Input data; rows are counted per group/bin.
    - group_column (str): Column indicating the groups (one bar per group).
    - bin_column (str): Column indicating the volume bins.
    - prc_column (str): Column identifying the replicate unit; only used when ``level == 'well'``.
    - level (str): ``'well'`` plots mean ± SD of per-replicate proportions; any other
      value plots proportions pooled over all rows of a group.

    Returns:
    - chi2, p, dof, expected: Output of ``chi2_contingency`` on the raw counts.
    - raw_counts (DataFrame): Contingency table of observed counts.
    - fig (Figure): The generated plot.
    """
    # Always calculate chi-squared on raw data
    raw_counts = df.groupby([group_column, bin_column]).size().unstack(fill_value=0)
    chi2, p, dof, expected = chi2_contingency(raw_counts)
    print(f"Chi-squared test statistic (raw data): {chi2:.4f}")
    print(f"p-value (raw data): {p:.4e}")

    # Extract bin labels and indices for formatting the legend in the correct order.
    # isinstance on the dtype replaces the deprecated pd.api.types.is_categorical_dtype.
    if isinstance(df[bin_column].dtype, pd.CategoricalDtype):
        bin_labels = df[bin_column].cat.categories
    else:
        bin_labels = sorted(df[bin_column].unique())
    legend_labels = [f"{index}: {label}" for index, label in enumerate(bin_labels, start=1)]

    # Plot based on level setting
    if level == 'well':
        # Aggregate by well for mean ± SD visualization.
        # transform('sum') keeps the original three-level index. The former
        # groupby(...).apply(lambda x: x / x.sum()) prepends the group keys
        # again on pandas >= 2.0, which duplicates index levels and breaks the
        # groupby(group_column) calls below.
        well_counts = df.groupby([group_column, prc_column, bin_column]).size()
        well_totals = well_counts.groupby(level=[0, 1]).transform('sum')
        well_proportions = (well_counts / well_totals).unstack(fill_value=0)

        mean_proportions = well_proportions.groupby(group_column).mean()
        std_proportions = well_proportions.groupby(group_column).std()

        mean_proportions.plot(
            kind='bar', stacked=True, yerr=std_proportions, capsize=5, colormap='viridis', figsize=(12, 8)
        )
        plt.title('Proportion of Volume Bins by Group (Mean ± SD across wells)')
    else:
        # Object-level plotting without aggregation
        group_counts = df.groupby([group_column, bin_column]).size()
        group_totals = group_counts.groupby(level=0).sum()
        proportions = group_counts / group_totals
        proportion_df = proportions.unstack(fill_value=0)

        proportion_df.plot(kind='bar', stacked=True, colormap='viridis', figsize=(12, 8))
        plt.title('Proportion of Volume Bins by Group')

    plt.xlabel('Group')
    plt.ylabel('Proportion')

    # Update legend with formatted labels, maintaining correct order
    volume_unit = "px³" if settings['um_per_px'] is None else "µm³"
    plt.legend(legend_labels, title=f'Volume Range ({volume_unit})', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.ylim(0, 1)
    fig = plt.gcf()
    return chi2, p, dof, expected, raw_counts, fig
898
+
850
899
  def analyze_endodyogeny(settings):
851
900
 
852
901
  from .utils import annotate_conditions, save_settings
853
902
  from .io import _read_and_merge_data
854
903
  from .settings import set_analyze_endodyogeny_defaults
904
+ from .plot import plot_proportion_stacked_bars
855
905
 
856
906
  def _calculate_volume_bins(df, compartment='pathogen', min_area_bin=500, max_bins=None, verbose=False):
857
907
  area_column = f'{compartment}_area'
@@ -890,55 +940,6 @@ def analyze_endodyogeny(settings):
890
940
 
891
941
  return df
892
942
 
893
- def _plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_column='prc', level='object'):
894
- # Always calculate chi-squared on raw data
895
- raw_counts = df.groupby([group_column, bin_column]).size().unstack(fill_value=0)
896
- chi2, p, dof, expected = chi2_contingency(raw_counts)
897
- print(f"Chi-squared test statistic (raw data): {chi2:.4f}")
898
- print(f"p-value (raw data): {p:.4e}")
899
-
900
- # Extract bin labels and indices for formatting the legend in the correct order
901
- bin_labels = df[bin_column].cat.categories if pd.api.types.is_categorical_dtype(df[bin_column]) else sorted(df[bin_column].unique())
902
- bin_indices = range(1, len(bin_labels) + 1)
903
- legend_labels = [f"{index}: {label}" for index, label in zip(bin_indices, bin_labels)]
904
-
905
- # Plot based on level setting
906
- if level == 'well':
907
- # Aggregate by well for mean ± SD visualization
908
- well_proportions = (
909
- df.groupby([group_column, prc_column, bin_column])
910
- .size()
911
- .groupby(level=[0, 1])
912
- .apply(lambda x: x / x.sum())
913
- .unstack(fill_value=0)
914
- )
915
- mean_proportions = well_proportions.groupby(group_column).mean()
916
- std_proportions = well_proportions.groupby(group_column).std()
917
-
918
- ax = mean_proportions.plot(
919
- kind='bar', stacked=True, yerr=std_proportions, capsize=5, colormap='viridis', figsize=(12, 8)
920
- )
921
- plt.title('Proportion of Volume Bins by Group (Mean ± SD across wells)')
922
- else:
923
- # Object-level plotting without aggregation
924
- group_counts = df.groupby([group_column, bin_column]).size()
925
- group_totals = group_counts.groupby(level=0).sum()
926
- proportions = group_counts / group_totals
927
- proportion_df = proportions.unstack(fill_value=0)
928
-
929
- ax = proportion_df.plot(kind='bar', stacked=True, colormap='viridis', figsize=(12, 8))
930
- plt.title('Proportion of Volume Bins by Group')
931
-
932
- plt.xlabel('Group')
933
- plt.ylabel('Proportion')
934
-
935
- # Update legend with formatted labels, maintaining correct order
936
- volume_unit = "px³" if settings['um_per_px'] is None else "µm³"
937
- plt.legend(legend_labels, title=f'Volume Range ({volume_unit})', bbox_to_anchor=(1.05, 1), loc='upper left')
938
- plt.ylim(0, 1)
939
- fig = plt.gcf()
940
- return chi2, p, dof, expected, raw_counts, fig
941
-
942
943
  settings = set_analyze_endodyogeny_defaults(settings)
943
944
  save_settings(settings, name='analyze_endodyogeny', show=True)
944
945
  output = {}
@@ -951,13 +952,17 @@ def analyze_endodyogeny(settings):
951
952
  for s in settings['src']:
952
953
  loc = os.path.join(s, 'measurements/measurements.db')
953
954
  locs.append(loc)
955
+
956
+ if 'png_list' not in settings['tables']:
957
+ settings['tables'] = settings['tables'] + ['png_list']
954
958
 
955
959
  df, _ = _read_and_merge_data(
956
960
  locs,
957
961
  tables=settings['tables'],
958
962
  verbose=settings['verbose'],
959
963
  nuclei_limit=settings['nuclei_limit'],
960
- pathogen_limit=settings['pathogen_limit']
964
+ pathogen_limit=settings['pathogen_limit'],
965
+ change_plate=settings['change_plate']
961
966
  )
962
967
 
963
968
  if not settings['um_per_px'] is None:
@@ -984,32 +989,40 @@ def analyze_endodyogeny(settings):
984
989
  df = df.dropna(subset=[settings['group_column']])
985
990
  df = _calculate_volume_bins(df, settings['compartment'], settings['min_area_bin'], settings['max_bins'], settings['verbose'])
986
991
  output['data'] = df
992
+
993
+
994
+ if settings['level'] == 'plate':
995
+ prc_column = 'plate'
996
+ else:
997
+ prc_column = 'prc'
998
+
987
999
  # Perform chi-squared test and plot
988
- chi2, p, dof, expected, raw_counts, fig = _plot_proportion_stacked_bars(settings, df, settings['group_column'], bin_column=f"{settings['compartment']}_volume_bin", level=settings['level']
989
- )
990
-
991
- # Create a DataFrame with chi-squared test results and raw counts
992
- results_df = pd.DataFrame({
993
- 'chi_squared_stat': [chi2],
994
- 'p_value': [p],
995
- 'degrees_of_freedom': [dof]
996
- })
997
-
998
- # Flatten and add expected counts to results_df
999
- expected_df = pd.DataFrame(expected, index=raw_counts.index, columns=raw_counts.columns)
1000
- expected_flat = expected_df.stack().reset_index()
1001
- expected_flat.columns = [settings['group_column'], f"{settings['compartment']}_volume_bin", 'expected_count']
1002
- results_df = results_df.merge(expected_flat, how="cross")
1000
+ results_df, pairwise_results_df, fig = plot_proportion_stacked_bars(settings, df, settings['group_column'], bin_column=f"{settings['compartment']}_volume_bin", prc_column=prc_column, level=settings['level'], cmap=settings['cmap'])
1001
+
1002
+ # Extract bin labels and indices for formatting the legend in the correct order
1003
+ bin_labels = df[f"{settings['compartment']}_volume_bin"].cat.categories if pd.api.types.is_categorical_dtype(df[f"{settings['compartment']}_volume_bin"]) else sorted(df[f"{settings['compartment']}_volume_bin"].unique())
1004
+ bin_indices = range(1, len(bin_labels) + 1)
1005
+ legend_labels = [f"{index}: {label}" for index, label in zip(bin_indices, bin_labels)]
1006
+
1007
+ # Update legend with formatted labels, maintaining correct order
1008
+ volume_unit = "px³" if settings['um_per_px'] is None else "µm³"
1009
+ plt.legend(legend_labels, title=f'Volume Range ({volume_unit})', bbox_to_anchor=(1.05, 1), loc='upper left')
1010
+ plt.ylim(0, 1)
1011
+
1003
1012
  output['chi_squared'] = results_df
1004
1013
 
1005
1014
  if settings['save']:
1006
1015
  # Save DataFrame to CSV
1007
- output_dir = os.path.join(settings['src'][0], 'results')
1016
+ output_dir = os.path.join(settings['src'][0], 'results', 'analyze_endodyogeny')
1008
1017
  os.makedirs(output_dir, exist_ok=True)
1009
1018
  output_path = os.path.join(output_dir, 'chi_squared_results.csv')
1019
+ output_path_data = os.path.join(output_dir, 'data.csv')
1020
+ output_path_pairwise = os.path.join(output_dir, 'chi_squared_results.csv')
1010
1021
  output_path_fig = os.path.join(output_dir, 'chi_squared_results.pdf')
1011
1022
  fig.savefig(output_path_fig, dpi=300, bbox_inches='tight')
1012
1023
  results_df.to_csv(output_path, index=False)
1024
+ df.to_csv(output_path_data, index=False)
1025
+ pairwise_results_df.to_csv(output_path_pairwise, index=False)
1013
1026
  print(f"Chi-squared results saved to {output_path}")
1014
1027
 
1015
1028
  plt.show()
@@ -1021,51 +1034,8 @@ def analyze_class_proportion(settings):
1021
1034
  from .utils import annotate_conditions, save_settings
1022
1035
  from .io import _read_and_merge_data
1023
1036
  from .settings import set_analyze_class_proportion_defaults
1024
- from .plot import plot_plates
1025
-
1026
-
1027
- def _plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_column='prc', level='object'):
1028
- # Always calculate chi-squared on raw data
1029
- raw_counts = df.groupby([group_column, bin_column]).size().unstack(fill_value=0)
1030
- chi2, p, dof, expected = chi2_contingency(raw_counts)
1031
- print(f"Chi-squared test statistic (raw data): {chi2:.4f}")
1032
- print(f"p-value (raw data): {p:.4e}")
1033
-
1034
- # Plot based on level setting
1035
- if level == 'well':
1036
- # Aggregate by well for mean ± SD visualization
1037
- well_proportions = (
1038
- df.groupby([group_column, prc_column, bin_column])
1039
- .size()
1040
- .groupby(level=[0, 1])
1041
- .apply(lambda x: x / x.sum())
1042
- .unstack(fill_value=0)
1043
- )
1044
- mean_proportions = well_proportions.groupby(group_column).mean()
1045
- std_proportions = well_proportions.groupby(group_column).std()
1046
-
1047
- ax = mean_proportions.plot(
1048
- kind='bar', stacked=True, yerr=std_proportions, capsize=5, colormap='viridis', figsize=(12, 8)
1049
- )
1050
- plt.title('Proportion of Volume Bins by Group (Mean ± SD across wells)')
1051
- else:
1052
- # Object-level plotting without aggregation
1053
- group_counts = df.groupby([group_column, bin_column]).size()
1054
- group_totals = group_counts.groupby(level=0).sum()
1055
- proportions = group_counts / group_totals
1056
- proportion_df = proportions.unstack(fill_value=0)
1057
-
1058
- ax = proportion_df.plot(kind='bar', stacked=True, colormap='viridis', figsize=(12, 8))
1059
- plt.title('Proportion of Volume Bins by Group')
1060
-
1061
- plt.xlabel('Group')
1062
- plt.ylabel('Proportion')
1063
-
1064
- # Update legend with formatted labels, maintaining correct order
1065
- plt.legend(title=f'Classes', bbox_to_anchor=(1.05, 1), loc='upper left')
1066
- plt.ylim(0, 1)
1067
- fig = plt.gcf()
1068
- return chi2, p, dof, expected, raw_counts, fig
1037
+ from .plot import plot_plates, plot_proportion_stacked_bars
1038
+ from .stats import perform_normality_tests, perform_levene_test, perform_statistical_tests, perform_posthoc_tests
1069
1039
 
1070
1040
  settings = set_analyze_class_proportion_defaults(settings)
1071
1041
  save_settings(settings, name='analyze_class_proportion', show=True)
@@ -1110,25 +1080,20 @@ def analyze_class_proportion(settings):
1110
1080
  output['data'] = df
1111
1081
 
1112
1082
  # Perform chi-squared test and plot
1113
- chi2, p, dof, expected, raw_counts, fig = _plot_proportion_stacked_bars(settings, df, settings['group_column'], bin_column=settings['class_column'], level=settings['level'])
1114
-
1115
- # Create a DataFrame with chi-squared test results and raw counts
1116
- results_df = pd.DataFrame({
1117
- 'chi_squared_stat': [chi2],
1118
- 'p_value': [p],
1119
- 'degrees_of_freedom': [dof]
1120
- })
1083
+ results_df, pairwise_results, fig = plot_proportion_stacked_bars(settings, df, settings['group_column'], bin_column=settings['class_column'], level=settings['level'])
1121
1084
 
1122
1085
  output['chi_squared'] = results_df
1123
1086
 
1124
1087
  if settings['save']:
1125
- output_dir = os.path.join(settings['src'][0], 'results')
1088
+ output_dir = os.path.join(settings['src'][0], 'results', 'analyze_class_proportion')
1126
1089
  os.makedirs(output_dir, exist_ok=True)
1127
1090
  output_path_chi = os.path.join(output_dir, 'class_chi_squared_results.csv')
1091
+ output_path_chi_pairwise = os.path.join(output_dir, 'class_frequency_test.csv')
1128
1092
  output_path_data = os.path.join(output_dir, 'class_chi_squared_data.csv')
1129
1093
  output_path_fig = os.path.join(output_dir, 'class_chi_squared.pdf')
1130
1094
  fig.savefig(output_path_fig, dpi=300, bbox_inches='tight')
1131
1095
  results_df.to_csv(output_path_chi, index=False)
1096
+ pairwise_results.to_csv(output_path_chi_pairwise, index=False)
1132
1097
  df.to_csv(output_path_data, index=False)
1133
1098
  print(f"Chi-squared results saved to {output_path_chi}")
1134
1099
  print(f"Annotated data saved to {output_path_data}")
@@ -1141,4 +1106,29 @@ def analyze_class_proportion(settings):
1141
1106
  fig2.savefig(output_path_fig2, dpi=300, bbox_inches='tight')
1142
1107
 
1143
1108
  plt.show()
1109
+
1110
+ # Perform normality, variance, and statistical tests
1111
+ is_normal, normality_results = perform_normality_tests(df, settings['group_column'], [settings['class_column']])
1112
+ variance_stat, variance_p = perform_levene_test(df, settings['group_column'], settings['class_column'])
1113
+
1114
+ print(f"Levene's test statistic: {variance_stat:.4f}, p-value: {variance_p:.4e}")
1115
+ variance_results = {
1116
+ 'Test Statistic': variance_stat,
1117
+ 'p-value': variance_p,
1118
+ 'Test Name': "Levene's Test"
1119
+ }
1120
+
1121
+ test_results = perform_statistical_tests(df, settings['group_column'], [settings['class_column']])
1122
+ posthoc_results = perform_posthoc_tests(
1123
+ df, settings['group_column'], settings['class_column'], is_normal=is_normal
1124
+ )
1125
+
1126
+ # Save additional results
1127
+ if settings['save']:
1128
+ pd.DataFrame(normality_results).to_csv(os.path.join(output_dir, 'normality_results.csv'), index=False)
1129
+ pd.DataFrame([variance_results]).to_csv(os.path.join(output_dir, 'variance_results.csv'), index=False)
1130
+ pd.DataFrame(test_results).to_csv(os.path.join(output_dir, 'statistical_test_results.csv'), index=False)
1131
+ pd.DataFrame(posthoc_results).to_csv(os.path.join(output_dir, 'posthoc_results.csv'), index=False)
1132
+ print("Statistical analysis results saved.")
1133
+
1144
1134
  return output
spacr/utils.py CHANGED
@@ -5156,29 +5156,6 @@ def control_filelist(folder, mode='column', values=['01','02']):
5156
5156
  if mode is 'row_name':
5157
5157
  filtered_files = [file for file in files if file.split('_')[1][:1] in values]
5158
5158
  return filtered_files
5159
-
5160
- def choose_p_adjust_method(num_groups, num_data_points):
5161
- """
5162
- Selects the most appropriate p-value adjustment method based on data characteristics.
5163
-
5164
- Parameters:
5165
- - num_groups: Number of unique groups being compared
5166
- - num_data_points: Number of data points per group (assuming balanced groups)
5167
-
5168
- Returns:
5169
- - A string representing the recommended p-adjustment method
5170
- """
5171
- num_comparisons = (num_groups * (num_groups - 1)) // 2 # Number of pairwise comparisons
5172
-
5173
- # Decision logic for choosing the adjustment method
5174
- if num_comparisons <= 10 and num_data_points > 5:
5175
- return 'holm' # Balanced between power and Type I error control
5176
- elif num_comparisons > 10 and num_data_points <= 5:
5177
- return 'fdr_bh' # FDR control for large number of comparisons and small sample size
5178
- elif num_comparisons <= 10:
5179
- return 'sidak' # Less conservative than Bonferroni, good for independent comparisons
5180
- else:
5181
- return 'bonferroni' # Very conservative, use for strict control of Type I errors
5182
5159
 
5183
5160
  def rename_columns_in_db(db_path):
5184
5161
  with sqlite3.connect(db_path) as conn:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spacr
3
- Version: 0.3.64
3
+ Version: 0.3.66
4
4
  Summary: Spatial phenotype analysis of crisp screens (SpaCr)
5
5
  Home-page: https://github.com/EinarOlafsson/spacr
6
6
  Author: Einar Birnir Olafsson
@@ -1,4 +1,4 @@
1
- spacr/__init__.py,sha256=CZtAdU5etLcb9dVmz-4Y7Hjhw3ubjMzfjG0L5ybyFVA,1592
1
+ spacr/__init__.py,sha256=fvk5JfLpOqUA1W0yPcsVZnS9qbpXFOceFk09LKolVfw,1627
2
2
  spacr/__main__.py,sha256=bkAJJD2kjIqOP-u1kLvct9jQQCeUXzlEjdgitwi1Lm8,75
3
3
  spacr/app_annotate.py,sha256=W9eLPa_LZIvXsXx_-0iDFEU938LBDvRy6prXo0qF4KQ,2533
4
4
  spacr/app_classify.py,sha256=urTP_wlZ58hSyM5a19slYlBxN0PdC-9-ga0hvq8CGWc,165
@@ -15,20 +15,21 @@ spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
15
15
  spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
16
16
  spacr/gui_elements.py,sha256=EKlvEg_4_je7jciEdR3NTgPrcTraowa2e2RUt-xqd6M,138254
17
17
  spacr/gui_utils.py,sha256=u9RoIOWpAXFEOnUlLpMQZrc1pWSg6omZsJMIhJdRv_g,41211
18
- spacr/io.py,sha256=YlJAT6H8l4ipunMyKzjqoPcf-1AXgUmSyR1YN9WxmDI,142857
18
+ spacr/io.py,sha256=SLJKVqe5c3dFa6a7tXA5KMGhNGjhvLbyqsPlD1AqM3g,142962
19
19
  spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
20
20
  spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
21
21
  spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
22
22
  spacr/ml.py,sha256=GOQJH8jdTrJQwiLlDrcc9-yCxLFaMx4YD4OJs0-R5YI,77947
23
23
  spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
24
- spacr/plot.py,sha256=0fne2Msy6niN80oiuwt9ZYw1QwXVnghaUmrwvEZN9-8,161992
24
+ spacr/plot.py,sha256=XPAabtZjzurL6zlG3KfqLEQTnH_jjo-k2jVajJt9om8,165166
25
25
  spacr/sequencing.py,sha256=ClUfwPPK6rNUbUuiEkzcwakzVyDKKUMv9ricrxT8qQY,25227
26
- spacr/settings.py,sha256=LSoDNuz1m7rySh7MWXEL1xlUU4rFiCRVlGvZCSCOqzU,80085
26
+ spacr/settings.py,sha256=wZcqdTWaRus27wn9P0EGyftcJn_i0IwlM9pyeCVqxr8,80173
27
27
  spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
28
- spacr/submodules.py,sha256=X1OI0Dsc1qU4lqKFdF2EnloNkLkDzA1hDn7CYbkBmFc,55473
28
+ spacr/stats.py,sha256=mbhwsyIqt5upsSD346qGjdCw7CFBa0tIS7zHU9e0jNI,9536
29
+ spacr/submodules.py,sha256=SK8YEs850LAx30YAiwap7ecLpp1_p-bci6H-Or0GLoA,55500
29
30
  spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
30
31
  spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
31
- spacr/utils.py,sha256=vvciLh1gH0nsrCWQw3taUcDjxP59wme3gqrejeNO05w,222943
32
+ spacr/utils.py,sha256=zojZlZtGwwDVDY0fgRt5XViVuJLuxadRO1IYctWm_SQ,221885
32
33
  spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
33
34
  spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
34
35
  spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -151,9 +152,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
151
152
  spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
152
153
  spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
153
154
  spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
154
- spacr-0.3.64.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
155
- spacr-0.3.64.dist-info/METADATA,sha256=_07fLYI8eMAYJzOEcAVOemN4TFJAuzAvUrdX1T136T0,6032
156
- spacr-0.3.64.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
157
- spacr-0.3.64.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
158
- spacr-0.3.64.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
159
- spacr-0.3.64.dist-info/RECORD,,
155
+ spacr-0.3.66.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
156
+ spacr-0.3.66.dist-info/METADATA,sha256=A5XJI5cR864WLb08NonbMW2BEUHYn-fQgl8RMcdIK8M,6032
157
+ spacr-0.3.66.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
158
+ spacr-0.3.66.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
159
+ spacr-0.3.66.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
160
+ spacr-0.3.66.dist-info/RECORD,,
File without changes