spacr-0.3.62-py3-none-any.whl → spacr-0.3.65-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
spacr/__init__.py CHANGED
@@ -27,6 +27,7 @@ from . import openai
 from . import ml
 from . import toxo
 from . import cellpose
+from . import stats
 from . import logger
 
 __all__ = [
@@ -57,6 +58,7 @@ __all__ = [
     "ml",
     "toxo",
     "cellpose",
+    "stats",
     "logger"
 ]
 
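Usage note: with this change the new stats module is importable from the package root. A minimal sketch, assuming spacr 0.3.65 is installed:

    from spacr import stats

    # 4 groups with 8 points per group -> 6 pairwise comparisons, so the
    # decision logic in choose_p_adjust_method (added below) returns 'holm'
    print(stats.choose_p_adjust_method(num_groups=4, num_data_points=8))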
spacr/io.py CHANGED
@@ -2551,6 +2551,7 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
         png_list_g_df_non_numeric.drop(columns=['plate','row_name','column_name','field','file_name','cell_id', 'prcf'], inplace=True)
         if verbose:
             print(f'png_list: {len(png_list)}, png_list grouped: {len(png_list_g_df_numeric)}')
+            print(f"Added png_list columns: {png_list_g_df_numeric.columns}, {png_list_g_df_non_numeric.columns}")
         merged_df = merged_df.merge(png_list_g_df_numeric, left_index=True, right_index=True)
         merged_df = merged_df.merge(png_list_g_df_non_numeric, left_index=True, right_index=True)
 
@@ -2562,7 +2563,8 @@ def _read_and_merge_data(locs, tables, verbose=False, nuclei_limit=10, pathogen_
     metadata.set_index('prcfo', inplace=True)
 
     # Merge metadata with final merged DataFrame
-    merged_df = metadata.merge(merged_df, left_index=True, right_index=True).dropna(axis=1)
+    #merged_df = metadata.merge(merged_df, left_index=True, right_index=True).dropna(axis=1)
+    merged_df = metadata.merge(merged_df, left_index=True, right_index=True)
     merged_df.drop(columns=['label_list_morphology', 'label_list_intensity'], errors='ignore', inplace=True)
 
     if verbose:
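Usage note: the edit above changes behavior, not just formatting: the old `.dropna(axis=1)` silently removed every column that contained at least one NaN after the metadata merge. A minimal pandas sketch with toy data (not from the package) showing what the old call discarded:

    import pandas as pd

    metadata = pd.DataFrame({'condition': ['a', 'b']}, index=['o1', 'o2'])
    measurements = pd.DataFrame({'area': [10.0, None]}, index=['o1', 'o2'])
    merged = metadata.merge(measurements, left_index=True, right_index=True)

    # Old behavior: any column containing a NaN was dropped wholesale
    print(merged.dropna(axis=1).columns.tolist())  # ['condition']
    # New behavior: the column is kept, NaN and all
    print(merged.columns.tolist())                 # ['condition', 'area']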
spacr/ml.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
 import numpy as np
 from scipy import stats
 from scipy.stats import shapiro
+from math import pi
 
 from sklearn.linear_model import Lasso, Ridge, LassoCV, RidgeCV
 from sklearn.metrics import mean_squared_error
@@ -1515,3 +1516,207 @@ def _calculate_similarity(df, features, col_to_compare, val1, val2):
 
     return df
 
+def interperate_vision_model(settings={}):
+
+    from .io import _read_and_merge_data, _results_to_csv
+    from .settings import set_interperate_vision_model_defaults
+    from .utils import save_settings
+
+    settings = set_interperate_vision_model_defaults(settings)
+    save_settings(settings, name='interperate_vision_model', show=True)
+
+    # Function to create radar plot for individual and combined values
+    def create_extended_radar_plot(values, labels, title):
+        values = list(values) + [values[0]]  # Close the loop for radar chart
+        angles = [n / float(len(labels)) * 2 * pi for n in range(len(labels))]
+        angles += angles[:1]
+
+        fig, ax = plt.subplots(figsize=(8, 8), subplot_kw=dict(polar=True))
+        ax.plot(angles, values, linewidth=2, linestyle='solid')
+        ax.fill(angles, values, alpha=0.25)
+
+        ax.set_xticks(angles[:-1])
+        ax.set_xticklabels(labels, fontsize=10, rotation=45, ha='right')
+        plt.title(title, pad=20)
+        plt.show()
+
+    def extract_compartment_channel(feature_name):
+        # Identify compartment as the first part before an underscore
+        compartment = feature_name.split('_')[0]
+
+        if compartment == 'cells':
+            compartment = 'cell'
+
+        # Identify channels based on substring presence
+        channels = []
+        if 'channel_0' in feature_name:
+            channels.append('channel_0')
+        if 'channel_1' in feature_name:
+            channels.append('channel_1')
+        if 'channel_2' in feature_name:
+            channels.append('channel_2')
+        if 'channel_3' in feature_name:
+            channels.append('channel_3')
+
+        # If multiple channels are found, join them with a '+'
+        if channels:
+            channel = ' + '.join(channels)
+        else:
+            channel = 'morphology'  # Use 'morphology' if no channel identifier is found
+
+        return (compartment, channel)
+
+    def read_and_preprocess_data(settings):
+
+        df, _ = _read_and_merge_data(
+            locs=[settings['src']+'/measurements/measurements.db'],
+            tables=settings['tables'],
+            verbose=True,
+            nuclei_limit=settings['nuclei_limit'],
+            pathogen_limit=settings['pathogen_limit']
+        )
+
+        scores_df = pd.read_csv(settings['scores'])
+
+        # Clean and align columns for merging
+        df['object_label'] = df['object_label'].str.replace('o', '')
+
+        if 'row_name' not in scores_df.columns:
+            scores_df['row_name'] = scores_df['row']
+
+        if 'column_name' not in scores_df.columns:
+            scores_df['column_name'] = scores_df['col']
+
+        if 'object_label' not in scores_df.columns:
+            scores_df['object_label'] = scores_df['object']
+
+        # Remove the 'o' prefix from 'object_label' in df, ensuring it is a string type
+        df['object_label'] = df['object_label'].str.replace('o', '').astype(str)
+
+        # Ensure 'object_label' in scores_df is also a string
+        scores_df['object_label'] = scores_df['object'].astype(str)
+
+        # Ensure all join columns have the same data type in both DataFrames
+        df[['plate', 'row_name', 'column_name', 'field', 'object_label']] = df[['plate', 'row_name', 'column_name', 'field', 'object_label']].astype(str)
+        scores_df[['plate', 'row_name', 'column_name', 'field', 'object_label']] = scores_df[['plate', 'row_name', 'column_name', 'field', 'object_label']].astype(str)
+
+        # Select only the necessary columns from scores_df for merging
+        scores_df = scores_df[['plate', 'row_name', 'column_name', 'field', 'object_label', settings['score_column']]]
+
+        # Now merge DataFrames
+        merged_df = pd.merge(df, scores_df, on=['plate', 'row_name', 'column_name', 'field', 'object_label'], how='inner')
+
+        # Separate numerical features and the score column
+        X = merged_df.select_dtypes(include='number').drop(columns=[settings['score_column']])
+        y = merged_df[settings['score_column']]
+
+        return X, y, merged_df
+
+    X, y, merged_df = read_and_preprocess_data(settings)
+
+    # Step 1: Feature Importance using Random Forest
+    if settings['feature_importance'] or settings['permutation_importance']:
+        model = RandomForestClassifier(random_state=42, n_jobs=settings['n_jobs'])
+        model.fit(X, y)
+
+        if settings['feature_importance']:
+            print(f"Feature Importance ...")
+            feature_importances = model.feature_importances_
+            feature_importance_df = pd.DataFrame({'feature': X.columns, 'importance': feature_importances})
+            feature_importance_df = feature_importance_df.sort_values(by='importance', ascending=False)
+            top_feature_importance_df = feature_importance_df.head(settings['top_features'])
+
+            # Plot Feature Importance
+            plt.figure(figsize=(10, 6))
+            plt.barh(top_feature_importance_df['feature'], top_feature_importance_df['importance'])
+            plt.xlabel('Importance')
+            plt.title(f"Top {settings['top_features']} Features - Feature Importance")
+            plt.gca().invert_yaxis()
+            plt.show()
+
+            if settings['save']:
+                _results_to_csv(feature_importance_df, filename='feature_importance.csv')
+
+        # Step 2: Permutation Importance
+        if settings['permutation_importance']:
+            print(f"Permutation Importance ...")
+            perm_importance = permutation_importance(model, X, y, n_repeats=10, random_state=42, n_jobs=settings['n_jobs'])
+            perm_importance_df = pd.DataFrame({'feature': X.columns, 'importance': perm_importance.importances_mean})
+            perm_importance_df = perm_importance_df.sort_values(by='importance', ascending=False)
+            top_perm_importance_df = perm_importance_df.head(settings['top_features'])
+
+            # Plot Permutation Importance
+            plt.figure(figsize=(10, 6))
+            plt.barh(top_perm_importance_df['feature'], top_perm_importance_df['importance'])
+            plt.xlabel('Importance')
+            plt.title(f"Top {settings['top_features']} Features - Permutation Importance")
+            plt.gca().invert_yaxis()
+            plt.show()
+
+            if settings['save']:
+                _results_to_csv(perm_importance_df, filename='permutation_importance.csv')
+
+    # Step 3: SHAP Analysis
+    if settings['shap']:
+        print(f"SHAP Analysis ...")
+
+        # Select top N features based on Random Forest importance and fit the model on these features only
+        top_features = feature_importance_df.head(settings['top_features'])['feature']
+        X_top = X[top_features]
+
+        # Refit the model on this subset of features
+        model = RandomForestClassifier(random_state=42, n_jobs=settings['n_jobs'])
+        model.fit(X_top, y)
+
+        # Sample a smaller subset of rows to speed up SHAP
+        if settings['shap_sample']:
+            sample = int(len(X_top) / 100)
+            X_sample = X_top.sample(min(sample, len(X_top)), random_state=42)
+        else:
+            X_sample = X_top
+
+        # Initialize SHAP explainer with the same subset of features
+        explainer = shap.Explainer(model.predict, X_sample)
+        shap_values = explainer(X_sample, max_evals=1500)
+
+        # Plot SHAP summary for the selected sample and top features
+        shap.summary_plot(shap_values, X_sample, max_display=settings['top_features'])
+
+        # Convert SHAP values to a DataFrame for easier manipulation
+        shap_df = pd.DataFrame(shap_values.values, columns=X_sample.columns)
+
+        # Apply the function to create MultiIndex columns with compartment and channel
+        shap_df.columns = pd.MultiIndex.from_tuples(
+            [extract_compartment_channel(feat) for feat in shap_df.columns],
+            names=['compartment', 'channel']
+        )
+
+        # Aggregate SHAP values by compartment and channel
+        compartment_mean = shap_df.abs().groupby(level='compartment', axis=1).mean().mean(axis=0)
+        channel_mean = shap_df.abs().groupby(level='channel', axis=1).mean().mean(axis=0)
+
+        # Calculate combined importance for each pair of compartments and channels
+        combined_compartment = {}
+        for i, comp1 in enumerate(compartment_mean.index):
+            for comp2 in compartment_mean.index[i+1:]:
+                combined_compartment[f"{comp1} + {comp2}"] = shap_df.loc[:, (comp1, slice(None))].abs().mean().mean() + \
+                    shap_df.loc[:, (comp2, slice(None))].abs().mean().mean()
+
+        combined_channel = {}
+        for i, chan1 in enumerate(channel_mean.index):
+            for chan2 in channel_mean.index[i+1:]:
+                combined_channel[f"{chan1} + {chan2}"] = shap_df.loc[:, (slice(None), chan1)].abs().mean().mean() + \
+                    shap_df.loc[:, (slice(None), chan2)].abs().mean().mean()
+
+        # Prepare values and labels for radar charts
+        all_compartment_importance = list(compartment_mean.values) + list(combined_compartment.values())
+        all_compartment_labels = list(compartment_mean.index) + list(combined_compartment.keys())
+
+        all_channel_importance = list(channel_mean.values) + list(combined_channel.values())
+        all_channel_labels = list(channel_mean.index) + list(combined_channel.keys())
+
+        # Create radar plots for compartments and channels
+        create_extended_radar_plot(all_compartment_importance, all_compartment_labels, "SHAP Importance by Compartment (Individual and Combined)")
+        create_extended_radar_plot(all_channel_importance, all_channel_labels, "SHAP Importance by Channel (Individual and Combined)")
+
+    return merged_df
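Usage note: the SHAP aggregation above hinges on the MultiIndex columns built by extract_compartment_channel. A self-contained sketch with made-up |SHAP| values that reproduces the same per-compartment and per-channel means (written with a transpose instead of the deprecated groupby(axis=1)):

    import numpy as np
    import pandas as pd

    # Made-up absolute SHAP values: 5 cells x 4 features
    rng = np.random.default_rng(0)
    shap_df = pd.DataFrame(np.abs(rng.normal(size=(5, 4))))
    shap_df.columns = pd.MultiIndex.from_tuples(
        [('cell', 'channel_0'), ('cell', 'morphology'),
         ('nucleus', 'channel_0'), ('nucleus', 'channel_1')],
        names=['compartment', 'channel'])

    # Mean |SHAP| per compartment and per channel, as fed to the radar plots
    print(shap_df.T.groupby(level='compartment').mean().mean(axis=1))
    print(shap_df.T.groupby(level='channel').mean().mean(axis=1))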
spacr/plot.py CHANGED
@@ -17,7 +17,7 @@ from skimage.measure import find_contours, label, regionprops
 from skimage.segmentation import mark_boundaries
 from skimage.transform import resize as sk_resize
 import scikit_posthocs as sp
-
+from scipy.stats import chi2_contingency
 import tifffile as tiff
 
 from scipy.stats import normaltest, ttest_ind, mannwhitneyu, f_oneway, kruskal
@@ -2609,7 +2609,7 @@ class spacrGraph:
     def perform_posthoc_tests(self, is_normal, unique_groups):
         """Perform post-hoc tests for multiple groups based on all_to_all flag."""
 
-        from .utils import choose_p_adjust_method
+        from .stats import choose_p_adjust_method
 
         posthoc_results = []
         if is_normal and len(unique_groups) > 2 and self.all_to_all:
@@ -3688,3 +3688,127 @@ def overlay_masks_on_images(img_folder, normalize=True, resize=True, save=False,
     plt.axis('off')
     plt.show()
 
+def graph_importance(settings):
+
+    from .settings import set_graph_importance_defaults
+    from .utils import save_settings
+
+    if not isinstance(settings['csvs'], list):
+        settings['csvs'] = [settings['csvs']]
+
+    settings['src'] = os.path.dirname(settings['csvs'][0])
+
+    settings = set_graph_importance_defaults(settings)
+    save_settings(settings, name='graph_importance')
+
+    dfs = []
+    for path in settings['csvs']:
+        dft = pd.read_csv(path)
+        dfs.append(dft)
+
+    df = pd.concat(dfs)
+
+    if not all(col in df.columns for col in (settings['grouping_column'], settings['data_column'])):
+        print(f"grouping {settings['grouping_column']} and data {settings['data_column']} columns must be in {df.columns.to_list()}")
+        return
+
+    output_dir = os.path.dirname(settings['csvs'][0])
+
+    spacr_graph = spacrGraph(
+        df=df,
+        grouping_column=settings['grouping_column'],
+        data_column=settings['data_column'],
+        graph_type=settings['graph_type'],
+        graph_name=settings['grouping_column'],
+        summary_func='mean',
+        colors=None,
+        output_dir=output_dir,
+        save=settings['save'],
+        y_lim=None,
+        error_bar_type='std',
+        representation='object',
+        theme='muted',
+    )
+
+    # Create the plot
+    spacr_graph.create_plot()
+
+    # Get the figure object if needed
+    fig = spacr_graph.get_figure()
+    plt.show()
+
+def plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_column='prc', level='object'):
+    """
+    Generate a stacked bar plot for proportions and perform chi-squared and pairwise tests.
+
+    Parameters:
+    - settings (dict): Analysis settings.
+    - df (DataFrame): Input data.
+    - group_column (str): Column indicating the groups.
+    - bin_column (str): Column indicating the categories.
+    - prc_column (str): Optional; column for additional stratification.
+    - level (str): Level of aggregation ('well' or 'object').
+
+ Returns:
3753
+ - chi2 (float): Chi-squared statistic for the overall test.
3754
+ - p (float): p-value for the overall chi-squared test.
3755
+ - dof (int): Degrees of freedom for the overall chi-squared test.
3756
+ - expected (ndarray): Expected frequencies for the overall chi-squared test.
3757
+ - raw_counts (DataFrame): Contingency table of observed counts.
3758
+ - fig (Figure): The generated plot.
3759
+ - pairwise_results (list): Pairwise test results from `chi_pairwise`.
3760
+ """
3761
+
3762
+ from .stats import chi_pairwise
3763
+
3764
+ # Calculate contingency table for overall chi-squared test
3765
+ raw_counts = df.groupby([group_column, bin_column]).size().unstack(fill_value=0)
3766
+ chi2, p, dof, expected = chi2_contingency(raw_counts)
3767
+ print(f"Chi-squared test statistic (raw data): {chi2:.4f}")
3768
+ print(f"p-value (raw data): {p:.4e}")
3769
+
3770
+ # Perform pairwise comparisons
3771
+ pairwise_results = chi_pairwise(raw_counts, verbose=settings.get('verbose', False))
3772
+
3773
+ # Plot based on level setting
3774
+ if level == 'well':
3775
+ # Aggregate by well for mean ± SD visualization
3776
+ well_proportions = (
3777
+ df.groupby([group_column, prc_column, bin_column])
3778
+ .size()
3779
+ .groupby(level=[0, 1])
3780
+ .apply(lambda x: x / x.sum())
3781
+ .unstack(fill_value=0)
3782
+ )
3783
+ mean_proportions = well_proportions.groupby(group_column).mean()
3784
+ std_proportions = well_proportions.groupby(group_column).std()
3785
+
3786
+ ax = mean_proportions.plot(
3787
+ kind='bar', stacked=True, yerr=std_proportions, capsize=5, colormap='viridis', figsize=(12, 8)
3788
+ )
3789
+ plt.title('Proportion of Volume Bins by Group (Mean ± SD across wells)')
3790
+ else:
3791
+ # Object-level plotting without aggregation
3792
+ group_counts = df.groupby([group_column, bin_column]).size()
3793
+ group_totals = group_counts.groupby(level=0).sum()
3794
+ proportions = group_counts / group_totals
3795
+ proportion_df = proportions.unstack(fill_value=0)
3796
+
3797
+ ax = proportion_df.plot(kind='bar', stacked=True, colormap='viridis', figsize=(12, 8))
3798
+ plt.title('Proportion of Volume Bins by Group')
3799
+
3800
+ plt.xlabel('Group')
3801
+ plt.ylabel('Proportion')
3802
+
3803
+ # Update legend with formatted labels, maintaining correct order
3804
+ plt.legend(title=f'Classes', bbox_to_anchor=(1.05, 1), loc='upper left')
3805
+ plt.ylim(0, 1)
3806
+ fig = plt.gcf()
3807
+
3808
+ results_df = pd.DataFrame({
3809
+ 'chi_squared_stat': [chi2],
3810
+ 'p_value': [p],
3811
+ 'degrees_of_freedom': [dof]
3812
+ })
3813
+
3814
+ return results_df, pairwise_results, fig
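Usage note: the overall test in plot_proportion_stacked_bars is an ordinary chi-squared test of independence on a group x category count table. A toy example of the same construction (values are illustrative only):

    import pandas as pd
    from scipy.stats import chi2_contingency

    df = pd.DataFrame({
        'condition': ['nc', 'nc', 'nc', 'pc', 'pc', 'pc', 'pc', 'pc'],
        'bin': ['low', 'low', 'high', 'high', 'high', 'low', 'high', 'high'],
    })

    # Same contingency-table construction as in the function above
    raw_counts = df.groupby(['condition', 'bin']).size().unstack(fill_value=0)
    chi2, p, dof, expected = chi2_contingency(raw_counts)
    print(f"chi2={chi2:.3f}, p={p:.3f}, dof={dof}")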
spacr/settings.py CHANGED
@@ -1370,4 +1370,68 @@ def get_analyze_plaque_settings(settings):
     settings.setdefault('rescale', False)
     settings.setdefault('resample', False)
     settings.setdefault('fill_in', True)
+    return settings
+
+def set_graph_importance_defaults(settings):
+    settings.setdefault('csvs','list of paths')
+    settings.setdefault('grouping_column','compartment')
+    settings.setdefault('data_column','compartment_importance_sum')
+    settings.setdefault('graph_type','jitter_bar')
+    settings.setdefault('save',False)
+    return settings
+
+def set_interperate_vision_model_defaults(settings):
+    settings.setdefault('src','path')
+    settings.setdefault('scores','path')
+    settings.setdefault('tables',['cell', 'nucleus', 'pathogen','cytoplasm'])
+    settings.setdefault('feature_importance',True)
+    settings.setdefault('permutation_importance',False)
+    settings.setdefault('shap',True)
+    settings.setdefault('save',False)
+    settings.setdefault('nuclei_limit',1000)
+    settings.setdefault('pathogen_limit',1000)
+    settings.setdefault('top_features',30)
+    settings.setdefault('shap_sample',True)
+    settings.setdefault('n_jobs',-1)
+    settings.setdefault('shap_approximate',True)
+    settings.setdefault('score_column','cv_predictions')
+    return settings
+
+def set_analyze_endodyogeny_defaults(settings):
+    settings.setdefault('src','path')
+    settings.setdefault('tables',['cell', 'nucleus', 'pathogen', 'cytoplasm'])
+    settings.setdefault('cell_types',['Hela'])
+    settings.setdefault('cell_plate_metadata',None)
+    settings.setdefault('pathogen_types',['nc', 'pc'])
+    settings.setdefault('pathogen_plate_metadata',[['c1'], ['c2']])
+    settings.setdefault('treatments',None)
+    settings.setdefault('treatment_plate_metadata',None)
+    settings.setdefault('min_area_bin',500)
+    settings.setdefault('group_column','pathogen')
+    settings.setdefault('compartment','pathogen')
+    settings.setdefault('pathogen_limit',1)
+    settings.setdefault('nuclei_limit',10)
+    settings.setdefault('level','object')
+    settings.setdefault('um_per_px',0.1)
+    settings.setdefault('max_bins',None)
+    settings.setdefault('save',False)
+    settings.setdefault('verbose',False)
+    return settings
+
+def set_analyze_class_proportion_defaults(settings):
+    settings.setdefault('src','path')
+    settings.setdefault('tables',['cell', 'nucleus', 'pathogen', 'cytoplasm'])
+    settings.setdefault('cell_types',['Hela'])
+    settings.setdefault('cell_plate_metadata',None)
+    settings.setdefault('pathogen_types',['nc','pc'])
+    settings.setdefault('pathogen_plate_metadata',[['c1'],['c2']])
+    settings.setdefault('treatments',None)
+    settings.setdefault('treatment_plate_metadata',None)
+    settings.setdefault('group_column','condition')
+    settings.setdefault('class_column','test')
+    settings.setdefault('pathogen_limit',1000)
+    settings.setdefault('nuclei_limit',1000)
+    settings.setdefault('level','well')
+    settings.setdefault('save',False)
+    settings.setdefault('verbose', False)
     return settings
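Usage note: all of these helpers rely on dict.setdefault, so caller-supplied values always win over the defaults. A quick illustration, assuming spacr 0.3.65 is installed:

    from spacr.settings import set_interperate_vision_model_defaults

    settings = set_interperate_vision_model_defaults({'top_features': 10})
    print(settings['top_features'])  # 10 - the caller's value is kept
    print(settings['shap'])          # True - filled in from the defaults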
spacr/stats.py ADDED
@@ -0,0 +1,221 @@
+from scipy.stats import shapiro, normaltest, levene, ttest_ind, mannwhitneyu, kruskal, f_oneway
+from statsmodels.stats.multicomp import pairwise_tukeyhsd
+import scikit_posthocs as sp
+import numpy as np
+import pandas as pd
+from scipy.stats import chi2_contingency, fisher_exact
+import itertools
+from statsmodels.stats.multitest import multipletests
+
+
+def choose_p_adjust_method(num_groups, num_data_points):
+    """
+    Selects the most appropriate p-value adjustment method based on data characteristics.
+
+    Parameters:
+    - num_groups: Number of unique groups being compared
+    - num_data_points: Number of data points per group (assuming balanced groups)
+
+    Returns:
+    - A string representing the recommended p-adjustment method
+    """
+    num_comparisons = (num_groups * (num_groups - 1)) // 2  # Number of pairwise comparisons
+
+    # Decision logic for choosing the adjustment method
+    if num_comparisons <= 10 and num_data_points > 5:
+        return 'holm'  # Balanced between power and Type I error control
+    elif num_comparisons > 10 and num_data_points <= 5:
+        return 'fdr_bh'  # FDR control for large number of comparisons and small sample size
+    elif num_comparisons <= 10:
+        return 'sidak'  # Less conservative than Bonferroni, good for independent comparisons
+    else:
+        return 'bonferroni'  # Very conservative, use for strict control of Type I errors
+
+def perform_normality_tests(df, grouping_column, data_columns):
+    """Perform normality tests for each group and data column."""
+    unique_groups = df[grouping_column].unique()
+    normality_results = []
+
+    for column in data_columns:
+        for group in unique_groups:
+            data = df.loc[df[grouping_column] == group, column].dropna()
+            n_samples = len(data)
+
+            if n_samples < 3:
+                # Skip test if there aren't enough data points
+                print(f"Skipping normality test for group '{group}' on column '{column}' - Not enough data.")
+                normality_results.append({
+                    'Comparison': f'Normality test for {group} on {column}',
+                    'Test Statistic': None,
+                    'p-value': None,
+                    'Test Name': 'Skipped',
+                    'Column': column,
+                    'n': n_samples
+                })
+                continue
+
+            # Choose the appropriate normality test based on the sample size
+            if n_samples >= 8:
+                stat, p_value = normaltest(data)
+                test_name = "D'Agostino-Pearson test"
+            else:
+                stat, p_value = shapiro(data)
+                test_name = "Shapiro-Wilk test"
+
+            normality_results.append({
+                'Comparison': f'Normality test for {group} on {column}',
+                'Test Statistic': stat,
+                'p-value': p_value,
+                'Test Name': test_name,
+                'Column': column,
+                'n': n_samples
+            })
+
+        # Check if all groups are normally distributed (p > 0.05)
+        normal_p_values = [result['p-value'] for result in normality_results if result['Column'] == column and result['p-value'] is not None]
+        is_normal = all(p > 0.05 for p in normal_p_values)
+
+    return is_normal, normality_results
+
+
+def perform_levene_test(df, grouping_column, data_column):
+    """Perform Levene's test for equal variance."""
+    unique_groups = df[grouping_column].unique()
+    grouped_data = [df.loc[df[grouping_column] == group, data_column].dropna() for group in unique_groups]
+    stat, p_value = levene(*grouped_data)
+    return stat, p_value
+
+def perform_statistical_tests(df, grouping_column, data_columns, paired=False):
+    """Perform statistical tests for each data column."""
+    unique_groups = df[grouping_column].unique()
+    test_results = []
+
+    for column in data_columns:
+        grouped_data = [df.loc[df[grouping_column] == group, column].dropna() for group in unique_groups]
+        if len(unique_groups) == 2:  # For two groups
+            if paired:
+                print("Performing paired tests (not implemented in this template).")
+                continue  # Extend as needed
+            else:
+                # Check normality for two groups
+                is_normal, _ = perform_normality_tests(df, grouping_column, [column])
+                if is_normal:
+                    stat, p = ttest_ind(grouped_data[0], grouped_data[1])
+                    test_name = 'T-test'
+                else:
+                    stat, p = mannwhitneyu(grouped_data[0], grouped_data[1])
+                    test_name = 'Mann-Whitney U test'
+        else:
+            # Check normality for multiple groups
+            is_normal, _ = perform_normality_tests(df, grouping_column, [column])
+            if is_normal:
+                stat, p = f_oneway(*grouped_data)
+                test_name = 'One-way ANOVA'
+            else:
+                stat, p = kruskal(*grouped_data)
+                test_name = 'Kruskal-Wallis test'
+
+        test_results.append({
+            'Column': column,
+            'Test Name': test_name,
+            'Test Statistic': stat,
+            'p-value': p,
+            'Groups': len(unique_groups)
+        })
+
+    return test_results
+
+
+def perform_posthoc_tests(df, grouping_column, data_column, is_normal):
+    """Perform post-hoc tests for multiple groups with both original and adjusted p-values."""
+    unique_groups = df[grouping_column].unique()
+    posthoc_results = []
+
+    if len(unique_groups) > 2:
+        num_groups = len(unique_groups)
+        num_data_points = len(df[data_column].dropna()) // num_groups  # Assuming roughly equal data points per group
+        p_adjust_method = choose_p_adjust_method(num_groups, num_data_points)
+
+        if is_normal:
+            # Tukey's HSD automatically adjusts p-values
+            tukey_result = pairwise_tukeyhsd(df[data_column], df[grouping_column], alpha=0.05)
+            for comparison, p_value in zip(tukey_result._results_table.data[1:], tukey_result.pvalues):
+                posthoc_results.append({
+                    'Comparison': f"{comparison[0]} vs {comparison[1]}",
+                    'Original p-value': None,  # Tukey HSD does not provide raw p-values
+                    'Adjusted p-value': p_value,
+                    'Adjusted Method': 'Tukey HSD',
+                    'Test Name': 'Tukey HSD'
+                })
+        else:
+            # Dunn's test with p-value adjustment
+            raw_dunn_result = sp.posthoc_dunn(df, val_col=data_column, group_col=grouping_column, p_adjust=None)
+            adjusted_dunn_result = sp.posthoc_dunn(df, val_col=data_column, group_col=grouping_column, p_adjust=p_adjust_method)
+            for i, group_a in enumerate(adjusted_dunn_result.index):
+                for j, group_b in enumerate(adjusted_dunn_result.columns):
+                    if i < j:  # Only consider unique pairs
+                        posthoc_results.append({
+                            'Comparison': f"{group_a} vs {group_b}",
+                            'Original p-value': raw_dunn_result.iloc[i, j],
+                            'Adjusted p-value': adjusted_dunn_result.iloc[i, j],
+                            'Adjusted Method': p_adjust_method,
+                            'Test Name': "Dunn's Post-hoc"
+                        })
+
+    return posthoc_results
+
+def chi_pairwise(raw_counts, verbose=False):
+    """
+    Perform pairwise chi-square or Fisher's exact tests between all unique group pairs
+    and apply p-value correction.
+
+    Parameters:
+    - raw_counts (DataFrame): Contingency table with group-wise counts.
+    - verbose (bool): Whether to print results for each pair.
+
+    Returns:
+    - pairwise_df (DataFrame): DataFrame with pairwise test results, including corrected p-values.
+    """
+    pairwise_results = []
+    groups = raw_counts.index.unique()  # Use index from raw_counts for group pairs
+    raw_p_values = []  # Store raw p-values for correction later
+
+    # Calculate the number of groups and average number of data points per group
+    num_groups = len(groups)
+    num_data_points = raw_counts.sum(axis=1).mean()  # Average total data points per group
+    p_adjust_method = choose_p_adjust_method(num_groups, num_data_points)
+
+    for group1, group2 in itertools.combinations(groups, 2):
+        contingency_table = raw_counts.loc[[group1, group2]].values
+        if contingency_table.shape[1] == 2:  # Fisher's Exact Test for 2x2 tables
+            oddsratio, p_value = fisher_exact(contingency_table)
+            test_name = "Fisher's Exact Test"
+        else:  # Chi-Square Test for larger tables
+            chi2_stat, p_value, _, _ = chi2_contingency(contingency_table)
+            test_name = 'Pairwise Chi-Square Test'
+
+        pairwise_results.append({
+            'Group 1': group1,
+            'Group 2': group2,
+            'Test Name': test_name,
+            'p-value': p_value
+        })
+        raw_p_values.append(p_value)
+
+    # Apply p-value correction
+    corrected_p_values = multipletests(raw_p_values, method=p_adjust_method)[1]
+
+    # Add corrected p-values to results
+    for i, result in enumerate(pairwise_results):
+        result['p-value_adj'] = corrected_p_values[i]
+
+    pairwise_df = pd.DataFrame(pairwise_results)
+
+    pairwise_df['adj'] = p_adjust_method
+
+    if verbose:
+        # Print pairwise results
+        print("\nPairwise Frequency Analysis Results:")
+        print(pairwise_df.to_string(index=False))
+
+    return pairwise_df
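Usage note: these helpers compose: choose_p_adjust_method feeds statsmodels' multipletests, exactly as chi_pairwise does internally. A small sketch with hypothetical p-values:

    from statsmodels.stats.multitest import multipletests
    from spacr.stats import choose_p_adjust_method

    # 3 groups -> 3 pairwise comparisons; >5 points per group selects 'holm'
    method = choose_p_adjust_method(num_groups=3, num_data_points=12)

    raw_p = [0.01, 0.04, 0.20]  # hypothetical pairwise p-values
    reject, corrected, _, _ = multipletests(raw_p, method=method)
    print(method, corrected.round(3), reject)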
spacr/submodules.py CHANGED
@@ -10,6 +10,7 @@ from IPython.display import display
 from sklearn.ensemble import RandomForestClassifier
 from sklearn.inspection import permutation_importance
 from math import pi
+from scipy.stats import chi2_contingency
 
 import matplotlib.pyplot as plt
 from natsort import natsorted
@@ -844,4 +845,277 @@ def interperate_vision_model(settings={}):
         df.to_csv(save_path)
         print(f"Saved {save_path}")
 
-    return output
+    return output
+
+def _plot_proportion_stacked_bars(settings, df, group_column, bin_column, prc_column='prc', level='object'):
+    # Always calculate chi-squared on raw data
+    raw_counts = df.groupby([group_column, bin_column]).size().unstack(fill_value=0)
+    chi2, p, dof, expected = chi2_contingency(raw_counts)
+    print(f"Chi-squared test statistic (raw data): {chi2:.4f}")
+    print(f"p-value (raw data): {p:.4e}")
+
+    # Extract bin labels and indices for formatting the legend in the correct order
+    bin_labels = df[bin_column].cat.categories if pd.api.types.is_categorical_dtype(df[bin_column]) else sorted(df[bin_column].unique())
+    bin_indices = range(1, len(bin_labels) + 1)
+    legend_labels = [f"{index}: {label}" for index, label in zip(bin_indices, bin_labels)]
+
+    # Plot based on level setting
+    if level == 'well':
+        # Aggregate by well for mean ± SD visualization
+        well_proportions = (
+            df.groupby([group_column, prc_column, bin_column])
+            .size()
+            .groupby(level=[0, 1])
+            .apply(lambda x: x / x.sum())
+            .unstack(fill_value=0)
+        )
+        mean_proportions = well_proportions.groupby(group_column).mean()
+        std_proportions = well_proportions.groupby(group_column).std()
+
+        ax = mean_proportions.plot(
+            kind='bar', stacked=True, yerr=std_proportions, capsize=5, colormap='viridis', figsize=(12, 8)
+        )
+        plt.title('Proportion of Volume Bins by Group (Mean ± SD across wells)')
+    else:
+        # Object-level plotting without aggregation
+        group_counts = df.groupby([group_column, bin_column]).size()
+        group_totals = group_counts.groupby(level=0).sum()
+        proportions = group_counts / group_totals
+        proportion_df = proportions.unstack(fill_value=0)
+
+        ax = proportion_df.plot(kind='bar', stacked=True, colormap='viridis', figsize=(12, 8))
+        plt.title('Proportion of Volume Bins by Group')
+
+    plt.xlabel('Group')
+    plt.ylabel('Proportion')
+
+    # Update legend with formatted labels, maintaining correct order
+    volume_unit = "px³" if settings['um_per_px'] is None else "µm³"
+    plt.legend(legend_labels, title=f'Volume Range ({volume_unit})', bbox_to_anchor=(1.05, 1), loc='upper left')
+    plt.ylim(0, 1)
+    fig = plt.gcf()
+    return chi2, p, dof, expected, raw_counts, fig
+
+def analyze_endodyogeny(settings):
+
+    from .utils import annotate_conditions, save_settings
+    from .io import _read_and_merge_data
+    from .settings import set_analyze_endodyogeny_defaults
+    from .plot import plot_proportion_stacked_bars
+
+    def _calculate_volume_bins(df, compartment='pathogen', min_area_bin=500, max_bins=None, verbose=False):
+        area_column = f'{compartment}_area'
+        df[f'{compartment}_volume'] = df[area_column] ** 1.5
+        min_volume_bin = min_area_bin ** 1.5
+        max_volume = df[f'{compartment}_volume'].max()
+
+        # Generate bin edges as floats, and filter out any duplicate edges
+        bins = [min_volume_bin * (2 ** i) for i in range(int(np.ceil(np.log2(max_volume / min_volume_bin)) + 1))]
+        bins = sorted(set(bins))  # Ensure bin edges are unique
+
+        # Create bin labels as ranges with decimal precision for float values (e.g., "500.0-1000.0")
+        bin_labels = [f"{bins[i]:.2f}-{bins[i+1]:.2f}" for i in range(len(bins) - 1)]
+        if verbose:
+            print('Volume bins:', bins)
+            print('Volume bin labels:', bin_labels)
+
+        # Apply the bins to create a new column with the binned labels
+        df[f'{compartment}_volume_bin'] = pd.cut(df[f'{compartment}_volume'], bins=bins, labels=bin_labels, right=False)
+
+        # Create a bin index column (numeric version of bins)
+        df['bin_index'] = pd.cut(df[f'{compartment}_volume'], bins=bins, labels=range(1, len(bins)), right=False).astype(int)
+
+        # Adjust bin indices and labels based on max_bins
+        if max_bins is not None:
+            df.loc[df['bin_index'] > max_bins, 'bin_index'] = max_bins
+
+            # Update bin labels to reflect capped bins
+            bin_labels = bin_labels[:max_bins - 1] + [f">{bins[max_bins - 1]:.2f}"]
+            df[f'{compartment}_volume_bin'] = df['bin_index'].map(
+                {i + 1: label for i, label in enumerate(bin_labels)}
+            )
+
+        if verbose:
+            print(df[[f'{compartment}_volume', f'{compartment}_volume_bin', 'bin_index']].head())
+
+        return df
+
+    settings = set_analyze_endodyogeny_defaults(settings)
+    save_settings(settings, name='analyze_endodyogeny', show=True)
+    output = {}
+
+    # Process data
+    if not isinstance(settings['src'], list):
+        settings['src'] = [settings['src']]
+
+    locs = []
+    for s in settings['src']:
+        loc = os.path.join(s, 'measurements/measurements.db')
+        locs.append(loc)
+
+    df, _ = _read_and_merge_data(
+        locs,
+        tables=settings['tables'],
+        verbose=settings['verbose'],
+        nuclei_limit=settings['nuclei_limit'],
+        pathogen_limit=settings['pathogen_limit']
+    )
+
+    if not settings['um_per_px'] is None:
+        df[f"{settings['compartment']}_area"] = df[f"{settings['compartment']}_area"] * (settings['um_per_px'] ** 2)
+        settings['min_area_bin'] = settings['min_area_bin'] * (settings['um_per_px'] ** 2)
+
+    df = df[df[f"{settings['compartment']}_area"] >= settings['min_area_bin']]
+
+    df = annotate_conditions(
+        df=df,
+        cells=settings['cell_types'],
+        cell_loc=settings['cell_plate_metadata'],
+        pathogens=settings['pathogen_types'],
+        pathogen_loc=settings['pathogen_plate_metadata'],
+        treatments=settings['treatments'],
+        treatment_loc=settings['treatment_plate_metadata']
+    )
+
+    if settings['group_column'] not in df.columns:
+        print(f"{settings['group_column']} not found in DataFrame, please choose from:")
+        for col in df.columns:
+            print(col)
+
+    df = df.dropna(subset=[settings['group_column']])
+    df = _calculate_volume_bins(df, settings['compartment'], settings['min_area_bin'], settings['max_bins'], settings['verbose'])
+    output['data'] = df
+    # Perform chi-squared test and plot
+    results_df, pairwise_results_df, fig = plot_proportion_stacked_bars(settings, df, settings['group_column'], bin_column=f"{settings['compartment']}_volume_bin", level=settings['level'])
+
+    # Extract bin labels and indices for formatting the legend in the correct order
+    bin_labels = df[f"{settings['compartment']}_volume_bin"].cat.categories if pd.api.types.is_categorical_dtype(df[f"{settings['compartment']}_volume_bin"]) else sorted(df[f"{settings['compartment']}_volume_bin"].unique())
+    bin_indices = range(1, len(bin_labels) + 1)
+    legend_labels = [f"{index}: {label}" for index, label in zip(bin_indices, bin_labels)]
+
+    # Update legend with formatted labels, maintaining correct order
+    volume_unit = "px³" if settings['um_per_px'] is None else "µm³"
+    plt.legend(legend_labels, title=f'Volume Range ({volume_unit})', bbox_to_anchor=(1.05, 1), loc='upper left')
+    plt.ylim(0, 1)
+
+    output['chi_squared'] = results_df
+
+    if settings['save']:
+        # Save DataFrame to CSV
+        output_dir = os.path.join(settings['src'][0], 'results', 'analyze_endodyogeny')
+        os.makedirs(output_dir, exist_ok=True)
+        output_path = os.path.join(output_dir, 'chi_squared_results.csv')
+        output_path_pairwise = os.path.join(output_dir, 'chi_squared_pairwise_results.csv')
+        output_path_fig = os.path.join(output_dir, 'chi_squared_results.pdf')
+        fig.savefig(output_path_fig, dpi=300, bbox_inches='tight')
+        results_df.to_csv(output_path, index=False)
+        pairwise_results_df.to_csv(output_path_pairwise, index=False)
+        print(f"Chi-squared results saved to {output_path}")
+
+    plt.show()
+
+    return output
+
+def analyze_class_proportion(settings):
+
+    from .utils import annotate_conditions, save_settings
+    from .io import _read_and_merge_data
+    from .settings import set_analyze_class_proportion_defaults
+    from .plot import plot_plates, plot_proportion_stacked_bars
+    from .stats import perform_normality_tests, perform_levene_test, perform_statistical_tests, perform_posthoc_tests
+
+    settings = set_analyze_class_proportion_defaults(settings)
+    save_settings(settings, name='analyze_class_proportion', show=True)
+    output = {}
+
+    # Process data
+    if not isinstance(settings['src'], list):
+        settings['src'] = [settings['src']]
+
+    locs = []
+    for s in settings['src']:
+        loc = os.path.join(s, 'measurements/measurements.db')
+        locs.append(loc)
+
+    if 'png_list' not in settings['tables']:
+        settings['tables'] = settings['tables'] + ['png_list']
+
+    df, _ = _read_and_merge_data(
+        locs,
+        tables=settings['tables'],
+        verbose=settings['verbose'],
+        nuclei_limit=settings['nuclei_limit'],
+        pathogen_limit=settings['pathogen_limit']
+    )
+
+    df = annotate_conditions(
+        df=df,
+        cells=settings['cell_types'],
+        cell_loc=settings['cell_plate_metadata'],
+        pathogens=settings['pathogen_types'],
+        pathogen_loc=settings['pathogen_plate_metadata'],
+        treatments=settings['treatments'],
+        treatment_loc=settings['treatment_plate_metadata']
+    )
+
+    if settings['group_column'] not in df.columns:
+        print(f"{settings['group_column']} not found in DataFrame, please choose from:")
+        for col in df.columns:
+            print(col)
+
+    df[settings['class_column']] = df[settings['class_column']].fillna(0)
+    output['data'] = df
+
+    # Perform chi-squared test and plot
+    results_df, pairwise_results, fig = plot_proportion_stacked_bars(settings, df, settings['group_column'], bin_column=settings['class_column'], level=settings['level'])
+
+    output['chi_squared'] = results_df
+
+    if settings['save']:
+        output_dir = os.path.join(settings['src'][0], 'results', 'analyze_class_proportion')
+        os.makedirs(output_dir, exist_ok=True)
+        output_path_chi = os.path.join(output_dir, 'class_chi_squared_results.csv')
+        output_path_chi_pairwise = os.path.join(output_dir, 'class_frequency_test.csv')
+        output_path_data = os.path.join(output_dir, 'class_chi_squared_data.csv')
+        output_path_fig = os.path.join(output_dir, 'class_chi_squared.pdf')
+        fig.savefig(output_path_fig, dpi=300, bbox_inches='tight')
+        results_df.to_csv(output_path_chi, index=False)
+        pairwise_results.to_csv(output_path_chi_pairwise, index=False)
+        df.to_csv(output_path_data, index=False)
+        print(f"Chi-squared results saved to {output_path_chi}")
+        print(f"Annotated data saved to {output_path_data}")
+
+    plt.show()
+
+    fig2 = plot_plates(df, variable=settings['class_column'], grouping='mean', min_max='allq', cmap='viridis', min_count=0, verbose=True, dst=None)
+    if settings['save']:
+        output_path_fig2 = os.path.join(output_dir, 'class_heatmap.pdf')
+        fig2.savefig(output_path_fig2, dpi=300, bbox_inches='tight')
+
+    plt.show()
+
+    # Perform normality, variance, and statistical tests
+    is_normal, normality_results = perform_normality_tests(df, settings['group_column'], [settings['class_column']])
+    variance_stat, variance_p = perform_levene_test(df, settings['group_column'], settings['class_column'])
+
+    print(f"Levene's test statistic: {variance_stat:.4f}, p-value: {variance_p:.4e}")
+    variance_results = {
+        'Test Statistic': variance_stat,
+        'p-value': variance_p,
+        'Test Name': "Levene's Test"
+    }
+
+    test_results = perform_statistical_tests(df, settings['group_column'], [settings['class_column']])
+    posthoc_results = perform_posthoc_tests(
+        df, settings['group_column'], settings['class_column'], is_normal=is_normal
+    )
+
+    # Save additional results
+    if settings['save']:
+        pd.DataFrame(normality_results).to_csv(os.path.join(output_dir, 'normality_results.csv'), index=False)
+        pd.DataFrame([variance_results]).to_csv(os.path.join(output_dir, 'variance_results.csv'), index=False)
+        pd.DataFrame(test_results).to_csv(os.path.join(output_dir, 'statistical_test_results.csv'), index=False)
+        pd.DataFrame(posthoc_results).to_csv(os.path.join(output_dir, 'posthoc_results.csv'), index=False)
+        print("Statistical analysis results saved.")
+
+    return output
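Usage note: _calculate_volume_bins approximates object volume as area ** 1.5 and doubles the bin edge at each step. A standalone sketch of the same binning with toy areas (hypothetical values, px²):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'pathogen_area': [600.0, 1500.0, 9000.0, 40000.0]})
    df['pathogen_volume'] = df['pathogen_area'] ** 1.5

    min_volume_bin = 500 ** 1.5
    max_volume = df['pathogen_volume'].max()

    # Doubling bin edges, as in _calculate_volume_bins above
    n = int(np.ceil(np.log2(max_volume / min_volume_bin)) + 1)
    bins = sorted({min_volume_bin * (2 ** i) for i in range(n)})
    labels = [f"{bins[i]:.2f}-{bins[i+1]:.2f}" for i in range(len(bins) - 1)]
    df['volume_bin'] = pd.cut(df['pathogen_volume'], bins=bins, labels=labels, right=False)
    print(df)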
spacr/utils.py CHANGED
@@ -5156,29 +5156,6 @@ def control_filelist(folder, mode='column', values=['01','02']):
     if mode is 'row_name':
         filtered_files = [file for file in files if file.split('_')[1][:1] in values]
     return filtered_files
-
-def choose_p_adjust_method(num_groups, num_data_points):
-    """
-    Selects the most appropriate p-value adjustment method based on data characteristics.
-
-    Parameters:
-    - num_groups: Number of unique groups being compared
-    - num_data_points: Number of data points per group (assuming balanced groups)
-
-    Returns:
-    - A string representing the recommended p-adjustment method
-    """
-    num_comparisons = (num_groups * (num_groups - 1)) // 2  # Number of pairwise comparisons
-
-    # Decision logic for choosing the adjustment method
-    if num_comparisons <= 10 and num_data_points > 5:
-        return 'holm'  # Balanced between power and Type I error control
-    elif num_comparisons > 10 and num_data_points <= 5:
-        return 'fdr_bh'  # FDR control for large number of comparisons and small sample size
-    elif num_comparisons <= 10:
-        return 'sidak'  # Less conservative than Bonferroni, good for independent comparisons
-    else:
-        return 'bonferroni'  # Very conservative, use for strict control of Type I errors
 
 def rename_columns_in_db(db_path):
     with sqlite3.connect(db_path) as conn:
spacr-0.3.62.dist-info/METADATA → spacr-0.3.65.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spacr
-Version: 0.3.62
+Version: 0.3.65
 Summary: Spatial phenotype analysis of crisp screens (SpaCr)
 Home-page: https://github.com/EinarOlafsson/spacr
 Author: Einar Birnir Olafsson
spacr-0.3.62.dist-info/RECORD → spacr-0.3.65.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-spacr/__init__.py,sha256=CZtAdU5etLcb9dVmz-4Y7Hjhw3ubjMzfjG0L5ybyFVA,1592
+spacr/__init__.py,sha256=fvk5JfLpOqUA1W0yPcsVZnS9qbpXFOceFk09LKolVfw,1627
 spacr/__main__.py,sha256=bkAJJD2kjIqOP-u1kLvct9jQQCeUXzlEjdgitwi1Lm8,75
 spacr/app_annotate.py,sha256=W9eLPa_LZIvXsXx_-0iDFEU938LBDvRy6prXo0qF4KQ,2533
 spacr/app_classify.py,sha256=urTP_wlZ58hSyM5a19slYlBxN0PdC-9-ga0hvq8CGWc,165
@@ -15,20 +15,21 @@ spacr/gui.py,sha256=ARyn9Q_g8HoP-cXh1nzMLVFCKqthY4v2u9yORyaQqQE,8230
 spacr/gui_core.py,sha256=N7R7yvfK_dJhOReM_kW3Ci8Bokhi1OzsxeKqvSGdvV4,41460
 spacr/gui_elements.py,sha256=EKlvEg_4_je7jciEdR3NTgPrcTraowa2e2RUt-xqd6M,138254
 spacr/gui_utils.py,sha256=u9RoIOWpAXFEOnUlLpMQZrc1pWSg6omZsJMIhJdRv_g,41211
-spacr/io.py,sha256=0cBVmhqMaPkdEXib5Vhp19FC_1qfaK_NgtoImuDuwGU,142664
+spacr/io.py,sha256=YlJAT6H8l4ipunMyKzjqoPcf-1AXgUmSyR1YN9WxmDI,142857
 spacr/logger.py,sha256=lJhTqt-_wfAunCPl93xE65Wr9Y1oIHJWaZMjunHUeIw,1538
 spacr/measure.py,sha256=2lK-ZcTxLM-MpXV1oZnucRD9iz5aprwahRKw9IEqshg,55085
 spacr/mediar.py,sha256=FwLvbLQW5LQzPgvJZG8Lw7GniA2vbZx6Jv6vIKu7I5c,14743
-spacr/ml.py,sha256=aLDeeaAl0d4-RP1CzFHPqz5br2HrFbJhvPexEm9lvSI,68198
+spacr/ml.py,sha256=GOQJH8jdTrJQwiLlDrcc9-yCxLFaMx4YD4OJs0-R5YI,77947
 spacr/openai.py,sha256=5vBZ3Jl2llYcW3oaTEXgdyCB2aJujMUIO5K038z7w_A,1246
-spacr/plot.py,sha256=zITe54dzQRz-gk_ZT0qJyARuUWJivIBKW8V4rjUH8SE,160320
+spacr/plot.py,sha256=LApfosnN9gaF6eGRrPGt3uZIwSwAT7kgRbMnUDuxx0Y,165160
 spacr/sequencing.py,sha256=ClUfwPPK6rNUbUuiEkzcwakzVyDKKUMv9ricrxT8qQY,25227
-spacr/settings.py,sha256=zANLspVmllDZeYjQWIfrHN3VkVgicnYGTduv30MmQ18,77257
+spacr/settings.py,sha256=LSoDNuz1m7rySh7MWXEL1xlUU4rFiCRVlGvZCSCOqzU,80085
 spacr/sim.py,sha256=1xKhXimNU3ukzIw-3l9cF3Znc_brW8h20yv8fSTzvss,71173
-spacr/submodules.py,sha256=Xq4gjvooHN8S7cTk5PIAkd7XD2c7CMVqNpeo8GCvtHc,42489
+spacr/stats.py,sha256=mbhwsyIqt5upsSD346qGjdCw7CFBa0tIS7zHU9e0jNI,9536
+spacr/submodules.py,sha256=3hgY8MWQTfajJbUIYmHMzYNd42d80L_0aN6bpoTUnu0,55059
 spacr/timelapse.py,sha256=KGfG4L4-QnFfgbF7L6C5wL_3gd_rqr05Foje6RsoTBg,39603
 spacr/toxo.py,sha256=z2nT5aAze3NUIlwnBQcnkARihDwoPfqOgQIVoUluyK0,25087
-spacr/utils.py,sha256=vvciLh1gH0nsrCWQw3taUcDjxP59wme3gqrejeNO05w,222943
+spacr/utils.py,sha256=zojZlZtGwwDVDY0fgRt5XViVuJLuxadRO1IYctWm_SQ,221885
 spacr/version.py,sha256=axH5tnGwtgSnJHb5IDhiu4Zjk5GhLyAEDRe-rnaoFOA,409
 spacr/resources/MEDIAR/.gitignore,sha256=Ff1q9Nme14JUd-4Q3jZ65aeQ5X4uttptssVDgBVHYo8,152
 spacr/resources/MEDIAR/LICENSE,sha256=yEj_TRDLUfDpHDNM0StALXIt6mLqSgaV2hcCwa6_TcY,1065
@@ -151,9 +152,9 @@ spacr/resources/icons/umap.png,sha256=dOLF3DeLYy9k0nkUybiZMe1wzHQwLJFRmgccppw-8b
 spacr/resources/images/plate1_E01_T0001F001L01A01Z01C02.tif,sha256=Tl0ZUfZ_AYAbu0up_nO0tPRtF1BxXhWQ3T3pURBCCRo,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A02Z01C01.tif,sha256=m8N-V71rA1TT4dFlENNg8s0Q0YEXXs8slIn7yObmZJQ,7958528
 spacr/resources/images/plate1_E01_T0001F001L01A03Z01C03.tif,sha256=Pbhk7xn-KUP6RSIhJsxQcrHFImBm3GEpLkzx7WOc-5M,7958528
-spacr-0.3.62.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
-spacr-0.3.62.dist-info/METADATA,sha256=Ox14lWGxbXuMW36MriYHppKcZDqD_4HopfbcLAi8dLc,6032
-spacr-0.3.62.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
-spacr-0.3.62.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
-spacr-0.3.62.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
-spacr-0.3.62.dist-info/RECORD,,
+spacr-0.3.65.dist-info/LICENSE,sha256=SR-2MeGc6SCM1UORJYyarSWY_A-JaOMFDj7ReSs9tRM,1083
+spacr-0.3.65.dist-info/METADATA,sha256=FHAKN1FrIXWI6vqz43lT8VPSPzBpEwRIC54aQaL0Mr8,6032
+spacr-0.3.65.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+spacr-0.3.65.dist-info/entry_points.txt,sha256=BMC0ql9aNNpv8lUZ8sgDLQMsqaVnX5L535gEhKUP5ho,296
+spacr-0.3.65.dist-info/top_level.txt,sha256=GJPU8FgwRXGzKeut6JopsSRY2R8T3i9lDgya42tLInY,6
+spacr-0.3.65.dist-info/RECORD,,