tdaphantom 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Moriarty
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,140 @@
1
+ Metadata-Version: 2.4
2
+ Name: tdaphantom
3
+ Version: 1.0.0
4
+ Summary: Statistical hypothesis testing for persistence diagrams and barcodes
5
+ Author: W. Moriarty
6
+ License: MIT
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: numpy>=1.26
15
+ Requires-Dist: matplotlib>=3.7
16
+ Requires-Dist: gudhi>=3.11.0
17
+ Requires-Dist: ripser>=0.6.14
18
+ Dynamic: author
19
+ Dynamic: classifier
20
+ Dynamic: description
21
+ Dynamic: description-content-type
22
+ Dynamic: license
23
+ Dynamic: license-file
24
+ Dynamic: requires-dist
25
+ Dynamic: requires-python
26
+ Dynamic: summary
27
+
28
+ # TDA-PHANTOM
29
+ Topological data analysis -
30
+ Persistent Homology Analysis via Null Testing On Manifolds (TDA-PHANTOM)
31
+ is a tool for statistically analysing the significance of persistence diagrams and barcodes.
32
+
33
+ This project implements hypothesis tests from:
34
+
35
+ - *Confidence Sets for Persistence Diagrams*
36
+ Fasy et al. (2014)
37
+ DOI: https://doi.org/10.1214/14-AOS1252
38
+
39
+ - *A Universal Null-Distribution for Topological Data Analysis*
40
+ Bobrowski and Skraba (2023)
41
+ DOI: https://doi.org/10.1038/s41598-023-37842-2
42
+
43
+
44
+ ## Installation
45
+
46
+ Via [PyPI](https://pypi.org/project/tdaphantom/):
47
+
48
+ ```bash
49
+ pip install tdaphantom
50
+ ```
51
+ Or you can clone this repository and install it manually:
52
+
53
+ ```bash
54
+ python setup.py install
55
+ ```
56
+
57
+ ## Overview
58
+
59
+ This tool can build a Vietoris-Rips complex from either a point cloud or distance matrix.
60
+
61
+ It can then be used to visualise the persistence diagram for that complex, and run various hypothesis tests for it.
62
+
63
+ The results of these hypothesis tests can be analysed via the returned results array, or visualised in a significance persistence diagram.
64
+
65
+ ## Example Usage
66
+
67
+ ### Generate data
68
+
69
+ ```python
70
+ def _make_circle(n=2000, noise=0.03, seed=1):
71
+ rng = np.random.default_rng(seed)
72
+ theta = rng.uniform(0, 2 * np.pi, n)
73
+ pts = np.stack([np.cos(theta), np.sin(theta)], axis=1)
74
+ return pts + rng.normal(0, noise, pts.shape)
75
+
76
+ pc_circle = _make_circle()
77
+ ```
78
+
79
+ ### Init Phantom class
80
+
81
+ ```python
82
+ phantom_circle = Phantom(pc_circle)
83
+ ```
84
+ ### Calculate persistence diagram
85
+ Here we go up to homological dimension 1
86
+
87
+ ```python
88
+ phantom_circle.calculate_dgms_from_point_cloud_ripser(k=1)
89
+ ```
90
+
91
+
92
+ ### Display persistence diagram
93
+
94
+ ```python
95
+ phantom_circle.display_dgms()
96
+ ```
97
+
98
+ ![Persistence diagram for a circle using phantom](./README_SRC/circle_dgms.png)
99
+
100
+ ### Run hypothesis test
101
+ ```python
102
+ alpha = 0.01
103
+ correction = None
104
+ methods = ["universal_null"]
105
+
106
+ phantom_circle.hypothesis_test(alpha, correction_method=correction, methods=methods, k=1)
107
+ ```
108
+
109
+ ### Display significance persistence diagram
110
+
111
+ ```python
112
+ phantom_circle.display_results()
113
+ ```
114
+ ![Significance persistence diagram for a circle using phantom](./README_SRC/circle_sig.png)
115
+
116
+
117
+ ## Basic usage
118
+
119
+
120
+ ## Available methods
121
+
122
+ ## Universal null median
123
+ ### Usage
124
+ ### Theory
125
+
126
+ ## Universal null mean
127
+ ### Usage
128
+ ### Theory
129
+
130
+ ## Bottleneck subsampling
131
+ ### Usage
132
+ ### Theory
133
+
134
+ ## TODO
135
+
136
+ * Add bottleneck shells
137
+ * Add bottleneck density
138
+ * Add bottleneck concentration
139
+ * Add more integration tests
140
+ * Add more unit tests
@@ -0,0 +1,113 @@
1
+ # TDA-PHANTOM
2
+ Topological data analysis -
3
+ Persistent Homology Analysis via Null Testing On Manifolds (TDA-PHANTOM)
4
+ is a tool for statistically analysing the significance of persistence diagrams and barcodes.
5
+
6
+ This project implements hypothesis tests from:
7
+
8
+ - *Confidence Sets for Persistence Diagrams*
9
+ Fasy et al. (2014)
10
+ DOI: https://doi.org/10.1214/14-AOS1252
11
+
12
+ - *A Universal Null-Distribution for Topological Data Analysis*
13
+ Bobrowski and Skraba (2023)
14
+ DOI: https://doi.org/10.1038/s41598-023-37842-2
15
+
16
+
17
+ ## Installation
18
+
19
+ Via [PyPI](https://pypi.org/project/tdaphantom/):
20
+
21
+ ```bash
22
+ pip install tdaphantom
23
+ ```
24
+ Or you can clone this repository and install it manually:
25
+
26
+ ```bash
27
+ python setup.py install
28
+ ```
29
+
30
+ ## Overview
31
+
32
+ This tool can build a Vietoris-Rips complex from either a point cloud or distance matrix.
33
+
34
+ It can then be used to visualise the persistence diagram for that complex, and run various hypothesis tests for it.
35
+
36
+ The results of these hypothesis tests can be analysed via the returned results array, or visualised in a significance persistence diagram.
37
+
38
+ ## Example Usage
39
+
40
+ ### Generate data
41
+
42
+ ```python
43
+ def _make_circle(n=2000, noise=0.03, seed=1):
44
+ rng = np.random.default_rng(seed)
45
+ theta = rng.uniform(0, 2 * np.pi, n)
46
+ pts = np.stack([np.cos(theta), np.sin(theta)], axis=1)
47
+ return pts + rng.normal(0, noise, pts.shape)
48
+
49
+ pc_circle = _make_circle()
50
+ ```
51
+
52
+ ### Init Phantom class
53
+
54
+ ```python
55
+ phantom_circle = Phantom(pc_circle)
56
+ ```
57
+ ### Calculate persistence diagram
58
+ Here we go up to homological dimension 1
59
+
60
+ ```python
61
+ phantom_circle.calculate_dgms_from_point_cloud_ripser(k=1)
62
+ ```
63
+
64
+
65
+ ### Display persistence diagram
66
+
67
+ ```python
68
+ phantom_circle.display_dgms()
69
+ ```
70
+
71
+ ![Persistence diagram for a circle using phantom](./README_SRC/circle_dgms.png)
72
+
73
+ ### Run hypothesis test
74
+ ```python
75
+ alpha = 0.01
76
+ correction = None
77
+ methods = ["universal_null"]
78
+
79
+ phantom_circle.hypothesis_test(alpha, correction_method=correction, methods=methods, k=1)
80
+ ```
81
+
82
+ ### Display significance persistence diagram
83
+
84
+ ```python
85
+ phantom_circle.display_results()
86
+ ```
87
+ ![Significance persistence diagram for a circle using phantom](./README_SRC/circle_sig.png)
88
+
89
+
90
+ ## Basic usage
91
+
92
+
93
+ ## Available methods
94
+
95
+ ## Universal null median
96
+ ### Usage
97
+ ### Theory
98
+
99
+ ## Universal null mean
100
+ ### Usage
101
+ ### Theory
102
+
103
+ ## Bottleneck subsampling
104
+ ### Usage
105
+ ### Theory
106
+
107
+ ## TODO
108
+
109
+ * Add bottleneck shells
110
+ * Add bottleneck density
111
+ * Add bottleneck concentration
112
+ * Add more integration tests
113
+ * Add more unit tests
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,29 @@
1
from setuptools import setup, find_packages

# The README is reused verbatim as the long description (rendered as Markdown).
with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()

setup(
    name="tdaphantom",
    version="1.0.0",
    description="Statistical hypothesis testing for persistence diagrams and barcodes",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="W. Moriarty",
    license="MIT",
    packages=find_packages(),
    include_package_data=True,
    python_requires=">=3.10",
    install_requires=[
        "numpy>=1.26",
        # The package imports scipy.spatial.distance.cdist directly, so scipy
        # must be declared here instead of relying on it being pulled in
        # transitively by another dependency.
        "scipy",
        "matplotlib>=3.7",
        "gudhi>=3.11.0",
        "ripser>=0.6.14",
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Topic :: Scientific/Engineering :: Mathematics",
    ],
)
@@ -0,0 +1,2 @@
1
+
2
+ from .tdaphantom import Phantom
@@ -0,0 +1,190 @@
1
+ import numpy as np
2
+ from typing import List, Optional
3
+ import math
4
+ import random
5
+ import gudhi
6
+ from scipy.spatial.distance import cdist
7
+ from tdaphantom.metrics.metrics import w_infinity, hausdorff_dist_matrix, hausdorff
8
+
9
+
10
+ class BNTest:
11
+ def __init__(
12
+ self,
13
+ point_cloud: np.ndarray,
14
+ is_distance_matrix: bool = False,
15
+ dgm: np.ndarray = None,
16
+ k: int = 1,
17
+ alpha: float = 0.05,
18
+ complex: str = "VR",
19
+ max_depth: int = 50, # low and slow
20
+ method: str = "bottleneck:subsample"
21
+ ):
22
+ """
23
+ The bottleneck hypothesis testing from
24
+ 'confidence sets for persistence diagrams'
25
+ by Fasy et al
26
+ """
27
+ self.method = method
28
+ self.method_calls = {
29
+ "bottleneck:subsample": self.subsample,
30
+ "bottleneck:concentration": self.concentration,
31
+ "bottleneck:shells": self.shells,
32
+ "bottleneck:density": self.density
33
+ }
34
+ self.pc = point_cloud
35
+ self.dgm = dgm
36
+ self.is_distance_matrix = is_distance_matrix
37
+ self.k = k
38
+ self.complex = complex # currently only VR is supported
39
+ self.max_depth = max_depth
40
+ self.alpha = alpha
41
+
42
+ def _subsampling_method_via_persistence(self, subsample_percentage: float = 0.3) -> float:
43
+ """
44
+ DEPRECIATED - DO NOT USE
45
+ E[W_infnity(hat(P),P)] != E[W_infnity(hat(P),subsample_hat(P)]
46
+ """
47
+
48
+ n = len(self.dgm)
49
+ b = int(0.4*n)
50
+ try:
51
+ N = min(int(subsample_percentage * math.comb(n, b)), self.max_depth)
52
+ except OverflowError:
53
+ N = self.max_depth
54
+
55
+ T_j_array = np.zeros(N)
56
+ for i in range(N):
57
+ idx = np.random.choice(n, size=b, replace=False)
58
+ subsample = self.dgm[idx]
59
+ T_j_array[i] = self.w_infinity(subsample, self.dgm)
60
+
61
+ def _subsampling_method(self, subsample_percentage: float = 0.8) -> np.ndarray:
62
+ """
63
+ Fasy et al. 4.1 subsampling
64
+ b = subsample size = O(n / log(n))
65
+ N = number of subsamples (theory uses n choose b, but we will use a subset)
66
+ A bar with persistence > C_b is significant at level alpha.
67
+ By the bottleneck stability theorem, W_inf(PH(S_n), PH(P)) <= C_b
68
+ with probability >= 1 - alpha.
69
+
70
+ From the paper:
71
+ P(H(S_n, M) > C_n) <= alpha + O((b/n)^(1/4))
72
+
73
+ The bias term O((b/n)^(1/4)) -> 0 as b/n -> 0, so theory requires b << n.
74
+ The paper uses b = O(n / log(n)) for the theoretical guarantee.
75
+ In practice, larger b gives smaller c_n and more power but looser theory guarantees.
76
+ """
77
+ n = len(self.pc)
78
+ b = min(int(3.5*(n / np.log(n))), int(0.8*n))
79
+ try:
80
+ N = min(int(subsample_percentage * math.comb(n, b)), self.max_depth)
81
+ except OverflowError:
82
+ N = self.max_depth
83
+ all_idx = np.arange(n)
84
+
85
+ if not self.is_distance_matrix:
86
+ D = cdist(self.pc, self.pc)
87
+ else:
88
+ D = self.pc
89
+
90
+ T_j_array = np.zeros(N)
91
+ for i in range(N):
92
+ idx = np.random.choice(n, size=b, replace=False)
93
+ # h(S_n, S_b*) = max_{i in S_n} min_{j in S_b*} D[i,j]
94
+ T_j_array[i] = float(D[:, idx].min(axis=1).max())
95
+
96
+ # bias_order = (b/n)**(0.25)
97
+
98
+ return T_j_array
99
+
100
+ def subsample(self):
101
+ """
102
+ Calls subsampling method to calculate c_n and p_values
103
+ """
104
+ births = self.dgm[:, 0]
105
+ deaths = self.dgm[:, 1]
106
+ pers = deaths - births
107
+ T_j_array = self._subsampling_method()
108
+ c_n = float(np.quantile(T_j_array, 1.0 - self.alpha))
109
+ p_values = np.array([
110
+ float(np.mean(T_j_array >= p / 2)) for p in pers
111
+ ])
112
+ return c_n, p_values
113
+
114
+ def concentration_of_measure_method(self):
115
+ """
116
+ Fasy et al. 4.2 concentration of measure
117
+
118
+ From the paper:
119
+ P(H(S_n, M) > \hat(t_n)) <= alpha + O((log(n)/n)^(1/(2+d)))
120
+ """
121
+
122
+ ...
123
+
124
+ def concentration(self):
125
+ """
126
+ Calls concentration method to calculate c_n
127
+ """
128
+ c_n = self.concentration_of_measure_method()
129
+ return c_n, None
130
+
131
+ def shells_method(self):
132
+ """
133
+ Fasy et al. 4.3 method of shells
134
+
135
+ From the paper:
136
+ P(H(S_{2,n}, M) > \hat(t_{1,n}) <= alpha + O(r_n)
137
+ """
138
+ ...
139
+
140
+ def shells(self):
141
+ """
142
+ Calls shells method to calculate c_n
143
+ """
144
+ c_n = self.shells_method()
145
+ return c_n, None
146
+
147
+ def denisty_method(self):
148
+ """
149
+ Fasy et al. 4.4 Density estimation
150
+
151
+ From the paper:
152
+ P(||\hat{p}_h - p_h||_infinity > Z_alpha / sqrt(nh^D) ) <= alpha + O(log(n)/nh^D)^((4+D)/(4+2D))
153
+ """
154
+ ...
155
+
156
+ def denisty(self):
157
+ """
158
+ Calls shells method to calculate c_n
159
+ """
160
+ c_n = self.density_method()
161
+ return c_n, None
162
+
163
+ def results(self) -> dict:
164
+ """
165
+ Returns a structured array with one row per bar.
166
+ Cols: birth, death, pers, p_value, significant
167
+
168
+ and the threshold c_n
169
+ """
170
+ c_n = -np.inf
171
+ p_values = None
172
+ if self.method in self.method_calls.keys():
173
+ c_n, p_values = self.method_calls[self.method]()
174
+
175
+ births = self.dgm[:, 0]
176
+ deaths = self.dgm[:, 1]
177
+ pers = deaths - births
178
+
179
+ rejected = pers > 2*c_n
180
+
181
+ return {
182
+ "results_array": np.column_stack([
183
+ births,
184
+ deaths,
185
+ pers,
186
+ p_values,
187
+ rejected.astype(float),
188
+ ]),
189
+ "threshold": 2*c_n # used for diagram
190
+ }