tigramite-fast 5.2.10.1 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tigramite/__init__.py +0 -0
- tigramite/causal_effects.py +1525 -0
- tigramite/causal_mediation.py +1592 -0
- tigramite/data_processing.py +1574 -0
- tigramite/graphs.py +1509 -0
- tigramite/independence_tests/LBFGS.py +1114 -0
- tigramite/independence_tests/__init__.py +0 -0
- tigramite/independence_tests/cmiknn.py +661 -0
- tigramite/independence_tests/cmiknn_mixed.py +1397 -0
- tigramite/independence_tests/cmisymb.py +286 -0
- tigramite/independence_tests/gpdc.py +664 -0
- tigramite/independence_tests/gpdc_torch.py +820 -0
- tigramite/independence_tests/gsquared.py +190 -0
- tigramite/independence_tests/independence_tests_base.py +1310 -0
- tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
- tigramite/independence_tests/pairwise_CI.py +383 -0
- tigramite/independence_tests/parcorr.py +369 -0
- tigramite/independence_tests/parcorr_mult.py +485 -0
- tigramite/independence_tests/parcorr_wls.py +451 -0
- tigramite/independence_tests/regressionCI.py +403 -0
- tigramite/independence_tests/robust_parcorr.py +403 -0
- tigramite/jpcmciplus.py +966 -0
- tigramite/lpcmci.py +3649 -0
- tigramite/models.py +2257 -0
- tigramite/pcmci.py +3935 -0
- tigramite/pcmci_base.py +1218 -0
- tigramite/plotting.py +4735 -0
- tigramite/rpcmci.py +467 -0
- tigramite/toymodels/__init__.py +0 -0
- tigramite/toymodels/context_model.py +261 -0
- tigramite/toymodels/non_additive.py +1231 -0
- tigramite/toymodels/structural_causal_processes.py +1201 -0
- tigramite/toymodels/surrogate_generator.py +319 -0
- tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
- tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
- tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
- tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
- tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
tigramite/independence_tests/gsquared.py
@@ -0,0 +1,190 @@
+"""Tigramite causal discovery for time series."""
+
+# Author: Sagar Nagaraj Simha, Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+from __future__ import print_function
+from scipy import special, spatial
+import numpy as np
+
+from scipy.stats import chi2
+from scipy.special import xlogy
+from scipy.stats.contingency import crosstab
+from scipy.stats.contingency import expected_freq
+from scipy.stats.contingency import margins
+from .independence_tests_base import CondIndTest
+
+class Gsquared(CondIndTest):
+    r"""G-squared conditional independence test for categorical data.
+
+    Uses Chi2 as the null distribution and the method from [7]_ to
+    adjust the degrees of freedom. Valid only asymptotically; at least
+    1000-2000 samples are recommended (depending on the data). For smaller
+    sample sizes use the CMIsymb class, which includes a local permutation test.
+
+    Assumes one-dimensional X, Y, but can be combined with PairwiseMultCI to
+    obtain a test for multivariate X, Y.
+
+    This method requires the scipy.stats package.
+
+    Notes
+    -----
+    The general formula is
+
+    .. math:: G(X;Y|Z) &= 2 n \sum p(z) \sum \sum p(x,y|z) \log
+                \frac{ p(x,y |z)}{p(x|z)\cdot p(y |z)}
+
+    where :math:`n` is the sample size. This is simply :math:`2 n CMI(X;Y|Z)`.
+
+    References
+    ----------
+
+    .. [7] Bishop, Y.M.M., Fienberg, S.E. and Holland, P.W. (1975) Discrete
+        Multivariate Analysis: Theory and Practice. MIT Press, Cambridge.
+
+    Parameters
+    ----------
+    n_symbs : int, optional (default: None)
+        Number of symbols in input data. Should be at least as large as the
+        maximum array entry + 1. If None, n_symbs is inferred by scipy's crosstab.
+
+    **kwargs :
+        Arguments passed on to parent class CondIndTest.
+    """
+    @property
+    def measure(self):
+        """
+        Concrete property to return the measure of the independence test
+        """
+        return self._measure
+
+    def __init__(self,
+                 n_symbs=None,
+                 **kwargs):
+
+        # Setup the member variables
+        self._measure = 'gsquared'
+        self.n_symbs = n_symbs
+        self.two_sided = False
+        self.residual_based = False
+        self.recycle_residuals = False
+        CondIndTest.__init__(self, **kwargs)
+
+        if self.verbosity > 0:
+            print("n_symbs = %s" % self.n_symbs)
+            print("")
+
+    def get_dependence_measure(self, array, xyz, data_type=None):
+        """Returns Gsquared/G-test test statistic.
+
+        Parameters
+        ----------
+        array : array-like
+            data array with X, Y, Z in rows and observations in columns.
+
+        xyz : array of ints
+            XYZ identifier array of shape (dim,).
+
+        Returns
+        -------
+        val : float
+            G-squared estimate.
+        """
+        _, T = array.shape
+        z_indices = np.where(xyz == 2)[0]
+
+        # Flip 2D-array so that order is ([zn...z0, ym...y0, xk...x0], T). The
+        # contingency table is built in this order to ease creating subspaces
+        # of Z=z.
+        array_flip = np.flipud(array)
+
+        # When n_symbs is given, levels=range(0, n_symbs). If data does not
+        # have a symbol in levels, then count=0 in the corresponding N-D
+        # position of contingency table. If levels does not contain a certain
+        # symbol that is present in the data, then the symbol from data is
+        # ignored. If None, then levels are inferred from data (default).
+
+        if self.n_symbs is None:
+            levels = None
+        else:
+            levels = np.tile(np.arange(self.n_symbs), (len(xyz), 1))
+            # Assuming the same list of levels for (z, y, x).
+
+        _, observed = crosstab(*(np.asarray(np.split(array_flip, len(xyz), axis=0)).reshape((-1, T))), levels=levels,
+                               sparse=False)
+
+        observed_shape = observed.shape
+
+        gsquare = 0.0
+        dof = 0
+
+        # The following loop is over the z-subspace to sum over the G-squared
+        # statistic and count empty entries to adjust the degrees of freedom.
+
+        # TODO: Can be further optimized to operate entirely on observed array
+        # without 'for', to operate only within slice of z. sparse=True can
+        # also optimize further.
+
+        # For each permutation of z = (zn ... z1, z0). Example - (0...1,0,1)
+        for zs in np.ndindex(observed_shape[:len(z_indices)]):
+            observedYX = observed[zs]
+            mY, mX = margins(observedYX)
+
+            if np.sum(mY) != 0:
+                expectedYX = expected_freq(observedYX)
+                gsquare += 2 * np.sum(xlogy(observedYX, observedYX)
+                                      - xlogy(observedYX, expectedYX))
+
+                # Check how many rows and columns are all zeros, i.e. how many
+                # marginals are zero in the expected frequencies.
+                nzero_rows = np.sum(~expectedYX.any(axis=1))
+                nzero_cols = np.sum(~expectedYX.any(axis=0))
+
+                # Compute dof. Reduce by 1 dof for every zero marginal row and
+                # column and add to the global degrees of freedom [adapted from
+                # Bishop, 1975].
+                cardYX = observedYX.shape
+                dof += ((cardYX[0] - 1 - nzero_rows) * (cardYX[1] - 1 - nzero_cols))
+
+        # dof cannot be less than 1
+        dof = max(dof, 1)
+        self._temp_dof = dof
+        return gsquare
+
+    def get_analytic_significance(self, value, T, dim, xyz):
+        """Return the p_value of test statistic value 'value', according to a
+        chi-square distribution with 'dof' degrees of freedom."""
+
+        # Calculate the p_value
+        p_value = chi2.sf(value, self._temp_dof)
+        del self._temp_dof
+
+        return p_value
+
+
+if __name__ == '__main__':
+
+    import tigramite
+    from tigramite.data_processing import DataFrame
+    import tigramite.data_processing as pp
+    import numpy as np
+
+    seed = 42
+    random_state = np.random.default_rng(seed=seed)
+    cmi = Gsquared()
+
+    T = 1000
+    dimz = 3
+    z = random_state.binomial(n=1, p=0.5, size=(T, dimz)).reshape(T, dimz)
+    x = np.empty(T).reshape(T, 1)
+    y = np.empty(T).reshape(T, 1)
+    for t in range(T):
+        val = z[t, 0].squeeze()
+        prob = 0.2 + val*0.6
+        x[t] = random_state.choice([0, 1], p=[prob, 1.-prob])
+        y[t] = random_state.choice([0, 1, 2], p=[prob, (1.-prob)/2., (1.-prob)/2.])
+
+    print('start')
+    print(cmi.run_test_raw(x, y, z=None))
+    print(cmi.run_test_raw(x, y, z=z))
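
For orientation, the sketch below shows one common way a test like Gsquared is used for causal discovery on discrete time series. It is not part of the packaged diff above: it assumes the standard tigramite API (DataFrame, PCMCI, run_pcmci) and uses a small synthetic dataset invented here purely for illustration; adjust names to your installed version.

# Illustrative sketch only (not contained in the wheel): plug Gsquared into
# PCMCI for discrete-valued time series.
import numpy as np
from tigramite.data_processing import DataFrame
from tigramite.pcmci import PCMCI
from tigramite.independence_tests.gsquared import Gsquared

rng = np.random.default_rng(0)
T, N = 2000, 3
data = rng.integers(0, 2, size=(T, N))          # toy binary observations
data[1:, 1] = (data[:-1, 0] + data[1:, 1]) % 2  # make X0(t-1) influence X1(t)

dataframe = DataFrame(data, var_names=['X0', 'X1', 'X2'])
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=Gsquared())
results = pcmci.run_pcmci(tau_max=2, pc_alpha=0.05)
print(results['p_matrix'][0, 1])  # p-values for links X0 -> X1 at lags 0..tau_max

The test's analytic significance relies on the chi-square null distribution with the Bishop-style degrees-of-freedom adjustment computed in get_dependence_measure, which is why no permutation scheme is needed here for large samples.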