tigramite_fast-5.2.10.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tigramite/__init__.py +0 -0
- tigramite/causal_effects.py +1525 -0
- tigramite/causal_mediation.py +1592 -0
- tigramite/data_processing.py +1574 -0
- tigramite/graphs.py +1509 -0
- tigramite/independence_tests/LBFGS.py +1114 -0
- tigramite/independence_tests/__init__.py +0 -0
- tigramite/independence_tests/cmiknn.py +661 -0
- tigramite/independence_tests/cmiknn_mixed.py +1397 -0
- tigramite/independence_tests/cmisymb.py +286 -0
- tigramite/independence_tests/gpdc.py +664 -0
- tigramite/independence_tests/gpdc_torch.py +820 -0
- tigramite/independence_tests/gsquared.py +190 -0
- tigramite/independence_tests/independence_tests_base.py +1310 -0
- tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
- tigramite/independence_tests/pairwise_CI.py +383 -0
- tigramite/independence_tests/parcorr.py +369 -0
- tigramite/independence_tests/parcorr_mult.py +485 -0
- tigramite/independence_tests/parcorr_wls.py +451 -0
- tigramite/independence_tests/regressionCI.py +403 -0
- tigramite/independence_tests/robust_parcorr.py +403 -0
- tigramite/jpcmciplus.py +966 -0
- tigramite/lpcmci.py +3649 -0
- tigramite/models.py +2257 -0
- tigramite/pcmci.py +3935 -0
- tigramite/pcmci_base.py +1218 -0
- tigramite/plotting.py +4735 -0
- tigramite/rpcmci.py +467 -0
- tigramite/toymodels/__init__.py +0 -0
- tigramite/toymodels/context_model.py +261 -0
- tigramite/toymodels/non_additive.py +1231 -0
- tigramite/toymodels/structural_causal_processes.py +1201 -0
- tigramite/toymodels/surrogate_generator.py +319 -0
- tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
- tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
- tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
- tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
- tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,403 @@
"""Tigramite causal discovery for time series."""

# Author: Jakob Runge <jakob@jakob-runge.com>
#
# License: GNU General Public License v3.0

from __future__ import print_function
from scipy import stats
import numpy as np
import sys
import warnings

from .independence_tests_base import CondIndTest


class RobustParCorr(CondIndTest):
    r"""Robust partial correlation test based on non-paranormal models.

    Partial correlation is estimated through transformation to standard
    normal marginals, ordinary least squares (OLS) regression, and a test
    for non-zero linear Pearson correlation on the residuals.

    Assumes one-dimensional X, Y. It can, however, be combined with
    PairwiseMultCI to obtain a test for multivariate X, Y.

    Notes
    -----
    To test :math:`X \perp Y | Z`, each marginal is first transformed to be
    standard normally distributed. For that, the transform
    :math:`\Phi^{-1}\circ\hat{F}` is used. Here, :math:`\Phi^{-1}` is the
    quantile function of a standard normal distribution and :math:`\hat{F}`
    is the empirical distribution function of the respective marginal.

    This idea stems from the literature on nonparanormal models, see:

    - Han Liu, John Lafferty, and Larry Wasserman. The nonparanormal:
      semiparametric estimation of high dimensional undirected graphs.
      J. Mach. Learn. Res., 10:2295–2328, 2009.

    - Han Liu, Fang Han, Ming Yuan, John Lafferty, and Larry Wasserman.
      High-dimensional semiparametric Gaussian copula graphical models.
      Ann. Statist., 40(4):2293–2326, 2012.

    - Naftali Harris, Mathias Drton. PC Algorithm for Nonparanormal
      Graphical Models. Journal of Machine Learning Research,
      14:3365-3383, 2013.

    Afterwards (where Z, X, and Y are now assumed to be transformed to the
    standard normal scale), :math:`Z` is regressed out from :math:`X` and
    :math:`Y` assuming the model

    .. math:: X & = Z \beta_X + \epsilon_{X} \\
              Y & = Z \beta_Y + \epsilon_{Y}

    using OLS regression. Then the dependency of the residuals

    .. math:: \rho\left(r_X, r_Y\right)

    is tested with the Pearson correlation test. For
    ``significance='analytic'``, a Student's *t* distribution with
    :math:`T - D_Z - 2` degrees of freedom is implemented.

    Parameters
    ----------
    **kwargs :
        Arguments passed on to parent class CondIndTest.
    """

    @property
    def measure(self):
        """
        Concrete property to return the measure of the independence test.
        """
        return self._measure

    def __init__(self, **kwargs):
        self._measure = 'robust_par_corr'
        self.two_sided = True
        self.residual_based = True

        CondIndTest.__init__(self, **kwargs)

    def trafo2normal(self, x, thres=0.00001):
        """Transforms input array to standard normal marginals.

        For that, the code first transforms to uniform :math:`[0,1]`
        marginals using the empirical distribution function, and then
        transforms to normal marginals by applying the quantile function
        of a standard normal. Assumes x.shape = (dim, T).

        Parameters
        ----------
        x : array-like
            Input array.

        thres : float
            Small number between 0 and 1; after transformation to the
            uniform scale, all values that are too close to zero are
            replaced by thres and, similarly, all values that are too
            close to one are replaced by 1-thres. This avoids NaNs.

        Returns
        -------
        normal : array-like
            Array with standard normal marginals.
        """

        def trafo(xi):
            xisorted = np.sort(xi)
            yi = np.linspace(1. / len(xi), 1, len(xi))
            return np.interp(xi, xisorted, yi)

        if np.ndim(x) == 1:
            u = trafo(x)
            u[u == 0.] = thres
            u[u == 1.] = 1. - thres
            normal = stats.norm.ppf(u)
        else:
            normal = np.empty(x.shape)
            for i in range(x.shape[0]):
                uniform = trafo(x[i])
                uniform[uniform == 0.] = thres
                uniform[uniform == 1.] = 1. - thres
                normal[i] = stats.norm.ppf(uniform)

        return normal
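
    # Added note: a tiny worked example of trafo() above. For
    # x = np.array([3., 1., 2.]), xisorted = [1., 2., 3.] and
    # yi = [1/3, 2/3, 1.], so trafo(x) returns the empirical CDF values
    # [1., 1/3, 2/3] in the original order. The largest observation maps
    # to u = 1, which stats.norm.ppf would send to +inf; this is why u is
    # clipped to 1 - thres (and to thres at the lower end) before applying
    # the standard normal quantile function.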

    def _get_single_residuals(self, array, target_var,
                              standardize=True,
                              return_means=False):
        """Returns residuals of linear multiple regression.

        Performs an OLS regression of the variable indexed by target_var
        on the conditions Z. Here array is assumed to contain X and Y as
        the first two rows with the remaining rows (if present) containing
        the conditions Z. Optionally returns the estimated regression line.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        target_var : {0, 1}
            Variable to regress out conditions from.

        standardize : bool, optional (default: True)
            Whether to standardize the array beforehand. Must be used for
            partial correlation.

        return_means : bool, optional (default: False)
            Whether to return the estimated regression line.

        Returns
        -------
        resid [, mean] : array-like
            The residual of the regression and optionally the estimated
            line.
        """

        dim, T = array.shape
        dim_z = dim - 2

        # Standardize
        if standardize:
            array -= array.mean(axis=1).reshape(dim, 1)
            std = array.std(axis=1)
            for i in range(dim):
                if std[i] != 0.:
                    array[i] /= std[i]
            if np.any(std == 0.) and self.verbosity > 0:
                warnings.warn("Possibly constant array!")
            # array /= array.std(axis=1).reshape(dim, 1)
            # if np.isnan(array).sum() != 0:
            #     raise ValueError("nans after standardizing, "
            #                      "possibly constant array!")

        y = array[target_var, :]

        if dim_z > 0:
            z = array[2:, :].T.copy()
            beta_hat = np.linalg.lstsq(z, y, rcond=None)[0]
            mean = np.dot(z, beta_hat)
            resid = y - mean
        else:
            resid = y
            mean = None

        if return_means:
            return (resid, mean)
        return resid
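
    # Added note: for an array with rows [X, Y, Z1, Z2] and T observations,
    # _get_single_residuals(array, target_var=0) standardizes the rows,
    # solves the least-squares problem min_beta ||X - Z beta||^2 with
    # np.linalg.lstsq on Z of shape (T, 2), and returns the length-T
    # residual vector X - Z @ beta_hat.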

    def get_dependence_measure(self, array, xyz, data_type=None):
        """Return partial correlation.

        Marginals are first transformed to the standard normal scale. The
        dependence measure is then estimated as the Pearson correlation of
        the residuals of a linear OLS regression.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        Returns
        -------
        val : float
            Partial correlation coefficient.
        """

        # Transform to normal marginals
        array = self.trafo2normal(array)

        x_vals = self._get_single_residuals(array, target_var=0)
        y_vals = self._get_single_residuals(array, target_var=1)

        val, _ = stats.pearsonr(x_vals, y_vals)
        return val
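
    # Added note (illustrative sketch): because each marginal is replaced
    # by its normal scores, the measure is invariant under strictly
    # increasing marginal transformations. For example, with
    #
    #     x = np.random.randn(1000)
    #     y = x + np.random.randn(1000)
    #     raw = np.array([x, y])
    #     warped = np.array([np.exp(x), y**3])
    #
    # get_dependence_measure returns the same value for `raw` and `warped`
    # (ranks are preserved, assuming no ties), whereas a plain Pearson
    # correlation on `warped` would be distorted by the nonlinear marginals.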

    def get_shuffle_significance(self, array, xyz, value,
                                 return_null_dist=False,
                                 data_type=None):
        """Returns p-value for shuffle significance test.

        Firstly, each marginal is transformed to the standard normal scale.
        For residual-based test statistics only the residuals are shuffled.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        value : number
            Value of test statistic for unshuffled estimate.

        Returns
        -------
        pval : float
            p-value
        """

        # Transform to normal marginals
        array = self.trafo2normal(array)

        x_vals = self._get_single_residuals(array, target_var=0)
        y_vals = self._get_single_residuals(array, target_var=1)
        array_resid = np.array([x_vals, y_vals])
        xyz_resid = np.array([0, 1])

        null_dist = self._get_shuffle_dist(array_resid, xyz_resid,
                                           self.get_dependence_measure,
                                           sig_samples=self.sig_samples,
                                           sig_blocklength=self.sig_blocklength,
                                           verbosity=self.verbosity)

        # pval = (null_dist >= np.abs(value)).mean()
        pval = float(np.sum(np.abs(null_dist) >= np.abs(value)) + 1) / (self.sig_samples + 1)

        # # Adjust p-value for two-sided measures
        # if pval < 1.:
        #     pval *= 2.

        if return_null_dist:
            return pval, null_dist
        return pval
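
    # Added note on the p-value above: the estimate
    #     pval = (#{ |null| >= |value| } + 1) / (sig_samples + 1)
    # is the standard permutation p-value with the +1 correction, which
    # keeps the p-value strictly positive and valid under the null.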

    def get_analytic_significance(self, value, T, dim, xyz):
        """Returns analytic p-value from Student's t-test for the Pearson
        correlation coefficient.

        Assumes two-sided correlation. If the degrees of freedom are less
        than 1, numpy.nan is returned.

        Parameters
        ----------
        value : float
            Test statistic value.

        T : int
            Sample length.

        dim : int
            Dimensionality, i.e., number of features.

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        Returns
        -------
        pval : float or numpy.nan
            P-value.
        """
        # Get the number of degrees of freedom
        deg_f = T - dim

        if deg_f < 1:
            pval = np.nan
        elif abs(abs(value) - 1.0) <= sys.float_info.min:
            pval = 0.0
        else:
            trafo_val = value * np.sqrt(deg_f / (1. - value * value))
            # Two-sided significance level
            pval = stats.t.sf(np.abs(trafo_val), deg_f) * 2

        return pval
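
    # Added worked example of the formula above: for value = 0.5, T = 100
    # and dim = 2 (no conditions), deg_f = 98 and
    #     trafo_val = 0.5 * sqrt(98 / (1 - 0.25)) ~= 5.72,
    # so pval = 2 * stats.t.sf(5.72, 98), on the order of 1e-7.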

    def get_analytic_confidence(self, value, df, conf_lev):
        """Returns analytic confidence interval for correlation coefficient.

        Based on Student's t-distribution.

        Parameters
        ----------
        value : float
            Test statistic value.

        df : int
            Degrees of freedom of the test.

        conf_lev : float
            Confidence level, e.g., 0.9.

        Returns
        -------
        (conf_lower, conf_upper) : Tuple of floats
            Lower and upper confidence bound of confidence interval.
        """
        # Confidence interval is two-sided
        c_int = (1. - (1. - conf_lev) / 2.)

        value_tdist = value * np.sqrt(df) / np.sqrt(1. - value**2)
        conf_lower = (stats.t.ppf(q=1. - c_int, df=df, loc=value_tdist)
                      / np.sqrt(df + stats.t.ppf(q=1. - c_int, df=df,
                                                 loc=value_tdist)**2))
        conf_upper = (stats.t.ppf(q=c_int, df=df, loc=value_tdist)
                      / np.sqrt(df + stats.t.ppf(q=c_int, df=df,
                                                 loc=value_tdist)**2))
        return (conf_lower, conf_upper)

    def get_model_selection_criterion(self, j, parents, tau_max=0, corrected_aic=False):
        """Returns Akaike's Information criterion modulo constants.

        First of all, each marginal is transformed to the standard normal
        scale. For this, each marginal is transformed to the uniform scale
        using the empirical distribution function and then transformed to
        the standard normal scale by applying the quantile function of a
        standard normal. Afterwards, fits a linear model of the parents to
        variable j and returns the score. Leave-one-out cross-validation is
        asymptotically equivalent to AIC for ordinary linear regression
        models. Here used to determine optimal hyperparameters in
        PCMCI(plus), in particular the pc_alpha value.

        Parameters
        ----------
        j : int
            Index of target variable in data array.

        parents : list
            List of form [(0, -1), (3, -2), ...] containing parents.

        tau_max : int, optional (default: 0)
            Maximum time lag. This may be used to make sure that estimates
            for different lags in X, Z, all have the same sample size.

        corrected_aic : bool, optional (default: False)
            Whether to use the corrected AIC (AICc), which adds a
            small-sample penalty term.

        Returns
        -------
        score : float
            Model score.
        """

        Y = [(j, 0)]
        X = [(j, 0)]  # dummy variable here
        Z = parents
        array, xyz, _ = self.dataframe.construct_array(X=X, Y=Y, Z=Z,
                                                       tau_max=tau_max,
                                                       mask_type=self.mask_type,
                                                       return_cleaned_xyz=False,
                                                       do_checks=True,
                                                       verbosity=self.verbosity)

        dim, T = array.shape

        # Transform to normal marginals
        array = self.trafo2normal(array)

        y = self._get_single_residuals(array, target_var=1, return_means=False)
        # Get RSS
        rss = (y**2).sum()
        # Number of parameters
        p = dim - 1
        # Get AIC
        if corrected_aic:
            score = T * np.log(rss) + 2. * p + (2.*p**2 + 2.*p)/(T - p - 1)
        else:
            score = T * np.log(rss) + 2. * p
        return score
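
    # Added note on the score above: up to additive constants this is the
    # Gaussian log-likelihood form of AIC,
    #     AIC  = T * log(RSS) + 2 * p,
    #     AICc = AIC + (2*p**2 + 2*p) / (T - p - 1),
    # with p = dim - 1 taken as the number of parameters. Lower scores
    # indicate a better parent set; PCMCI(plus) uses this to choose
    # pc_alpha.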