tigramite_fast-5.2.10.1-py3-none-any.whl
- tigramite/__init__.py +0 -0
- tigramite/causal_effects.py +1525 -0
- tigramite/causal_mediation.py +1592 -0
- tigramite/data_processing.py +1574 -0
- tigramite/graphs.py +1509 -0
- tigramite/independence_tests/LBFGS.py +1114 -0
- tigramite/independence_tests/__init__.py +0 -0
- tigramite/independence_tests/cmiknn.py +661 -0
- tigramite/independence_tests/cmiknn_mixed.py +1397 -0
- tigramite/independence_tests/cmisymb.py +286 -0
- tigramite/independence_tests/gpdc.py +664 -0
- tigramite/independence_tests/gpdc_torch.py +820 -0
- tigramite/independence_tests/gsquared.py +190 -0
- tigramite/independence_tests/independence_tests_base.py +1310 -0
- tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
- tigramite/independence_tests/pairwise_CI.py +383 -0
- tigramite/independence_tests/parcorr.py +369 -0
- tigramite/independence_tests/parcorr_mult.py +485 -0
- tigramite/independence_tests/parcorr_wls.py +451 -0
- tigramite/independence_tests/regressionCI.py +403 -0
- tigramite/independence_tests/robust_parcorr.py +403 -0
- tigramite/jpcmciplus.py +966 -0
- tigramite/lpcmci.py +3649 -0
- tigramite/models.py +2257 -0
- tigramite/pcmci.py +3935 -0
- tigramite/pcmci_base.py +1218 -0
- tigramite/plotting.py +4735 -0
- tigramite/rpcmci.py +467 -0
- tigramite/toymodels/__init__.py +0 -0
- tigramite/toymodels/context_model.py +261 -0
- tigramite/toymodels/non_additive.py +1231 -0
- tigramite/toymodels/structural_causal_processes.py +1201 -0
- tigramite/toymodels/surrogate_generator.py +319 -0
- tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
- tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
- tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
- tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
- tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
--- /dev/null
+++ b/tigramite/independence_tests/parcorr.py
@@ -0,0 +1,369 @@
"""Tigramite causal discovery for time series."""

# Author: Jakob Runge <jakob@jakob-runge.com>
#
# License: GNU General Public License v3.0

from __future__ import print_function
from scipy import stats
import numpy as np
import sys
import warnings

from .independence_tests_base import CondIndTest


class ParCorr(CondIndTest):
    r"""Partial correlation test.

    Partial correlation is estimated through linear ordinary least squares
    (OLS) regression and a test for non-zero linear Pearson correlation on
    the residuals.

    Notes
    -----
    To test :math:`X \perp Y | Z`, first :math:`Z` is regressed out from
    :math:`X` and :math:`Y` assuming the model

    .. math:: X & = Z \beta_X + \epsilon_X \\
        Y & = Z \beta_Y + \epsilon_Y

    using OLS regression. Then the dependency of the residuals is tested
    with the Pearson correlation test

    .. math:: \rho\left(r_X, r_Y\right).

    For ``significance='analytic'`` the Student's-*t* distribution with
    :math:`T-D_Z-2` degrees of freedom is implemented.

    Assumes one-dimensional X, Y. Use ParCorrMult for multivariate X, Y.

    Parameters
    ----------
    **kwargs :
        Arguments passed on to the parent class CondIndTest.
    """

    @property
    def measure(self):
        """
        Concrete property to return the measure of the independence test.
        """
        return self._measure

    def __init__(self, **kwargs):
        self._measure = 'par_corr'
        self.two_sided = True
        self.residual_based = True

        CondIndTest.__init__(self, **kwargs)

    def _get_single_residuals(self, array, target_var,
                              standardize=True,
                              return_means=False):
        """Returns residuals of linear multiple regression.

        Performs an OLS regression of the variable indexed by target_var on
        the conditions Z. Here array is assumed to contain X and Y as the
        first two rows with the remaining rows (if present) containing the
        conditions Z. Optionally returns the estimated regression line.

        Parameters
        ----------
        array : array-like
            Data array with X, Y, Z in rows and observations in columns.

        target_var : {0, 1}
            Variable to regress out conditions from.

        standardize : bool, optional (default: True)
            Whether to standardize the array beforehand. Must be used for
            partial correlation.

        return_means : bool, optional (default: False)
            Whether to return the estimated regression line.

        Returns
        -------
        resid [, mean] : array-like
            The residual of the regression and optionally the estimated line.
        """

        dim, T = array.shape
        dim_z = dim - 2

        # Standardize
        if standardize:
            array -= array.mean(axis=1).reshape(dim, 1)
            std = array.std(axis=1)
            nonzero = std != 0.
            if np.any(nonzero):
                array[nonzero] /= std[nonzero, np.newaxis]
            if np.any(std == 0.) and self.verbosity > 0:
                warnings.warn("Possibly constant array!")

        y = array[target_var, :]

        if dim_z > 0:
            z = array[2:, :].T.copy()
            beta_hat = np.linalg.lstsq(z, y, rcond=None)[0]
            mean = np.dot(z, beta_hat)
            resid = y - mean
        else:
            resid = y
            mean = None

        if return_means:
            return (resid, mean)
        return resid

    def _get_both_residuals(self, array, standardize=True):
        """Returns residuals of linear multiple regression for both X and Y.

        Performs a single OLS regression of both X and Y on Z simultaneously,
        avoiding a redundant QR decomposition. This is faster than calling
        _get_single_residuals twice.

        Parameters
        ----------
        array : array-like
            Data array with X, Y, Z in rows and observations in columns.

        standardize : bool, optional (default: True)
            Whether to standardize the array beforehand.

        Returns
        -------
        x_resid, y_resid : tuple of array-like
            The residuals of X and Y after regressing out Z.
        """

        dim, T = array.shape
        dim_z = dim - 2

        # Standardize once for both targets
        if standardize:
            array -= array.mean(axis=1).reshape(dim, 1)
            std = array.std(axis=1)
            nonzero = std != 0.
            if np.any(nonzero):
                array[nonzero] /= std[nonzero, np.newaxis]
            if np.any(std == 0.) and self.verbosity > 0:
                warnings.warn("Possibly constant array!")

        if dim_z > 0:
            z = array[2:, :].T.copy()
            # Solve for both X and Y targets in a single lstsq call
            beta_hat = np.linalg.lstsq(z, array[:2, :].T, rcond=None)[0]
            means = np.dot(z, beta_hat)
            x_resid = array[0, :] - means[:, 0]
            y_resid = array[1, :] - means[:, 1]
        else:
            x_resid = array[0, :]
            y_resid = array[1, :]

        return x_resid, y_resid

    def get_dependence_measure(self, array, xyz, data_type=None):
        """Return partial correlation.

        Estimated as the Pearson correlation of the residuals of a linear
        OLS regression.

        Parameters
        ----------
        array : array-like
            Data array with X, Y, Z in rows and observations in columns.

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        Returns
        -------
        val : float
            Partial correlation coefficient.
        """

        x_vals, y_vals = self._get_both_residuals(array)
        val, _ = stats.pearsonr(x_vals, y_vals)
        return val

    def get_shuffle_significance(self, array, xyz, value,
                                 return_null_dist=False,
                                 data_type=None):
        """Returns p-value for shuffle significance test.

        For residual-based test statistics only the residuals are shuffled.

        Parameters
        ----------
        array : array-like
            Data array with X, Y, Z in rows and observations in columns.

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        value : number
            Value of test statistic for unshuffled estimate.

        Returns
        -------
        pval : float
            p-value.
        """

        x_vals, y_vals = self._get_both_residuals(array)
        array_resid = np.array([x_vals, y_vals])
        xyz_resid = np.array([0, 1])

        null_dist = self._get_shuffle_dist(array_resid, xyz_resid,
                                           self.get_dependence_measure,
                                           sig_samples=self.sig_samples,
                                           sig_blocklength=self.sig_blocklength,
                                           verbosity=self.verbosity)

        pval = float(np.sum(np.abs(null_dist) >= np.abs(value)) + 1) / (self.sig_samples + 1)

        if return_null_dist:
            return pval, null_dist
        return pval

    def get_analytic_significance(self, value, T, dim, xyz):
        """Returns analytic p-value from Student's t-test for the Pearson
        correlation coefficient.

        Assumes a two-sided test. If the degrees of freedom are less than
        1, numpy.nan is returned.

        Parameters
        ----------
        value : float
            Test statistic value.

        T : int
            Sample length.

        dim : int
            Dimensionality, i.e., number of features.

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        Returns
        -------
        pval : float or numpy.nan
            P-value.
        """
        # Get the number of degrees of freedom
        deg_f = T - dim

        if deg_f < 1:
            pval = np.nan
        elif abs(abs(value) - 1.0) <= sys.float_info.min:
            pval = 0.0
        else:
            trafo_val = value * np.sqrt(deg_f / (1. - value * value))
            # Two-sided significance level
            pval = stats.t.sf(np.abs(trafo_val), deg_f) * 2

        return pval

    def get_analytic_confidence(self, value, df, conf_lev):
        """Returns analytic confidence interval for correlation coefficient.

        Based on Student's t-distribution.

        Parameters
        ----------
        value : float
            Test statistic value.

        df : int
            Degrees of freedom of the test.

        conf_lev : float
            Confidence level, e.g., 0.9.

        Returns
        -------
        (conf_lower, conf_upper) : Tuple of floats
            Upper and lower confidence bound of confidence interval.
        """
        # Confidence interval is two-sided
        c_int = (1. - (1. - conf_lev) / 2.)

        value_tdist = value * np.sqrt(df) / np.sqrt(1. - value**2)
        conf_lower = (stats.t.ppf(q=1. - c_int, df=df, loc=value_tdist)
                      / np.sqrt(df + stats.t.ppf(q=1. - c_int, df=df,
                                                 loc=value_tdist)**2))
        conf_upper = (stats.t.ppf(q=c_int, df=df, loc=value_tdist)
                      / np.sqrt(df + stats.t.ppf(q=c_int, df=df,
                                                 loc=value_tdist)**2))
        return (conf_lower, conf_upper)

    def get_model_selection_criterion(self, j, parents, tau_max=0, criterion='aic'):
        """Returns model selection criterion modulo constants.

        Fits a linear model of the parents to variable j and returns the
        score. Here used to determine optimal hyperparameters in PCMCI,
        in particular the pc_alpha value.

        Parameters
        ----------
        j : int
            Index of target variable in data array.

        parents : list
            List of form [(0, -1), (3, -2), ...] containing parents.

        tau_max : int, optional (default: 0)
            Maximum time lag. This may be used to make sure that estimates
            for different lags in X and Z all have the same sample size.

        criterion : string
            Scoring criterion among AIC, BIC, or corrected AIC.

        Returns
        -------
        score : float
            Model score.
        """

        Y = [(j, 0)]
        X = [(j, 0)]  # dummy variable here
        Z = parents
        array, xyz, _ = self.dataframe.construct_array(X=X, Y=Y, Z=Z,
                                                       tau_max=tau_max,
                                                       mask_type=self.mask_type,
                                                       return_cleaned_xyz=False,
                                                       do_checks=True,
                                                       verbosity=self.verbosity)

        dim, T = array.shape

        y = self._get_single_residuals(array, target_var=1, return_means=False)
        # Get the residual sum of squares (RSS)
        rss = (y**2).sum()
        # Number of parameters: dim counts the dummy X, hence -1, which also
        # accounts for the de-meaning
        p = dim - 1

        # Compute the score
        if criterion == 'corrected_aic':
            score = T * np.log(rss) + 2. * p + (2. * p**2 + 2. * p) / (T - p - 1)
        elif criterion == 'bic':
            # BIC = n * log(RSS / n) + K * log(n)
            score = T * np.log(rss / float(T)) + p * np.log(T)
        elif criterion == 'aic':
            score = T * np.log(rss) + 2. * p
        else:
            raise ValueError("Unknown scoring criterion.")

        return score
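
The speedup in _get_both_residuals comes from passing a two-column right-hand side to np.linalg.lstsq, so one factorization of Z serves both regressions. A minimal NumPy sketch, independent of the package, showing that the batched solve reproduces two separate single-target regressions:

# Sketch: batched lstsq with a (T, 2) right-hand side gives the same
# residuals as two single-target regressions (what calling
# _get_single_residuals twice would compute).
import numpy as np

rng = np.random.default_rng(0)
T, dim_z = 1000, 4
z = rng.standard_normal((T, dim_z))
x = z @ rng.standard_normal(dim_z) + rng.standard_normal(T)
y = z @ rng.standard_normal(dim_z) + rng.standard_normal(T)

# Two separate regressions.
rx = x - z @ np.linalg.lstsq(z, x, rcond=None)[0]
ry = y - z @ np.linalg.lstsq(z, y, rcond=None)[0]

# One batched regression with both targets stacked column-wise.
targets = np.column_stack([x, y])
beta = np.linalg.lstsq(z, targets, rcond=None)[0]
resid = targets - z @ beta

assert np.allclose(rx, resid[:, 0]) and np.allclose(ry, resid[:, 1])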
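
get_shuffle_significance computes a permutation p-value with the standard +1 correction: the observed statistic is counted as one member of the null ensemble, so the estimate can never be exactly zero. A stand-alone sketch of that formula, using a plain permutation in place of the package's _get_shuffle_dist (which additionally supports block shuffling via sig_blocklength):

# Sketch of the permutation p-value (#{|null| >= |obs|} + 1) / (n + 1).
import numpy as np

rng = np.random.default_rng(2)
x = rng.standard_normal(500)
y = 0.1 * x + rng.standard_normal(500)

obs = np.corrcoef(x, y)[0, 1]
n_perm = 500
null = np.array([np.corrcoef(rng.permutation(x), y)[0, 1]
                 for _ in range(n_perm)])
pval = (np.sum(np.abs(null) >= np.abs(obs)) + 1) / (n_perm + 1)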
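
get_analytic_significance transforms the (partial) correlation r into t = r * sqrt(deg_f / (1 - r^2)) with deg_f = T - dim and evaluates a two-sided Student's-t tail probability. In the unconditional case (dim = 2, so deg_f = T - 2) this agrees with scipy's own Pearson correlation p-value, which the following sketch checks:

# Sketch: the t-transform p-value matches scipy's pearsonr p-value
# when there are no conditions.
import numpy as np
from scipy import stats

rng = np.random.default_rng(1)
T = 200
x = rng.standard_normal(T)
y = 0.3 * x + rng.standard_normal(T)

r, pval_scipy = stats.pearsonr(x, y)
deg_f = T - 2
trafo = r * np.sqrt(deg_f / (1. - r * r))
pval_analytic = 2. * stats.t.sf(abs(trafo), deg_f)

assert np.isclose(pval_scipy, pval_analytic)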
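
For context beyond this file, a hedged usage sketch assuming the standard tigramite 5.x API (DataFrame, PCMCI, run_test; exact signatures and return shapes may differ across versions): ParCorr is constructed with a significance option and either used directly for a single conditional independence test or passed to PCMCI for full causal discovery.

# Usage sketch (not part of the diff; assumes the tigramite 5.x API).
import numpy as np
from tigramite.data_processing import DataFrame
from tigramite.pcmci import PCMCI
from tigramite.independence_tests.parcorr import ParCorr

# Toy data: 3 variables, 500 time steps, lagged linear link 0 -> 1.
rng = np.random.default_rng(42)
data = rng.standard_normal((500, 3))
data[1:, 1] += 0.6 * data[:-1, 0]

dataframe = DataFrame(data)
parcorr = ParCorr(significance='analytic')

# Single test: is X_0(t-1) independent of X_1(t) given X_2(t-1)?
# Recent versions return (val, pval).
parcorr.set_dataframe(dataframe)
val, pval = parcorr.run_test(X=[(0, -1)], Y=[(1, 0)], Z=[(2, -1)], tau_max=1)

# Or run full causal discovery with PCMCI.
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=parcorr)
results = pcmci.run_pcmci(tau_max=2, pc_alpha=0.05)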