tigramite-fast 5.2.10.1 (tigramite_fast-5.2.10.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. tigramite/__init__.py +0 -0
  2. tigramite/causal_effects.py +1525 -0
  3. tigramite/causal_mediation.py +1592 -0
  4. tigramite/data_processing.py +1574 -0
  5. tigramite/graphs.py +1509 -0
  6. tigramite/independence_tests/LBFGS.py +1114 -0
  7. tigramite/independence_tests/__init__.py +0 -0
  8. tigramite/independence_tests/cmiknn.py +661 -0
  9. tigramite/independence_tests/cmiknn_mixed.py +1397 -0
  10. tigramite/independence_tests/cmisymb.py +286 -0
  11. tigramite/independence_tests/gpdc.py +664 -0
  12. tigramite/independence_tests/gpdc_torch.py +820 -0
  13. tigramite/independence_tests/gsquared.py +190 -0
  14. tigramite/independence_tests/independence_tests_base.py +1310 -0
  15. tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
  16. tigramite/independence_tests/pairwise_CI.py +383 -0
  17. tigramite/independence_tests/parcorr.py +369 -0
  18. tigramite/independence_tests/parcorr_mult.py +485 -0
  19. tigramite/independence_tests/parcorr_wls.py +451 -0
  20. tigramite/independence_tests/regressionCI.py +403 -0
  21. tigramite/independence_tests/robust_parcorr.py +403 -0
  22. tigramite/jpcmciplus.py +966 -0
  23. tigramite/lpcmci.py +3649 -0
  24. tigramite/models.py +2257 -0
  25. tigramite/pcmci.py +3935 -0
  26. tigramite/pcmci_base.py +1218 -0
  27. tigramite/plotting.py +4735 -0
  28. tigramite/rpcmci.py +467 -0
  29. tigramite/toymodels/__init__.py +0 -0
  30. tigramite/toymodels/context_model.py +261 -0
  31. tigramite/toymodels/non_additive.py +1231 -0
  32. tigramite/toymodels/structural_causal_processes.py +1201 -0
  33. tigramite/toymodels/surrogate_generator.py +319 -0
  34. tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
  35. tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
  36. tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
  37. tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
  38. tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
--- /dev/null
+++ b/tigramite/independence_tests/robust_parcorr.py
@@ -0,0 +1,403 @@
+ """Tigramite causal discovery for time series."""
+
+ # Author: Jakob Runge <jakob@jakob-runge.com>
+ #
+ # License: GNU General Public License v3.0
+
+ from __future__ import print_function
+ from scipy import stats
+ import numpy as np
+ import sys
+ import warnings
+
+ from .independence_tests_base import CondIndTest
+
+ class RobustParCorr(CondIndTest):
+     r"""Robust partial correlation test based on non-paranormal models.
+
+     Partial correlation is estimated through transformation to standard
+     normal marginals, ordinary least squares (OLS) regression, and a test
+     for non-zero linear Pearson correlation on the residuals.
+
+     Assumes one-dimensional X, Y, but can be combined with PairwiseMultCI
+     to obtain a test for multivariate X, Y.
+
+     Notes
+     -----
+     To test :math:`X \perp Y | Z`, each marginal is first transformed to
+     be standard normally distributed using the transform
+     :math:`\Phi^{-1}\circ\hat{F}`. Here, :math:`\Phi^{-1}` is the
+     quantile function of a standard normal distribution and
+     :math:`\hat{F}` is the empirical distribution function of the
+     respective marginal.
+
+     This idea stems from the literature on nonparanormal models, see:
+
+     - Han Liu, John Lafferty, and Larry Wasserman. The nonparanormal:
+       semiparametric estimation of high dimensional undirected graphs.
+       J. Mach. Learn. Res., 10:2295-2328, 2009.
+
+     - Han Liu, Fang Han, Ming Yuan, John Lafferty, and Larry Wasserman.
+       High-dimensional semiparametric Gaussian copula graphical models.
+       Ann. Statist., 40(4):2293-2326, 2012a.
+
+     - Naftali Harris, Mathias Drton. PC Algorithm for Nonparanormal
+       Graphical Models. Journal of Machine Learning Research, 14:
+       3365-3383, 2013.
+
+     Afterwards (where Z, X, and Y are now assumed to be transformed to
+     the standard normal scale), :math:`Z` is regressed out from
+     :math:`X` and :math:`Y` assuming the model
+
+     .. math::  X & =  Z \beta_X + \epsilon_{X} \\
+         Y & =  Z \beta_Y + \epsilon_{Y}
+
+     using OLS regression. Then the dependency of the residuals is tested
+     with the Pearson correlation test
+
+     .. math::  \rho\left(r_X, r_Y\right)
+
+     For ``significance='analytic'``, a Student's *t* distribution with
+     :math:`T - D_Z - 2` degrees of freedom is implemented.
+
+     Parameters
+     ----------
+     **kwargs :
+         Arguments passed on to parent class CondIndTest.
+     """
+
+     @property
+     def measure(self):
+         """
+         Concrete property to return the measure of the independence test.
+         """
+         return self._measure
+
+     def __init__(self, **kwargs):
+         self._measure = 'robust_par_corr'
+         self.two_sided = True
+         self.residual_based = True
+
+         CondIndTest.__init__(self, **kwargs)
+
+     def trafo2normal(self, x, thres=0.00001):
+         """Transforms input array to standard normal marginals.
+
+         For that, the code first transforms to uniform :math:`[0,1]`
+         marginals using the empirical distribution function, and then
+         transforms to normal marginals by applying the quantile function
+         of a standard normal. Assumes x.shape = (dim, T).
+
+         Parameters
+         ----------
+         x : array-like
+             Input array.
+
+         thres : float
+             Small number between 0 and 1; after transformation to the
+             uniform scale, all values that are too close to zero are
+             replaced by thres and, similarly, all values that are too
+             close to one are replaced by 1 - thres. This avoids NaNs.
+
+         Returns
+         -------
+         normal : array-like
+             Array with normal marginals.
+         """
+
+         def trafo(xi):
+             # Empirical CDF: interpolate the rank-based ECDF values of
+             # the sorted sample at the original positions.
+             xisorted = np.sort(xi)
+             yi = np.linspace(1. / len(xi), 1, len(xi))
+             return np.interp(xi, xisorted, yi)
+
+         if np.ndim(x) == 1:
+             u = trafo(x)
+             # Clamp boundary values so that the normal quantile function
+             # does not return +/- infinity.
+             u[u == 0.] = thres
+             u[u == 1.] = 1. - thres
+             normal = stats.norm.ppf(u)
+         else:
+             normal = np.empty(x.shape)
+             for i in range(x.shape[0]):
+                 uniform = trafo(x[i])
+                 uniform[uniform == 0.] = thres
+                 uniform[uniform == 1.] = 1. - thres
+                 normal[i] = stats.norm.ppf(uniform)
+
+         return normal
+
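As a reference for the transform above, here is a minimal standalone sketch of the nonparanormal trick (quantile function composed with the empirical CDF); all names are illustrative and nothing below is package API:

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(42)
    x = rng.exponential(size=1000)        # heavily skewed marginal

    # Empirical CDF via interpolation of the sorted sample, as in trafo().
    u = np.interp(x, np.sort(x), np.linspace(1. / len(x), 1., len(x)))

    # Clamp boundary values, then apply the standard normal quantile
    # function (Phi^{-1}).
    u = np.clip(u, 1e-5, 1. - 1e-5)
    z = stats.norm.ppf(u)

    print(stats.skew(x), stats.skew(z))   # skew ~2 before, ~0 after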
+     def _get_single_residuals(self, array, target_var,
+                               standardize=True,
+                               return_means=False):
+         """Returns residuals of linear multiple regression.
+
+         Performs an OLS regression of the variable indexed by target_var
+         on the conditions Z. Here array is assumed to contain X and Y as
+         the first two rows with the remaining rows (if present)
+         containing the conditions Z. Optionally returns the estimated
+         regression line.
+
+         Parameters
+         ----------
+         array : array-like
+             Data array with X, Y, Z in rows and observations in columns.
+
+         target_var : {0, 1}
+             Variable to regress out conditions from.
+
+         standardize : bool, optional (default: True)
+             Whether to standardize the array beforehand. Must be used for
+             partial correlation.
+
+         return_means : bool, optional (default: False)
+             Whether to return the estimated regression line.
+
+         Returns
+         -------
+         resid [, mean] : array-like
+             The residual of the regression and optionally the estimated
+             line.
+         """
+
+         dim, T = array.shape
+         dim_z = dim - 2
+
+         # Standardize
+         if standardize:
+             array -= array.mean(axis=1).reshape(dim, 1)
+             std = array.std(axis=1)
+             for i in range(dim):
+                 if std[i] != 0.:
+                     array[i] /= std[i]
+             if np.any(std == 0.) and self.verbosity > 0:
+                 warnings.warn("Possibly constant array!")
+             # array /= array.std(axis=1).reshape(dim, 1)
+             # if np.isnan(array).sum() != 0:
+             #     raise ValueError("nans after standardizing, "
+             #                      "possibly constant array!")
+
+         y = array[target_var, :]
+
+         if dim_z > 0:
+             # Least-squares fit of the conditions Z to the target.
+             z = array[2:, :].T.copy()
+             beta_hat = np.linalg.lstsq(z, y, rcond=None)[0]
+             mean = np.dot(z, beta_hat)
+             resid = y - mean
+         else:
+             resid = y
+             mean = None
+
+         if return_means:
+             return (resid, mean)
+         return resid
+
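A small synthetic sketch of the residualization step above, mirroring the lstsq-based regression (illustrative names only):

    import numpy as np

    rng = np.random.default_rng(0)
    T = 500
    z = rng.standard_normal((2, T))                    # two conditions Z
    x = 0.7 * z[0] - 0.2 * z[1] + rng.standard_normal(T)

    array = np.vstack([x, rng.standard_normal(T), z])  # rows: X, Y, Z, Z
    y = array[0]                                       # target_var = 0
    design = array[2:].T                               # (T, dim_z) matrix
    beta_hat = np.linalg.lstsq(design, y, rcond=None)[0]
    resid = y - design @ beta_hat
    print(np.corrcoef(resid, z[0])[0, 1])              # ~0: Z regressed out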
+     def get_dependence_measure(self, array, xyz, data_type=None):
+         """Return partial correlation.
+
+         Marginals are first transformed to the standard normal scale. The
+         dependence measure is then estimated as the Pearson correlation
+         of the residuals of a linear OLS regression.
+
+         Parameters
+         ----------
+         array : array-like
+             Data array with X, Y, Z in rows and observations in columns.
+
+         xyz : array of ints
+             XYZ identifier array of shape (dim,).
+
+         Returns
+         -------
+         val : float
+             Partial correlation coefficient.
+         """
+
+         # Transform to normal marginals
+         array = self.trafo2normal(array)
+
+         x_vals = self._get_single_residuals(array, target_var=0)
+         y_vals = self._get_single_residuals(array, target_var=1)
+
+         val, _ = stats.pearsonr(x_vals, y_vals)
+         return val
+
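Putting the transform and residualization together, a self-contained sketch of what this method computes, on skewed data where X and Y are conditionally independent given Z (to_normal_marginal is a stand-in for trafo2normal, not package API):

    import numpy as np
    from scipy import stats

    def to_normal_marginal(xi):
        u = np.interp(xi, np.sort(xi),
                      np.linspace(1. / len(xi), 1., len(xi)))
        return stats.norm.ppf(np.clip(u, 1e-5, 1. - 1e-5))

    rng = np.random.default_rng(1)
    T = 1000
    z = rng.standard_normal(T)
    x = np.exp(z + 0.5 * rng.standard_normal(T))  # skewed, driven by Z
    y = np.exp(z + 0.5 * rng.standard_normal(T))  # skewed, driven by Z

    arr = np.array([to_normal_marginal(v) for v in (x, y, z)])
    design = arr[2:].T
    rx = arr[0] - design @ np.linalg.lstsq(design, arr[0], rcond=None)[0]
    ry = arr[1] - design @ np.linalg.lstsq(design, arr[1], rcond=None)[0]
    val, _ = stats.pearsonr(rx, ry)
    print(val)  # close to 0: X and Y are independent given Z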
+     def get_shuffle_significance(self, array, xyz, value,
+                                  return_null_dist=False,
+                                  data_type=None):
+         """Returns p-value for shuffle significance test.
+
+         First, each marginal is transformed to the standard normal scale.
+         For residual-based test statistics, only the residuals are
+         shuffled.
+
+         Parameters
+         ----------
+         array : array-like
+             Data array with X, Y, Z in rows and observations in columns.
+
+         xyz : array of ints
+             XYZ identifier array of shape (dim,).
+
+         value : number
+             Value of test statistic for unshuffled estimate.
+
+         Returns
+         -------
+         pval : float
+             p-value.
+         """
+
+         # Transform to normal marginals
+         array = self.trafo2normal(array)
+
+         x_vals = self._get_single_residuals(array, target_var=0)
+         y_vals = self._get_single_residuals(array, target_var=1)
+         array_resid = np.array([x_vals, y_vals])
+         xyz_resid = np.array([0, 1])
+
+         null_dist = self._get_shuffle_dist(array_resid, xyz_resid,
+                                            self.get_dependence_measure,
+                                            sig_samples=self.sig_samples,
+                                            sig_blocklength=self.sig_blocklength,
+                                            verbosity=self.verbosity)
+
+         # Add-one correction: counting the unshuffled statistic itself
+         # keeps the p-value strictly positive.
+         # pval = (null_dist >= np.abs(value)).mean()
+         pval = float(np.sum(np.abs(null_dist) >= np.abs(value)) + 1) / (self.sig_samples + 1)
+
+         # # Adjust p-value for two-sided measures
+         # if pval < 1.:
+         #     pval *= 2.
+
+         if return_null_dist:
+             return pval, null_dist
+         return pval
+
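A sketch of the add-one permutation p-value computed above. Note that _get_shuffle_dist may use block shuffling for serially dependent data, so the plain permutation here is a simplified stand-in:

    import numpy as np
    from scipy import stats

    rng = np.random.default_rng(2)
    rx, ry = rng.standard_normal(500), rng.standard_normal(500)
    value, _ = stats.pearsonr(rx, ry)

    sig_samples = 1000
    null_dist = np.empty(sig_samples)
    for b in range(sig_samples):
        # Shuffling one residual vector breaks any dependence with the other.
        null_dist[b], _ = stats.pearsonr(rx, rng.permutation(ry))

    pval = (np.sum(np.abs(null_dist) >= np.abs(value)) + 1.) \
        / (sig_samples + 1.)
    print(pval)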
+     def get_analytic_significance(self, value, T, dim, xyz):
+         """Returns analytic p-value from Student's t-test for the Pearson
+         correlation coefficient.
+
+         Assumes a two-sided test. If the degrees of freedom are less than
+         1, numpy.nan is returned.
+
+         Parameters
+         ----------
+         value : float
+             Test statistic value.
+
+         T : int
+             Sample length.
+
+         dim : int
+             Dimensionality, i.e., number of features.
+
+         xyz : array of ints
+             XYZ identifier array of shape (dim,).
+
+         Returns
+         -------
+         pval : float or numpy.nan
+             P-value.
+         """
+         # Get the number of degrees of freedom
+         deg_f = T - dim
+
+         if deg_f < 1:
+             pval = np.nan
+         elif abs(abs(value) - 1.0) <= sys.float_info.min:
+             # Perfect correlation
+             pval = 0.0
+         else:
+             trafo_val = value * np.sqrt(deg_f / (1. - value * value))
+             # Two-sided significance level
+             pval = stats.t.sf(np.abs(trafo_val), deg_f) * 2
+
+         return pval
+
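A quick numerical check, assuming the unconditional case dim = 2 (so deg_f = T - 2), that the formula above reproduces scipy's two-sided Pearson test:

    import sys
    import numpy as np
    from scipy import stats

    def analytic_pval(value, T, dim):
        # Mirrors get_analytic_significance above.
        deg_f = T - dim
        if deg_f < 1:
            return np.nan
        if abs(abs(value) - 1.0) <= sys.float_info.min:
            return 0.0
        trafo_val = value * np.sqrt(deg_f / (1. - value * value))
        return stats.t.sf(np.abs(trafo_val), deg_f) * 2

    rng = np.random.default_rng(3)
    x, y = rng.standard_normal(200), rng.standard_normal(200)
    r, p_scipy = stats.pearsonr(x, y)
    print(analytic_pval(r, T=200, dim=2), p_scipy)  # the two agree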
+     def get_analytic_confidence(self, value, df, conf_lev):
+         """Returns analytic confidence interval for correlation coefficient.
+
+         Based on Student's t-distribution.
+
+         Parameters
+         ----------
+         value : float
+             Test statistic value.
+
+         df : int
+             Degrees of freedom of the test.
+
+         conf_lev : float
+             Confidence level, e.g., 0.9.
+
+         Returns
+         -------
+         (conf_lower, conf_upper) : Tuple of floats
+             Lower and upper confidence bounds of the confidence interval.
+         """
+         # Confidence interval is two-sided
+         c_int = (1. - (1. - conf_lev) / 2.)
+
+         value_tdist = value * np.sqrt(df) / np.sqrt(1. - value**2)
+         conf_lower = (stats.t.ppf(q=1. - c_int, df=df, loc=value_tdist)
+                       / np.sqrt(df + stats.t.ppf(q=1. - c_int, df=df,
+                                                  loc=value_tdist)**2))
+         conf_upper = (stats.t.ppf(q=c_int, df=df, loc=value_tdist)
+                       / np.sqrt(df + stats.t.ppf(q=c_int, df=df,
+                                                  loc=value_tdist)**2))
+         return (conf_lower, conf_upper)
+
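A numerical sketch of the interval formula above for an assumed correlation value and degrees of freedom (the method itself is normally invoked through the CondIndTest machinery):

    import numpy as np
    from scipy import stats

    value, df, conf_lev = 0.3, 498, 0.9
    c_int = 1. - (1. - conf_lev) / 2.

    # Shift the t-distribution to the transformed statistic, then map the
    # quantiles back to the correlation scale.
    t_val = value * np.sqrt(df) / np.sqrt(1. - value**2)
    lo_t = stats.t.ppf(q=1. - c_int, df=df, loc=t_val)
    hi_t = stats.t.ppf(q=c_int, df=df, loc=t_val)
    conf_lower = lo_t / np.sqrt(df + lo_t**2)
    conf_upper = hi_t / np.sqrt(df + hi_t**2)
    print(conf_lower, conf_upper)  # interval containing 0.3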
+     def get_model_selection_criterion(self, j, parents, tau_max=0,
+                                       corrected_aic=False):
+         """Returns Akaike's Information criterion modulo constants.
+
+         First, each marginal is transformed to the standard normal scale:
+         each marginal is mapped to the uniform scale using the empirical
+         distribution function and then to the standard normal scale by
+         applying the quantile function of a standard normal. Afterwards,
+         fits a linear model of the parents to variable j and returns the
+         score. Leave-one-out cross-validation is asymptotically
+         equivalent to AIC for ordinary linear regression models. Here
+         used to determine optimal hyperparameters in PCMCI(plus), in
+         particular the pc_alpha value.
+
+         Parameters
+         ----------
+         j : int
+             Index of target variable in data array.
+
+         parents : list
+             List of form [(0, -1), (3, -2), ...] containing parents.
+
+         tau_max : int, optional (default: 0)
+             Maximum time lag. This may be used to make sure that
+             estimates for different lags in X, Z all have the same sample
+             size.
+
+         corrected_aic : bool, optional (default: False)
+             Whether to use the small-sample corrected AIC (AICc).
+
+         Returns
+         -------
+         score : float
+             Model score.
+         """
+
+         Y = [(j, 0)]
+         X = [(j, 0)]  # dummy variable here
+         Z = parents
+         array, xyz, _ = self.dataframe.construct_array(X=X, Y=Y, Z=Z,
+                                                        tau_max=tau_max,
+                                                        mask_type=self.mask_type,
+                                                        return_cleaned_xyz=False,
+                                                        do_checks=True,
+                                                        verbosity=self.verbosity)
+
+         dim, T = array.shape
+
+         # Transform to normal marginals
+         array = self.trafo2normal(array)
+
+         y = self._get_single_residuals(array, target_var=1,
+                                        return_means=False)
+         # Get residual sum of squares
+         rss = (y**2).sum()
+         # Number of parameters
+         p = dim - 1
+         # Get AIC
+         if corrected_aic:
+             score = T * np.log(rss) + 2. * p + (2.*p**2 + 2.*p)/(T - p - 1)
+         else:
+             score = T * np.log(rss) + 2. * p
+         return score
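A self-contained sketch of the comparison this score enables: under the AIC arithmetic above (T * log(rss) + 2 * p), fitting the true parent yields a lower score than an empty parent set. The helper is illustrative only, not package API:

    import numpy as np

    rng = np.random.default_rng(4)
    T = 500
    parent = rng.standard_normal(T)
    target = 0.8 * parent + rng.standard_normal(T)

    def aic(y, design, corrected=False):
        # Residual sum of squares of an OLS fit (empty fit: demeaned y).
        if design is None:
            resid, p = y - y.mean(), 0
        else:
            beta = np.linalg.lstsq(design, y, rcond=None)[0]
            resid, p = y - design @ beta, design.shape[1]
        rss = (resid**2).sum()
        score = T * np.log(rss) + 2. * p
        if corrected:  # small-sample AICc correction
            score += (2. * p**2 + 2. * p) / (T - p - 1)
        return score

    print(aic(target, parent[:, None]), aic(target, None))  # true parent wins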