tigramite-fast 5.2.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. tigramite/__init__.py +0 -0
  2. tigramite/causal_effects.py +1525 -0
  3. tigramite/causal_mediation.py +1592 -0
  4. tigramite/data_processing.py +1574 -0
  5. tigramite/graphs.py +1509 -0
  6. tigramite/independence_tests/LBFGS.py +1114 -0
  7. tigramite/independence_tests/__init__.py +0 -0
  8. tigramite/independence_tests/cmiknn.py +661 -0
  9. tigramite/independence_tests/cmiknn_mixed.py +1397 -0
  10. tigramite/independence_tests/cmisymb.py +286 -0
  11. tigramite/independence_tests/gpdc.py +664 -0
  12. tigramite/independence_tests/gpdc_torch.py +820 -0
  13. tigramite/independence_tests/gsquared.py +190 -0
  14. tigramite/independence_tests/independence_tests_base.py +1310 -0
  15. tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
  16. tigramite/independence_tests/pairwise_CI.py +383 -0
  17. tigramite/independence_tests/parcorr.py +369 -0
  18. tigramite/independence_tests/parcorr_mult.py +485 -0
  19. tigramite/independence_tests/parcorr_wls.py +451 -0
  20. tigramite/independence_tests/regressionCI.py +403 -0
  21. tigramite/independence_tests/robust_parcorr.py +403 -0
  22. tigramite/jpcmciplus.py +966 -0
  23. tigramite/lpcmci.py +3649 -0
  24. tigramite/models.py +2257 -0
  25. tigramite/pcmci.py +3935 -0
  26. tigramite/pcmci_base.py +1218 -0
  27. tigramite/plotting.py +4735 -0
  28. tigramite/rpcmci.py +467 -0
  29. tigramite/toymodels/__init__.py +0 -0
  30. tigramite/toymodels/context_model.py +261 -0
  31. tigramite/toymodels/non_additive.py +1231 -0
  32. tigramite/toymodels/structural_causal_processes.py +1201 -0
  33. tigramite/toymodels/surrogate_generator.py +319 -0
  34. tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
  35. tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
  36. tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
  37. tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
  38. tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,369 @@
1
+ """Tigramite causal discovery for time series."""
2
+
3
+ # Author: Jakob Runge <jakob@jakob-runge.com>
4
+ #
5
+ # License: GNU General Public License v3.0
6
+
7
+ from __future__ import print_function
8
+ from scipy import stats
9
+ import numpy as np
10
+ import sys
11
+ import warnings
12
+
13
+ from .independence_tests_base import CondIndTest
14
+
15
class ParCorr(CondIndTest):
    r"""Partial correlation test.

    Partial correlation is estimated through linear ordinary least squares (OLS)
    regression and a test for non-zero linear Pearson correlation on the
    residuals.

    Notes
    -----
    To test :math:`X \perp Y | Z`, first :math:`Z` is regressed out from
    :math:`X` and :math:`Y` assuming the model

    .. math:: X & = Z \beta_X + \epsilon_{X} \\
        Y & = Z \beta_Y + \epsilon_{Y}

    using OLS regression. Then the dependency of the residuals is tested with
    the Pearson correlation test.

    .. math:: \rho\left(r_X, r_Y\right)

    For the ``significance='analytic'`` Student's-*t* distribution with
    :math:`T-D_Z-2` degrees of freedom is implemented.

    Assumes one-dimensional X, Y. Use ParCorrMult for multivariate X, Y.

    Parameters
    ----------
    **kwargs :
        Arguments passed on to Parent class CondIndTest.
    """

    @property
    def measure(self):
        """Concrete property to return the measure of the independence test."""
        return self._measure

    def __init__(self, **kwargs):
        # Identifier of this test statistic, reported via the measure property.
        self._measure = 'par_corr'
        # Partial correlation is a signed measure, hence two-sided tests apply.
        self.two_sided = True
        # Shuffle significance is computed on regression residuals.
        self.residual_based = True

        CondIndTest.__init__(self, **kwargs)

    def _standardize_inplace(self, array):
        """De-mean each row of ``array`` and scale it to unit std, in place.

        Rows with zero standard deviation are only de-meaned (avoiding a
        division by zero); in that case a warning is issued if
        ``self.verbosity > 0``. Shared by the single- and both-residual
        regressions so the two paths cannot drift apart.

        Parameters
        ----------
        array : array-like
            data array of shape (dim, T), modified in place
        """
        dim = array.shape[0]
        array -= array.mean(axis=1).reshape(dim, 1)
        std = array.std(axis=1)
        nonzero = std != 0.
        if np.any(nonzero):
            array[nonzero] /= std[nonzero, np.newaxis]
        if np.any(std == 0.) and self.verbosity > 0:
            warnings.warn("Possibly constant array!")

    def _get_single_residuals(self, array, target_var,
                              standardize=True,
                              return_means=False):
        """Returns residuals of linear multiple regression.

        Performs an OLS regression of the variable indexed by target_var on
        the conditions Z. Here array is assumed to contain X and Y as the
        first two rows with the remaining rows (if present) containing the
        conditions Z. Optionally returns the estimated regression line.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        target_var : {0, 1}
            Variable to regress out conditions from.

        standardize : bool, optional (default: True)
            Whether to standardize the array beforehand. Must be used for
            partial correlation.

        return_means : bool, optional (default: False)
            Whether to return the estimated regression line.

        Returns
        -------
        resid [, mean] : array-like
            The residual of the regression and optionally the estimated line.
        """
        dim, T = array.shape
        dim_z = dim - 2

        if standardize:
            # NOTE: standardization modifies the caller's array in place.
            self._standardize_inplace(array)

        y = array[target_var, :]

        if dim_z > 0:
            z = array[2:, :].T.copy()
            # Least-squares fit of y on the conditions Z.
            beta_hat = np.linalg.lstsq(z, y, rcond=None)[0]
            mean = np.dot(z, beta_hat)
            resid = y - mean
        else:
            # No conditions: the (standardized) variable is its own residual.
            resid = y
            mean = None

        if return_means:
            return (resid, mean)
        return resid

    def _get_both_residuals(self, array, standardize=True):
        """Returns residuals of linear multiple regression for both X and Y.

        Performs a single OLS regression of both X and Y on Z simultaneously,
        avoiding redundant QR decomposition. This is faster than calling
        _get_single_residuals twice.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        standardize : bool, optional (default: True)
            Whether to standardize the array beforehand.

        Returns
        -------
        x_resid, y_resid : tuple of array-like
            The residuals of X and Y after regressing out Z.
        """
        dim, T = array.shape
        dim_z = dim - 2

        if standardize:
            # Standardize once for both targets (in place).
            self._standardize_inplace(array)

        if dim_z > 0:
            z = array[2:, :].T.copy()
            # Solve for both X and Y targets in a single lstsq call; the
            # right-hand side is the (T, 2) matrix of stacked targets.
            beta_hat = np.linalg.lstsq(z, array[:2, :].T, rcond=None)[0]
            means = np.dot(z, beta_hat)
            x_resid = array[0, :] - means[:, 0]
            y_resid = array[1, :] - means[:, 1]
        else:
            x_resid = array[0, :]
            y_resid = array[1, :]

        return x_resid, y_resid

    def get_dependence_measure(self, array, xyz, data_type=None):
        """Return partial correlation.

        Estimated as the Pearson correlation of the residuals of a linear
        OLS regression.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        Returns
        -------
        val : float
            Partial correlation coefficient.
        """
        x_vals, y_vals = self._get_both_residuals(array)
        val, _ = stats.pearsonr(x_vals, y_vals)
        return val

    def get_shuffle_significance(self, array, xyz, value,
                                 return_null_dist=False,
                                 data_type=None):
        """Returns p-value for shuffle significance test.

        For residual-based test statistics only the residuals are shuffled.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        value : number
            Value of test statistic for unshuffled estimate.

        Returns
        -------
        pval : float
            p-value
        """
        x_vals, y_vals = self._get_both_residuals(array)
        array_resid = np.array([x_vals, y_vals])
        xyz_resid = np.array([0, 1])

        null_dist = self._get_shuffle_dist(array_resid, xyz_resid,
                                           self.get_dependence_measure,
                                           sig_samples=self.sig_samples,
                                           sig_blocklength=self.sig_blocklength,
                                           verbosity=self.verbosity)

        # Two-sided p-value with the +1 correction so pval is never zero.
        pval = float(np.sum(np.abs(null_dist) >= np.abs(value)) + 1) / (self.sig_samples + 1)

        if return_null_dist:
            return pval, null_dist
        return pval

    def get_analytic_significance(self, value, T, dim, xyz):
        """Returns analytic p-value from Student's t-test for the Pearson
        correlation coefficient.

        Assumes two-sided correlation. If the degrees of freedom are less than
        1, numpy.nan is returned.

        Parameters
        ----------
        value : float
            Test statistic value.

        T : int
            Sample length

        dim : int
            Dimensionality, ie, number of features.

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        Returns
        -------
        pval : float or numpy.nan
            P-value.
        """
        # Degrees of freedom: sample size minus number of variables (X, Y, Z).
        deg_f = T - dim

        if deg_f < 1:
            pval = np.nan
        elif abs(abs(value) - 1.0) <= sys.float_info.min:
            # |r| == 1 would make the t-transform divide by zero; the p-value
            # is exactly zero in that limit.
            pval = 0.0
        else:
            trafo_val = value * np.sqrt(deg_f/(1. - value*value))
            # Two-sided significance level
            pval = stats.t.sf(np.abs(trafo_val), deg_f) * 2

        return pval

    def get_analytic_confidence(self, value, df, conf_lev):
        """Returns analytic confidence interval for correlation coefficient.

        Based on Student's t-distribution.

        Parameters
        ----------
        value : float
            Test statistic value.

        df : int
            degrees of freedom of the test

        conf_lev : float
            Confidence interval, eg, 0.9

        Returns
        -------
        (conf_lower, conf_upper) : Tuple of floats
            Upper and lower confidence bound of confidence interval.
        """
        # Confidence interval is two-sided
        c_int = (1. - (1. - conf_lev) / 2.)

        value_tdist = value * np.sqrt(df) / np.sqrt(1. - value**2)
        # Hoist the two quantiles so each ppf is evaluated only once.
        ppf_lower = stats.t.ppf(q=1. - c_int, df=df, loc=value_tdist)
        ppf_upper = stats.t.ppf(q=c_int, df=df, loc=value_tdist)
        conf_lower = ppf_lower / np.sqrt(df + ppf_lower**2)
        conf_upper = ppf_upper / np.sqrt(df + ppf_upper**2)
        return (conf_lower, conf_upper)

    def get_model_selection_criterion(self, j, parents, tau_max=0, criterion='aic'):
        """Returns model selection criterion modulo constants.

        Fits a linear model of the parents to variable j and returns the
        score. Here used to determine optimal hyperparameters in PCMCI,
        in particular the pc_alpha value.

        Parameters
        ----------
        j : int
            Index of target variable in data array.

        parents : list
            List of form [(0, -1), (3, -2), ...] containing parents.

        tau_max : int, optional (default: 0)
            Maximum time lag. This may be used to make sure that estimates for
            different lags in X, Z, all have the same sample size.

        criterion : string
            Scoring criterion among AIC, BIC, or corrected AIC.

        Returns
        -------
        score : float
            Model score.
        """
        Y = [(j, 0)]
        X = [(j, 0)]  # dummy variable here
        Z = parents
        array, xyz, _ = self.dataframe.construct_array(X=X, Y=Y, Z=Z,
                                                       tau_max=tau_max,
                                                       mask_type=self.mask_type,
                                                       return_cleaned_xyz=False,
                                                       do_checks=True,
                                                       verbosity=self.verbosity)

        dim, T = array.shape

        # Residuals of Y (row 1) after regressing out the parents.
        y = self._get_single_residuals(array, target_var=1, return_means=False)
        # Residual sum of squares of the fit.
        rss = (y**2).sum()
        # Number of parameters: dim includes the dummy x, therefore -1,
        # which also accounts for de-meaning.
        p = dim - 1

        if criterion == 'corrected_aic':
            score = T * np.log(rss) + 2. * p + (2.*p**2 + 2.*p)/(T - p - 1)
        elif criterion == 'bic':
            # BIC = n*log(RSS/n) + K*log(n)
            score = T * np.log(rss / float(T)) + p * np.log(T)
        elif criterion == 'aic':
            score = T * np.log(rss) + 2. * p
        else:
            raise ValueError("Unknown scoring criterion.")

        return score