tigramite-fast 5.2.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. tigramite/__init__.py +0 -0
  2. tigramite/causal_effects.py +1525 -0
  3. tigramite/causal_mediation.py +1592 -0
  4. tigramite/data_processing.py +1574 -0
  5. tigramite/graphs.py +1509 -0
  6. tigramite/independence_tests/LBFGS.py +1114 -0
  7. tigramite/independence_tests/__init__.py +0 -0
  8. tigramite/independence_tests/cmiknn.py +661 -0
  9. tigramite/independence_tests/cmiknn_mixed.py +1397 -0
  10. tigramite/independence_tests/cmisymb.py +286 -0
  11. tigramite/independence_tests/gpdc.py +664 -0
  12. tigramite/independence_tests/gpdc_torch.py +820 -0
  13. tigramite/independence_tests/gsquared.py +190 -0
  14. tigramite/independence_tests/independence_tests_base.py +1310 -0
  15. tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
  16. tigramite/independence_tests/pairwise_CI.py +383 -0
  17. tigramite/independence_tests/parcorr.py +369 -0
  18. tigramite/independence_tests/parcorr_mult.py +485 -0
  19. tigramite/independence_tests/parcorr_wls.py +451 -0
  20. tigramite/independence_tests/regressionCI.py +403 -0
  21. tigramite/independence_tests/robust_parcorr.py +403 -0
  22. tigramite/jpcmciplus.py +966 -0
  23. tigramite/lpcmci.py +3649 -0
  24. tigramite/models.py +2257 -0
  25. tigramite/pcmci.py +3935 -0
  26. tigramite/pcmci_base.py +1218 -0
  27. tigramite/plotting.py +4735 -0
  28. tigramite/rpcmci.py +467 -0
  29. tigramite/toymodels/__init__.py +0 -0
  30. tigramite/toymodels/context_model.py +261 -0
  31. tigramite/toymodels/non_additive.py +1231 -0
  32. tigramite/toymodels/structural_causal_processes.py +1201 -0
  33. tigramite/toymodels/surrogate_generator.py +319 -0
  34. tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
  35. tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
  36. tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
  37. tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
  38. tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,485 @@
1
+ """Tigramite causal discovery for time series."""
2
+
3
+ # Author: Jakob Runge <jakob@jakob-runge.com>
4
+ #
5
+ # License: GNU General Public License v3.0
6
+
7
+ from __future__ import print_function
8
+ from scipy import stats, linalg
9
+ import numpy as np
10
+ import sys
11
+ import warnings
12
+
13
+ from tigramite.independence_tests.independence_tests_base import CondIndTest
14
+
15
class ParCorrMult(CondIndTest):
    r"""Partial correlation test for multivariate X and Y.

    Multivariate partial correlation is estimated through ordinary least
    squares (OLS) regression and some test for multivariate dependency among
    the residuals.

    Notes
    -----
    To test :math:`X \perp Y | Z`, first :math:`Z` is regressed out from
    :math:`X` and :math:`Y` assuming the model

    .. math:: X & =  Z \beta_X + \epsilon_{X} \\
        Y & = Z \beta_Y + \epsilon_{Y}

    using OLS regression. Then different measures for the dependency among
    the residuals can be used. Implemented are a test for zero correlation
    on the maximum of the residuals' correlations ('max_corr') and a
    canonical-correlation based Wilks-Lambda statistic ('PCCA_WilksLambda').

    Parameters
    ----------
    correlation_type : {'max_corr', 'PCCA_WilksLambda'}
        Which dependency measure to use on residuals.

    **kwargs :
        Arguments passed on to Parent class CondIndTest.
    """

    @property
    def measure(self):
        """
        Concrete property to return the measure of the independence test
        (here: 'par_corr_mult').
        """
        return self._measure
48
+ def __init__(self, correlation_type='max_corr', **kwargs):
49
+ self._measure = 'par_corr_mult'
50
+ self.two_sided = True
51
+ self.residual_based = True
52
+
53
+ self.correlation_type = correlation_type
54
+
55
+ if self.correlation_type not in ['max_corr', 'PCCA_WilksLambda']:
56
+ raise ValueError("correlation_type must be in ['max_corr', 'PCCA_WilksLambda']")
57
+
58
+ CondIndTest.__init__(self, **kwargs)
59
+
60
+ def _get_single_residuals(self, array, xyz, target_var,
61
+ standardize=True,
62
+ return_means=False):
63
+ """Returns residuals of linear multiple regression.
64
+
65
+ Performs a OLS regression of the variable indexed by target_var on the
66
+ conditions Z. Here array is assumed to contain X and Y as the first two
67
+ rows with the remaining rows (if present) containing the conditions Z.
68
+ Optionally returns the estimated regression line.
69
+
70
+ Parameters
71
+ ----------
72
+ array : array-like
73
+ data array with X, Y, Z in rows and observations in columns
74
+
75
+ xyz : array of ints
76
+ XYZ identifier array of shape (dim,).
77
+
78
+ target_var : {0, 1}
79
+ Variable to regress out conditions from.
80
+
81
+ standardize : bool, optional (default: True)
82
+ Whether to standardize the array beforehand. Must be used for
83
+ partial correlation.
84
+
85
+ return_means : bool, optional (default: False)
86
+ Whether to return the estimated regression line.
87
+
88
+ Returns
89
+ -------
90
+ resid [, mean] : array-like
91
+ The residual of the regression and optionally the estimated line.
92
+ """
93
+
94
+ dim, T = array.shape
95
+ dim_z = (xyz == 2).sum()
96
+
97
+ # Standardize
98
+ if standardize:
99
+ array -= array.mean(axis=1).reshape(dim, 1)
100
+ std = array.std(axis=1)
101
+ for i in range(dim):
102
+ if std[i] != 0.:
103
+ array[i] /= std[i]
104
+ if np.any(std == 0.) and self.verbosity > 0:
105
+ warnings.warn("Possibly constant array!")
106
+ # array /= array.std(axis=1).reshape(dim, 1)
107
+ # if np.isnan(array).sum() != 0:
108
+ # raise ValueError("nans after standardizing, "
109
+ # "possibly constant array!")
110
+
111
+ y = array[np.where(xyz==target_var)[0], :].T.copy()
112
+
113
+ if dim_z > 0:
114
+ z = (array[np.where(xyz==2)[0], :]).T.copy()
115
+ beta_hat = np.linalg.lstsq(z, y, rcond=None)[0]
116
+ mean = np.dot(z, beta_hat)
117
+ resid = y - mean
118
+ else:
119
+ resid = y
120
+ mean = None
121
+
122
+ if return_means:
123
+ return (np.fastCopyAndTranspose(resid), np.fastCopyAndTranspose(mean))
124
+
125
+ return resid.T.copy()
126
+
127
+ def get_dependence_measure(self, array, xyz, data_type=None):
128
+ """Return multivariate kernel correlation coefficient.
129
+
130
+ Estimated as some dependency measure on the
131
+ residuals of a linear OLS regression.
132
+
133
+ Parameters
134
+ ----------
135
+ array : array-like
136
+ data array with X, Y, Z in rows and observations in columns
137
+
138
+ xyz : array of ints
139
+ XYZ identifier array of shape (dim,).
140
+
141
+ Returns
142
+ -------
143
+ val : float
144
+ Partial correlation coefficient.
145
+ """
146
+
147
+ dim, T = array.shape
148
+ dim_x = (xyz==0).sum()
149
+ dim_y = (xyz==1).sum()
150
+
151
+ x_vals = self._get_single_residuals(array, xyz, target_var=0)
152
+ y_vals = self._get_single_residuals(array, xyz, target_var=1)
153
+
154
+ array_resid = np.vstack((x_vals.reshape(dim_x, T), y_vals.reshape(dim_y, T)))
155
+ xyz_resid = np.array([index_code for index_code in xyz if index_code != 2])
156
+
157
+ val = self.mult_corr(array_resid, xyz_resid)
158
+
159
+ return val
160
+
161
+ def compute_wilks_lambda_cca(self, X, Y, num_components=None):
162
+ """
163
+ Compute Wilks Lambda from canonical correlations between X and Y using NumPy.
164
+
165
+ Parameters:
166
+ - X, Y: Arrays of shape (n_features, n_samples)
167
+ - num_components: Number of canonical correlations to return (default: all)
168
+
169
+ Returns:
170
+ - wilks_lambda: Wilks Lambda of canonical correlations
171
+ """
172
+
173
+ # Center variables (per feature)
174
+ X = X - X.mean(axis=1, keepdims=True)
175
+ Y = Y - Y.mean(axis=1, keepdims=True)
176
+
177
+ # Compute covariance matrices (features are rows, samples are columns)
178
+ Cxx = np.cov(X, rowvar=True)
179
+ Cyy = np.cov(Y, rowvar=True)
180
+ Cxy = np.cov(X, Y, rowvar=True)[:X.shape[0], X.shape[0]:]
181
+
182
+ # Ensure matrices are at least 2D
183
+ if Cxx.ndim == 0:
184
+ Cxx = np.array([[Cxx]])
185
+ if Cyy.ndim == 0:
186
+ Cyy = np.array([[Cyy]])
187
+ if Cxy.ndim == 1:
188
+ Cxy = Cxy[:, None]
189
+
190
+ # Regularization for stability
191
+ eps = 1e-10
192
+ Cxx += eps * np.eye(Cxx.shape[0])
193
+ Cyy += eps * np.eye(Cyy.shape[0])
194
+
195
+ # Solve generalized eigenvalue problem
196
+ M = linalg.inv(Cxx) @ Cxy @ linalg.inv(Cyy) @ Cxy.T
197
+
198
+ eigvals = np.linalg.eigvalsh(M)
199
+ eigvals = np.flip(np.real(eigvals)) # sort descending
200
+ rho_sq = np.abs(np.maximum(eigvals, 0.0))
201
+
202
+ if num_components is not None:
203
+ rho_sq = rho_sq[:num_components]
204
+
205
+ wilks_lambda = np.prod([1 - rho for rho in rho_sq])
206
+
207
+ return wilks_lambda
208
+
209
+ def mult_corr(self, array, xyz, standardize=True):
210
+ """Return multivariate dependency measure.
211
+
212
+ Parameters
213
+ ----------
214
+ array : array-like
215
+ data array with X, Y in rows and observations in columns
216
+
217
+ xyz : array of ints
218
+ XYZ identifier array of shape (dim,).
219
+
220
+ standardize : bool, optional (default: True)
221
+ Whether to standardize the array beforehand. Must be used for
222
+ partial correlation.
223
+
224
+ Returns
225
+ -------
226
+ val : float
227
+ Multivariate dependency measure.
228
+ """
229
+
230
+ dim, n = array.shape
231
+ dim_x = (xyz==0).sum()
232
+ dim_y = (xyz==1).sum()
233
+
234
+ # Standardize
235
+ if standardize:
236
+ array -= array.mean(axis=1).reshape(dim, 1)
237
+ std = array.std(axis=1)
238
+ for i in range(dim):
239
+ if std[i] != 0.:
240
+ array[i] /= std[i]
241
+ if np.any(std == 0.) and self.verbosity > 0:
242
+ warnings.warn("Possibly constant array!")
243
+ # array /= array.std(axis=1).reshape(dim, 1)
244
+ # if np.isnan(array).sum() != 0:
245
+ # raise ValueError("nans after standardizing, "
246
+ # "possibly constant array!")
247
+
248
+ x = array[np.where(xyz==0)[0]]
249
+ y = array[np.where(xyz==1)[0]]
250
+
251
+ if self.correlation_type == 'max_corr':
252
+ # Get (positive or negative) absolute maximum correlation value
253
+ corr = np.corrcoef(x, y)[:len(x), len(x):].flatten()
254
+ val = corr[np.argmax(np.abs(corr))]
255
+
256
+ # val = 0.
257
+ # for x_vals in x:
258
+ # for y_vals in y:
259
+ # val_here, _ = stats.pearsonr(x_vals, y_vals)
260
+ # val = max(val, np.abs(val_here))
261
+ elif self.correlation_type == 'PCCA_WilksLambda':
262
+ val = self.compute_wilks_lambda_cca(x, y)
263
+
264
+ # elif self.correlation_type == 'linear_hsci':
265
+ # # For linear kernel and standardized data (centered and divided by std)
266
+ # # biased V -statistic of HSIC reduces to sum of squared inner products
267
+ # # over all dimensions
268
+ # val = ((x.dot(y.T)/float(n))**2).sum()
269
+ else:
270
+ raise NotImplementedError("Currently only"
271
+ "correlation_type == 'max_corr' and 'PCCA_WilksLambda' implemented.")
272
+
273
+ return val
274
+
275
+ def get_shuffle_significance(self, array, xyz, value,
276
+ return_null_dist=False,
277
+ data_type=None):
278
+ """Returns p-value for shuffle significance test.
279
+
280
+ For residual-based test statistics only the residuals are shuffled.
281
+
282
+ Parameters
283
+ ----------
284
+ array : array-like
285
+ data array with X, Y, Z in rows and observations in columns
286
+
287
+ xyz : array of ints
288
+ XYZ identifier array of shape (dim,).
289
+
290
+ value : number
291
+ Value of test statistic for unshuffled estimate.
292
+
293
+ Returns
294
+ -------
295
+ pval : float
296
+ p-value
297
+ """
298
+
299
+ dim, T = array.shape
300
+ dim_x = (xyz==0).sum()
301
+ dim_y = (xyz==1).sum()
302
+
303
+ x_vals = self._get_single_residuals(array, xyz, target_var=0)
304
+ y_vals = self._get_single_residuals(array, xyz, target_var=1)
305
+
306
+ array_resid = np.vstack((x_vals.reshape(dim_x, T), y_vals.reshape(dim_y, T)))
307
+ xyz_resid = np.array([index_code for index_code in xyz if index_code != 2])
308
+
309
+
310
+ null_dist = self._get_shuffle_dist(array_resid, xyz_resid,
311
+ self.get_dependence_measure,
312
+ sig_samples=self.sig_samples,
313
+ sig_blocklength=self.sig_blocklength,
314
+ verbosity=self.verbosity)
315
+
316
+ # pval = (null_dist >= np.abs(value)).mean()
317
+ pval = float(np.sum(np.abs(null_dist) >= np.abs(value)) + 1) / (self.sig_samples + 1)
318
+
319
+ # # Adjust p-value for two-sided measures
320
+ # if pval < 1.:
321
+ # pval *= 2.
322
+
323
+ # Adjust p-value for dimensions of x and y (conservative Bonferroni-correction)
324
+ # pval *= dim_x*dim_y
325
+
326
+ if return_null_dist:
327
+ return pval, null_dist
328
+ return pval
329
+
330
+ def get_analytic_significance(self, value, T, dim, xyz):
331
+ """Returns analytic p-value depending on correlation_type.
332
+
333
+ Assumes two-sided correlation. If the degrees of freedom are less than
334
+ 1, numpy.nan is returned.
335
+
336
+ Parameters
337
+ ----------
338
+ value : float
339
+ Test statistic value.
340
+
341
+ T : int
342
+ Sample length
343
+
344
+ dim : int
345
+ Dimensionality, ie, number of features.
346
+
347
+ xyz : array of ints
348
+ XYZ identifier array of shape (dim,).
349
+
350
+ Returns
351
+ -------
352
+ pval : float or numpy.nan
353
+ P-value.
354
+ """
355
+ # Get the number of degrees of freedom
356
+ deg_f = T - dim
357
+
358
+ dim_x = (xyz==0).sum()
359
+ dim_y = (xyz==1).sum()
360
+ dim_z = dim - dim_x - dim_y
361
+
362
+ if self.correlation_type == 'max_corr':
363
+ if deg_f < 1:
364
+ pval = np.nan
365
+ elif abs(abs(value) - 1.0) <= sys.float_info.min:
366
+ pval = 0.0
367
+ else:
368
+ trafo_val = value * np.sqrt(deg_f/(1. - value*value))
369
+ # Two sided significance level
370
+ pval = stats.t.sf(np.abs(trafo_val), deg_f) * 2
371
+ # Adjust p-value for dimensions of x and y (conservative Bonferroni-correction)
372
+ pval *= dim_x*dim_y
373
+ elif self.correlation_type == 'PCCA_WilksLambda':
374
+ if deg_f < 1:
375
+ pval = np.nan
376
+ else:
377
+ trafo_val = -(T - dim_z - 0.5 * (dim_x + dim_y + 1)) * np.log(value)
378
+ dof = dim_x * dim_y
379
+ pval = 1 - stats.chi2.cdf(trafo_val, dof)
380
+ else:
381
+ raise NotImplementedError("Currently only"
382
+ "correlation_type == 'max_corr' and 'PCCA_WilksLambda' implemented.")
383
+
384
+
385
+ return pval
386
+
387
+ def get_model_selection_criterion(self, j, parents, tau_max=0, corrected_aic=False):
388
+ """Returns Akaike's Information criterion modulo constants.
389
+
390
+ Fits a linear model of the parents to each variable in j and returns
391
+ the average score. Leave-one-out cross-validation is asymptotically
392
+ equivalent to AIC for ordinary linear regression models. Here used to
393
+ determine optimal hyperparameters in PCMCI, in particular the
394
+ pc_alpha value.
395
+
396
+ Parameters
397
+ ----------
398
+ j : int
399
+ Index of target variable in data array.
400
+
401
+ parents : list
402
+ List of form [(0, -1), (3, -2), ...] containing parents.
403
+
404
+ tau_max : int, optional (default: 0)
405
+ Maximum time lag. This may be used to make sure that estimates for
406
+ different lags in X, Z, all have the same sample size.
407
+
408
+ Returns:
409
+ score : float
410
+ Model score.
411
+ """
412
+
413
+ Y = [(j, 0)]
414
+ X = [(j, 0)] # dummy variable here
415
+ Z = parents
416
+ array, xyz, _ = self.dataframe.construct_array(X=X, Y=Y, Z=Z,
417
+ tau_max=tau_max,
418
+ mask_type=self.mask_type,
419
+ return_cleaned_xyz=False,
420
+ do_checks=True,
421
+ verbosity=self.verbosity)
422
+
423
+ dim, T = array.shape
424
+
425
+ y = self._get_single_residuals(array, xyz, target_var=0)
426
+
427
+ n_comps = y.shape[0]
428
+ score = 0.
429
+ for y_component in y:
430
+ # Get RSS
431
+ rss = (y_component**2).sum()
432
+ # Number of parameters
433
+ p = dim - 1
434
+ # Get AIC
435
+ if corrected_aic:
436
+ comp_score = T * np.log(rss) + 2. * p + (2.*p**2 + 2.*p)/(T - p - 1)
437
+ else:
438
+ comp_score = T * np.log(rss) + 2. * p
439
+ score += comp_score
440
+
441
+ score /= float(n_comps)
442
+ return score
443
+
444
+
445
if __name__ == '__main__':

    import tigramite
    from tigramite.data_processing import DataFrame
    import timeit

    # Empirical rejection rate of the Wilks-Lambda test on independent
    # Gaussian data; for a calibrated test this should be close to 0.05.
    seed = None
    rng = np.random.default_rng(seed=seed)
    test = ParCorrMult(
        correlation_type='PCCA_WilksLambda',
        # significance = 'shuffle_test',
        # sig_samples=1000,
    )

    n_trials = 5000
    rejections = np.zeros(n_trials)
    for trial in range(n_trials):
        print(trial)
        data = rng.standard_normal((100, 6))
        frame = DataFrame(data)
        test.set_dataframe(frame)

        pval = test.run_test(
            X=[(0, 0), (1, 0)],
            Y=[(2, 0), (3, 0)],
            Z=[(4, 0), (5, 0)],
        )[1]
        rejections[trial] = pval <= 0.05

    print(rejections.mean())