tigramite-fast 5.2.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. tigramite/__init__.py +0 -0
  2. tigramite/causal_effects.py +1525 -0
  3. tigramite/causal_mediation.py +1592 -0
  4. tigramite/data_processing.py +1574 -0
  5. tigramite/graphs.py +1509 -0
  6. tigramite/independence_tests/LBFGS.py +1114 -0
  7. tigramite/independence_tests/__init__.py +0 -0
  8. tigramite/independence_tests/cmiknn.py +661 -0
  9. tigramite/independence_tests/cmiknn_mixed.py +1397 -0
  10. tigramite/independence_tests/cmisymb.py +286 -0
  11. tigramite/independence_tests/gpdc.py +664 -0
  12. tigramite/independence_tests/gpdc_torch.py +820 -0
  13. tigramite/independence_tests/gsquared.py +190 -0
  14. tigramite/independence_tests/independence_tests_base.py +1310 -0
  15. tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
  16. tigramite/independence_tests/pairwise_CI.py +383 -0
  17. tigramite/independence_tests/parcorr.py +369 -0
  18. tigramite/independence_tests/parcorr_mult.py +485 -0
  19. tigramite/independence_tests/parcorr_wls.py +451 -0
  20. tigramite/independence_tests/regressionCI.py +403 -0
  21. tigramite/independence_tests/robust_parcorr.py +403 -0
  22. tigramite/jpcmciplus.py +966 -0
  23. tigramite/lpcmci.py +3649 -0
  24. tigramite/models.py +2257 -0
  25. tigramite/pcmci.py +3935 -0
  26. tigramite/pcmci_base.py +1218 -0
  27. tigramite/plotting.py +4735 -0
  28. tigramite/rpcmci.py +467 -0
  29. tigramite/toymodels/__init__.py +0 -0
  30. tigramite/toymodels/context_model.py +261 -0
  31. tigramite/toymodels/non_additive.py +1231 -0
  32. tigramite/toymodels/structural_causal_processes.py +1201 -0
  33. tigramite/toymodels/surrogate_generator.py +319 -0
  34. tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
  35. tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
  36. tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
  37. tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
  38. tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,451 @@
+ from __future__ import print_function
+ import numpy as np
+ import warnings
+
+ from .parcorr import ParCorr
+ from .robust_parcorr import RobustParCorr
+ # from tigramite.independence_tests.parcorr import ParCorr
+ # from tigramite.independence_tests.robust_parcorr import RobustParCorr
+ from tigramite import data_processing as pp
+
+
+ class ParCorrWLS(ParCorr):
+     r"""Weighted partial correlation test.
+
+     Partial correlation is estimated through linear weighted least squares (WLS)
+     regression and a test for non-zero linear Pearson correlation on the
+     residuals.
+     Either the variances, i.e. the weights, are known, or they can be estimated
+     using non-parametric regression (based on k nearest neighbours).
+
+     Notes
+     -----
+     To test :math:`X \perp Y | Z`, first :math:`Z` is regressed out from
+     :math:`X` and :math:`Y` assuming the model
+
+     .. math::  X & =  Z \beta_X + \epsilon_{X} \\
+         Y & = Z \beta_Y + \epsilon_{Y}
+
+     using WLS regression. Here, we do not assume homoskedasticity of the error
+     terms. Then the dependency of the residuals is tested with the Pearson
+     correlation test.
+
+     .. math::  \rho\left(r_X, r_Y\right)
+
+     For ``significance='analytic'``, the Student's *t*-distribution with
+     :math:`T-D_Z-2` degrees of freedom is implemented.
+
+     Parameters
+     ----------
+     gt_std_matrix : array-like, optional (default: None)
+         Standard deviations of the noise, of shape (T, nb_nodes).
+     expert_knowledge : string or dict (default: "time-dependent heteroskedasticity")
+         Either the string "time-dependent heteroskedasticity", meaning that
+         every variable has only time-dependent heteroskedasticity; or the
+         string "homoskedasticity", assuming homoskedasticity for all
+         variables; or a dictionary containing expert knowledge about the
+         heteroskedastic relationships as lists of tuples or strings.
+     window_size : int (default: 10)
+         Number of nearest neighbours used for estimating the variance function.
+     robustify : bool (default: False)
+         Indicates whether the robust partial correlation test should be used,
+         i.e. whether the data should be transformed to normal marginals
+         before testing.
+     **kwargs :
+         Arguments passed on to the parent class ParCorr.
+     """
+
+     def __init__(self, gt_std_matrix=None,
+                  expert_knowledge="time-dependent heteroskedasticity",
+                  window_size=10, robustify=False, **kwargs):
+
+         self.gt_std_matrix = gt_std_matrix
+         self.expert_knowledge = expert_knowledge
+         self.window_size = window_size
+         self.robustify = robustify
+
+         self.stds = None
+
+         ParCorr.__init__(self,
+                          recycle_residuals=False,  # doesn't work with ParCorrWLS
+                          **kwargs)
+         self._measure = 'par_corr_wls'
+
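Read on its own, the constructor is easiest to understand from a small instantiation. A minimal sketch, assuming the noise standard deviations are known up front (all values hypothetical); a supplied gt_std_matrix bypasses the estimation helpers below:

import numpy as np
from tigramite.independence_tests.parcorr_wls import ParCorrWLS

T, N = 500, 3
# Hypothetical known noise scales, one column per variable.
gt_stds = np.ones((T, N))
gt_stds[:, 1] = np.linspace(0.5, 2.0, T)  # variable 1 fans out over time

ci_test = ParCorrWLS(gt_std_matrix=gt_stds,
                     robustify=False,
                     significance='analytic')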
+     def _stds_preparation(self, X, Y, Z, tau_max=0, cut_off='2xtau_max', verbosity=0):
+         """Helper function to bring expert_knowledge into standard form."""
+
+         if self.expert_knowledge == "time-dependent heteroskedasticity":
+             self.expert_knowledge = {variable: ["time-dependent heteroskedasticity"]
+                                      for variable in range(self.dataframe.N)}
+         elif self.expert_knowledge == "homoskedasticity":
+             self.expert_knowledge = {}
+
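The three accepted forms of expert_knowledge, written out as they look after this normalization (variable indices hypothetical):

# String shorthand: every variable gets time-dependent heteroskedasticity.
ek_time = "time-dependent heteroskedasticity"

# Homoskedasticity: normalized to an empty dict, so all weights stay at 1.
ek_homo = "homoskedasticity"

# Explicit dict: variable 0 is time-dependent, variable 2 depends on the
# lagged parent (1, -1); unlisted variables are treated as homoskedastic.
ek_dict = {0: ["time-dependent heteroskedasticity"],
           2: [(1, -1)]}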
+     def _get_array(self, X, Y, Z, tau_max=0, cut_off='2xtau_max', verbosity=0,
+                    return_cleaned_xyz=True, remove_constant_data=False):
+         """Convenience wrapper around construct_array. Simultaneously constructs
+         self.stds, which needs to correspond to the variables in the array."""
+
+         if self.measure in ['par_corr_wls']:
+             if len(X) > 1 or len(Y) > 1:
+                 raise ValueError("X and Y for %s must be univariate." % self.measure)
+
+         Z_orig = Z.copy()
+         expert_knowledge_XY = []
+         for var in [X[0][0], Y[0][0]]:
+             if not isinstance(self.expert_knowledge, str) and var in self.expert_knowledge:
+                 expert_knowledge_XY += self.expert_knowledge[var]
+
+         # Add heteroskedasticity-inducing parents to Z (they are removed again
+         # later) to obtain data cleaned in the same way as X and Y for weight
+         # estimation.
+         for item in expert_knowledge_XY:
+             if isinstance(item, tuple):
+                 Z += [item]
+
+         # Call the _get_array function of the parent class
+         if remove_constant_data:
+             array, xyz, XYZ, data_type, nonzero_array, nonzero_xyz, nonzero_XYZ, \
+                 nonzero_data_type = super()._get_array(
+                     X=X, Y=Y, Z=Z,
+                     tau_max=tau_max,
+                     cut_off=cut_off,
+                     verbosity=verbosity,
+                     remove_constant_data=remove_constant_data)
+
+             X, Y, Z = XYZ
+             flat_XYZ = X + Y + Z
+             counter = None if (len(Z) - len(Z_orig)) <= 0 else -1 * (len(Z) - len(Z_orig))
+             data_hs_parent = {}
+             for item in expert_knowledge_XY:
+                 if isinstance(item, tuple):
+                     data_hs_parent[item] = array[flat_XYZ.index(item), :]
+
+             # The stds have to correspond to the array without the zero-rows.
+             nonzero_array_copy = nonzero_array.copy()
+             nonzero_X, nonzero_Y, nonzero_Z = nonzero_XYZ
+             self._get_std_estimation(nonzero_array_copy, nonzero_X, nonzero_Y,
+                                      nonzero_Z, tau_max, cut_off, verbosity,
+                                      data_hs_parent)
+
+             if data_type is not None:
+                 data_type = data_type[:counter]
+                 nonzero_data_type = nonzero_data_type[:counter]
+
+             return array[:counter], xyz[:counter], (X, Y, Z[:counter]), data_type, \
+                 nonzero_array[:counter], nonzero_xyz[:counter], \
+                 (nonzero_X, nonzero_Y, nonzero_Z[:counter]), nonzero_data_type
+
+         else:
+             array, xyz, XYZ, data_type = super()._get_array(
+                 X=X, Y=Y, Z=Z,
+                 tau_max=tau_max,
+                 cut_off=cut_off,
+                 verbosity=verbosity,
+                 remove_constant_data=remove_constant_data)
+
+             X, Y, Z = XYZ
+             flat_XYZ = X + Y + Z
+             counter = None if (len(Z) - len(Z_orig)) <= 0 else -1 * (len(Z) - len(Z_orig))
+
+             # Save the data of the heteroskedasticity-inducing parents to use
+             # for weight estimation (keyed by parent tuple, as expected by
+             # _get_std_estimation).
+             data_hs_parent = {}
+             for item in expert_knowledge_XY:
+                 if isinstance(item, tuple):
+                     data_hs_parent[item] = array[flat_XYZ.index(item), :]
+
+             array_copy = array.copy()
+             self._get_std_estimation(array_copy, X, Y, Z, tau_max, cut_off,
+                                      verbosity, data_hs_parent)
+             if data_type is not None:
+                 data_type = data_type[:counter]
+
+             return array[:counter], xyz[:counter], (X, Y, Z[:counter]), data_type
+
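Both branches trim the helper parents back off with the same negative-slice trick; a standalone illustration with hypothetical index tuples:

Z_orig = [(2, -1)]
Z = Z_orig + [(0, -1), (1, -2)]       # two helper parents appended
n_extra = len(Z) - len(Z_orig)
counter = None if n_extra <= 0 else -n_extra
assert Z[:counter] == Z_orig          # [:None] keeps all; [:-2] drops the helpers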
+     def _estimate_std_time(self, arr, target_var):
+         """Estimate the standard deviations of the error terms using a
+         residual-based approach. First calculate the absolute values of the
+         residuals using OLS, then smooth them using a sliding window while
+         keeping the time order of the residuals. In this way we can
+         approximate variances that are time-dependent.
+
+         Parameters
+         ----------
+         arr : array
+             Data array of shape (dim, T).
+         target_var : {0, 1}
+             Variable to regress out conditions from.
+
+         Returns
+         -------
+         std_est : array
+             Standard deviation array of shape (T,).
+         """
+         dim, T = arr.shape
+         dim_z = dim - 2
+         # Standardization is not necessary for variance estimation.
+         y = np.copy(arr[target_var, :])
+
+         if dim_z > 0:
+             z = arr[2:, :].T.copy()
+             beta_hat = np.linalg.lstsq(z, y, rcond=None)[0]
+             mean = np.dot(z, beta_hat)
+             resid = abs(y - mean)
+         else:
+             resid = abs(y)
+
+         # Average the absolute residuals within a sliding window.
+         std_est = np.concatenate(
+             (np.ones(self.window_size - 1),
+              np.convolve(resid, np.ones(self.window_size), 'valid') / self.window_size))
+         return std_est
+
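A standalone sketch of the smoothing step with hypothetical data, showing why the output keeps length T: a 'valid' convolution shortens the series by window_size - 1, and the padding with ones restores the full length:

import numpy as np

T, window_size = 12, 4
rng = np.random.default_rng(0)
resid = np.abs(rng.normal(size=T))      # stand-in for the OLS residuals

smoothed = np.convolve(resid, np.ones(window_size), 'valid') / window_size
std_est = np.concatenate((np.ones(window_size - 1), smoothed))

assert std_est.shape == (T,)            # padding restores the full length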
+     def _estimate_std_parent(self, arr, target_var, target_lag, H, data_hs_parent):
+         """Estimate the standard deviations of the error terms using a
+         residual-based approach. First calculate the absolute values of the
+         residuals using OLS, then smooth them by averaging over the
+         window_size residuals that are closest in H-value. In this way we are
+         able to deal with parent-dependent heteroskedasticity.
+
+         Parameters
+         ----------
+         arr : array
+             Data array of shape (dim, T).
+         target_var : {0, 1}
+             Variable to obtain the noise variance approximation for.
+         target_lag : int (non-positive)
+             Lag of the variable to obtain the noise variance approximation for.
+         H : tuple of the form (var, -tau)
+             Variable to use for the sorting of the residuals, i.e. the
+             variable that the heteroskedasticity depends on; var specifies
+             the variable index and tau the time lag.
+         data_hs_parent : array
+             Data of the heteroskedasticity-inducing parent H, of shape (T,).
+
+         Returns
+         -------
+         std_est : array
+             Standard deviation array of shape (T,).
+         """
+         dim, T = arr.shape
+         dim_z = dim - 2
+         y = np.copy(arr[target_var, :])
+
+         if dim_z > 0:
+             z = arr[2:, :].T.copy()
+             beta_hat = np.linalg.lstsq(z, y, rcond=None)[0]
+             mean = np.dot(z, beta_hat)
+             resid = abs(y - mean)
+             lag = H[1] + target_lag
+
+             # Order the residuals w.r.t. the heteroskedasticity-inducing parent.
+             h = data_hs_parent[-1 * lag:]
+
+             ordered_z_ind = np.argsort(h)
+             ordered_z_ind = ordered_z_ind * (ordered_z_ind > 0)
+             revert_argsort = np.argsort(ordered_z_ind)
+
+             truncate_resid = resid[np.abs(lag):]
+             sorted_resid = truncate_resid[ordered_z_ind]
+
+             # Smooth the nearest-neighbour residuals.
+             variance_est_sorted = np.concatenate(
+                 (np.ones(self.window_size - 1),
+                  np.convolve(sorted_resid, np.ones(self.window_size), 'valid') / self.window_size))
+             std_est = variance_est_sorted[revert_argsort]
+             std_est = np.concatenate((std_est, np.ones(np.abs(lag))))
+             std_est = np.roll(std_est, np.abs(lag))
+         else:
+             resid = abs(y)
+             std_est = np.concatenate(
+                 (np.ones(self.window_size - 1),
+                  np.convolve(resid, np.ones(self.window_size), 'valid') / self.window_size))
+
+         return std_est
+
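The sort / smooth / undo-sort pattern above is easiest to see in isolation. A minimal sketch with hypothetical data; the double argsort inverts the sorting permutation:

import numpy as np

rng = np.random.default_rng(1)
h = rng.normal(size=10)                # parent values driving the variance
resid = np.abs(rng.normal(size=10))    # absolute residuals in time order

order = np.argsort(h)                  # permutation sorting by parent value
inverse = np.argsort(order)            # argsort of a permutation inverts it
sorted_resid = resid[order]            # neighbours in h-value become adjacent
# (the method smooths sorted_resid with a moving average at this point)
restored = sorted_resid[inverse]       # undo the sort: back to time order

assert np.allclose(restored, resid)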
+     def _get_std_estimation(self, array, X, Y, Z=None, tau_max=0,
+                             cut_off='2xtau_max', verbosity=0, data_hs_parent=None):
+         """Use expert knowledge on the heteroskedastic relationships contained
+         in self.expert_knowledge to estimate the standard deviations of the
+         error terms. The expert knowledge can specify whether there is
+         sampling-index / time-dependent heteroskedasticity, heteroskedasticity
+         with respect to a specified parent, or homoskedasticity.
+
+         Parameters
+         ----------
+         array : array
+             Data array of shape (dim, T).
+         X, Y : list of tuples
+             X and Y are of the form [(var, -tau)], where var specifies the
+             variable index and tau the time lag.
+
+         Returns
+         -------
+         stds : array-like
+             Array of standard deviations of the error terms for X and Y of
+             shape (2, T).
+         """
+         if Z is None:
+             Z = []
+         self._stds_preparation(X, Y, Z, tau_max, cut_off, verbosity)
+
+         dim, T = array.shape
+         if self.gt_std_matrix is not None:
+             stds_dataframe = pp.DataFrame(self.gt_std_matrix,
+                                           mask=self.dataframe.mask,
+                                           missing_flag=self.dataframe.missing_flag,
+                                           datatime={0: np.arange(len(self.gt_std_matrix[:, 0]))})
+             stds, _, _, _ = stds_dataframe.construct_array(X=X, Y=Y, Z=Z,
+                                                            tau_max=tau_max,
+                                                            mask_type=self.mask_type,
+                                                            return_cleaned_xyz=True,
+                                                            do_checks=True,
+                                                            remove_overlaps=True,
+                                                            cut_off=cut_off,
+                                                            verbosity=verbosity)
+         else:
+             stds = np.ones((2, T))
+             for count, variable in enumerate([X[0], Y[0]]):
+                 # Here we assume it is known what the heteroskedasticity
+                 # function depends on for every variable.
+                 if variable[0] in self.expert_knowledge:
+                     hs_source = self.expert_knowledge[variable[0]][0]
+                     if hs_source == "time-dependent heteroskedasticity":
+                         stds[count] = self._estimate_std_time(array, count)
+                     elif isinstance(hs_source, tuple):
+                         stds[count] = self._estimate_std_parent(array, count, variable[1],
+                                                                 hs_source, data_hs_parent[hs_source])
+
+         self.stds = stds
+         return stds
+
+     def _get_single_residuals(self, array, target_var,
+                               standardize=False,
+                               return_means=False):
+         """Returns residuals of weighted linear multiple regression.
+
+         Performs a WLS regression of the variable indexed by target_var on
+         the conditions Z. Here array is assumed to contain X and Y as the
+         first two rows with the remaining rows (if present) containing the
+         conditions Z. Optionally returns the estimated regression line.
+
+         Parameters
+         ----------
+         array : array-like
+             Data array with X, Y, Z in rows and observations in columns.
+         target_var : {0, 1}
+             Variable to regress out conditions from.
+         standardize : bool, optional (default: False)
+             Whether to standardize the array beforehand. Must be used for
+             partial correlation.
+         return_means : bool, optional (default: False)
+             Whether to return the estimated regression line.
+
+         Returns
+         -------
+         resid [, mean] : array-like
+             The residuals of the regression and optionally the estimated line.
+         """
+         dim, T = array.shape
+         dim_z = dim - 2
+
+         if np.isnan(np.sum(array)):
+             raise ValueError("array has nans")
+
+         try:
+             stds = self.stds[target_var]
+         except TypeError:
+             warnings.warn("No estimated or ground-truth standard deviations "
+                           "supplied for the weights. Assuming homoskedasticity, "
+                           "i.e. all weights are 1.")
+             stds = np.ones(T)
+
+         # Standardize
+         if standardize:
+             array -= array.mean(axis=1).reshape(dim, 1)
+             std = array.std(axis=1)
+             for i in range(dim):
+                 if std[i] != 0.:
+                     array[i] /= std[i]
+             if np.any(std == 0.) and self.verbosity > 0:
+                 warnings.warn("Possibly constant array!")
+             if np.isnan(np.sum(array)):
+                 raise ValueError("array has nans")
+
+         y = np.copy(array[target_var, :])
+         weights = np.diag(np.reciprocal(stds))
+
+         if dim_z > 0:
+             z = array[2:, :].T.copy()
+             # Include the weights in z and y.
+             zw = np.dot(weights, z)
+             yw = np.dot(y, weights)
+             beta_hat = np.linalg.lstsq(zw, yw, rcond=None)[0]
+             mean = np.dot(z, beta_hat)
+             resid = np.dot(y - mean, weights)
+             if np.isnan(np.sum(resid)):
+                 raise ValueError("resid has nans")
+         else:
+             resid = np.dot(y, weights)
+             mean = None
+
+         if return_means:
+             return resid, mean
+         return resid
+
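The weighting above is the textbook reduction of WLS to OLS: scale each observation by 1/sigma_t and solve ordinary least squares on the scaled data. A minimal sketch with synthetic heteroskedastic data (all values hypothetical):

import numpy as np

rng = np.random.default_rng(2)
T = 500
z = rng.normal(size=(T, 1))
stds = 0.5 + np.abs(z[:, 0])           # noise scale grows with |z|
y = 2.0 * z[:, 0] + stds * rng.normal(size=T)

# WLS as OLS on rescaled data: divide each observation by its noise std.
w = 1.0 / stds
beta_wls = np.linalg.lstsq(z * w[:, None], y * w, rcond=None)[0]
resid = (y - z @ beta_wls) * w         # weighted residuals, roughly unit variance

print(beta_wls)                        # close to 2.0

Using a weight vector here instead of the T-by-T diagonal matrix built by np.diag in the method above is mathematically equivalent and avoids the quadratic memory cost.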
+     def get_dependence_measure(self, array, xyz, data_type=None):
+         """Returns the partial correlation of the weighted residuals,
+         optionally after transforming the data to normal marginals."""
+         if self.robustify:
+             array = RobustParCorr.trafo2normal(self, array)
+         return ParCorr.get_dependence_measure(self, array, xyz)
+
+     def get_shuffle_significance(self, array, xyz, value,
+                                  return_null_dist=False,
+                                  data_type=None):
+         """Returns the p-value from a shuffle test on the weighted residuals."""
+         if self.robustify:
+             array = RobustParCorr.trafo2normal(self, array)
+         return ParCorr.get_shuffle_significance(self, array, xyz, value,
+                                                 return_null_dist=return_null_dist)
+
+     def get_model_selection_criterion(self, j, parents, tau_max=0, corrected_aic=False):
+         """Returns Akaike's Information Criterion modulo constants.
+
+         Fits a linear model of the parents to variable j and returns the
+         score. Leave-one-out cross-validation is asymptotically equivalent to
+         AIC for ordinary linear regression models. Here it is used to
+         determine optimal hyperparameters in PCMCI, in particular the
+         pc_alpha value.
+
+         Parameters
+         ----------
+         j : int
+             Index of target variable in data array.
+         parents : list
+             List of form [(0, -1), (3, -2), ...] containing parents.
+         tau_max : int, optional (default: 0)
+             Maximum time lag. This may be used to make sure that estimates
+             for different lags in X and Z all have the same sample size.
+         corrected_aic : bool, optional (default: False)
+             Whether to add the small-sample correction term to the AIC score.
+
+         Returns
+         -------
+         score : float
+             Model score.
+         """
+         Y = [(j, 0)]
+         X = [(j, 0)]  # dummy variable here
+         Z = parents
+         array, xyz, _, _ = self._get_array(X, Y, Z, tau_max=tau_max,
+                                            verbosity=self.verbosity,
+                                            return_cleaned_xyz=False)
+         dim, T = array.shape
+
+         # Transform to normal marginals
+         if self.robustify:
+             array = RobustParCorr.trafo2normal(self, array)
+
+         y = self._get_single_residuals(array, target_var=1, return_means=False)
+         # Residual sum of squares
+         rss = (y ** 2).sum()
+         # Number of parameters
+         p = dim - 1
+         # AIC score, optionally with the small-sample correction term
+         if corrected_aic:
+             score = T * np.log(rss) + 2. * p + (2. * p ** 2 + 2. * p) / (T - p - 1)
+         else:
+             score = T * np.log(rss) + 2. * p
+         return score
+
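To close, a sketch of how the test plugs into causal discovery; the toy data are hypothetical, while pp.DataFrame, PCMCI, and run_pcmci are the package's documented entry points:

import numpy as np
from tigramite import data_processing as pp
from tigramite.pcmci import PCMCI
from tigramite.independence_tests.parcorr_wls import ParCorrWLS

# Hypothetical toy data: the noise scale of variable 1 grows over time.
rng = np.random.default_rng(42)
T = 500
data = np.zeros((T, 2))
for t in range(1, T):
    data[t, 0] = 0.7 * data[t - 1, 0] + rng.normal()
    data[t, 1] = 0.5 * data[t - 1, 0] + (0.5 + t / T) * rng.normal()

dataframe = pp.DataFrame(data, var_names=['X0', 'X1'])
ci_test = ParCorrWLS(expert_knowledge="time-dependent heteroskedasticity",
                     window_size=20)
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=ci_test)
results = pcmci.run_pcmci(tau_max=2, pc_alpha=0.05)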