tigramite_fast-5.2.10.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. tigramite/__init__.py +0 -0
  2. tigramite/causal_effects.py +1525 -0
  3. tigramite/causal_mediation.py +1592 -0
  4. tigramite/data_processing.py +1574 -0
  5. tigramite/graphs.py +1509 -0
  6. tigramite/independence_tests/LBFGS.py +1114 -0
  7. tigramite/independence_tests/__init__.py +0 -0
  8. tigramite/independence_tests/cmiknn.py +661 -0
  9. tigramite/independence_tests/cmiknn_mixed.py +1397 -0
  10. tigramite/independence_tests/cmisymb.py +286 -0
  11. tigramite/independence_tests/gpdc.py +664 -0
  12. tigramite/independence_tests/gpdc_torch.py +820 -0
  13. tigramite/independence_tests/gsquared.py +190 -0
  14. tigramite/independence_tests/independence_tests_base.py +1310 -0
  15. tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
  16. tigramite/independence_tests/pairwise_CI.py +383 -0
  17. tigramite/independence_tests/parcorr.py +369 -0
  18. tigramite/independence_tests/parcorr_mult.py +485 -0
  19. tigramite/independence_tests/parcorr_wls.py +451 -0
  20. tigramite/independence_tests/regressionCI.py +403 -0
  21. tigramite/independence_tests/robust_parcorr.py +403 -0
  22. tigramite/jpcmciplus.py +966 -0
  23. tigramite/lpcmci.py +3649 -0
  24. tigramite/models.py +2257 -0
  25. tigramite/pcmci.py +3935 -0
  26. tigramite/pcmci_base.py +1218 -0
  27. tigramite/plotting.py +4735 -0
  28. tigramite/rpcmci.py +467 -0
  29. tigramite/toymodels/__init__.py +0 -0
  30. tigramite/toymodels/context_model.py +261 -0
  31. tigramite/toymodels/non_additive.py +1231 -0
  32. tigramite/toymodels/structural_causal_processes.py +1201 -0
  33. tigramite/toymodels/surrogate_generator.py +319 -0
  34. tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
  35. tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
  36. tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
  37. tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
  38. tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
tigramite/independence_tests/cmiknn.py
@@ -0,0 +1,661 @@
"""Tigramite causal discovery for time series."""

# Author: Jakob Runge <jakob@jakob-runge.com>
#
# License: GNU General Public License v3.0

from __future__ import print_function
from scipy import special, spatial, stats
import numpy as np
from numba import jit
import warnings

from tigramite.independence_tests.independence_tests_base import CondIndTest


class CMIknn(CondIndTest):
    r"""Conditional mutual information test based on a nearest-neighbor estimator.

    Conditional mutual information is the most general dependency measure
    coming from an information-theoretic framework. It makes no assumptions
    about the parametric form of the dependencies by directly estimating the
    underlying joint density. The test here is based on the estimator in
    S. Frenzel and B. Pompe, Phys. Rev. Lett. 99, 204101 (2007), combined with
    a shuffle test to generate the distribution under the null hypothesis of
    independence, as first used in [3]_. The knn-estimator is suitable only
    for variables taking a continuous range of values. For discrete variables
    use the CMIsymb class.

    Notes
    -----
    CMI is given by

    .. math:: I(X;Y|Z) &= \int p(z) \iint p(x,y|z) \log
        \frac{p(x,y|z)}{p(x|z)\cdot p(y|z)} \,dx\, dy\, dz

    Its knn-estimator is given by

    .. math:: \widehat{I}(X;Y|Z) &= \psi(k) + \frac{1}{T} \sum_{t=1}^T
        \left[ \psi(k_{Z,t}) - \psi(k_{XZ,t}) - \psi(k_{YZ,t}) \right]

    where :math:`\psi` is the Digamma function. This estimator has as a
    parameter the number of nearest-neighbors :math:`k` which determines the
    size of hyper-cubes around each (high-dimensional) sample point. Then
    :math:`k_{Z},k_{XZ},k_{YZ}` are the numbers of neighbors in the respective
    subspaces.

    :math:`k` can be viewed as a density smoothing parameter (although it is
    data-adaptive unlike fixed-bandwidth estimators). For large :math:`k`, the
    underlying dependencies are more smoothed and CMI has a larger bias, but
    lower variance, which is more important for significance testing. Note
    that the estimated CMI values can be slightly negative while CMI is a
    non-negative quantity.

    This method requires the scipy.spatial.cKDTree class.

    References
    ----------

    .. [3] J. Runge (2018): Conditional Independence Testing Based on a
           Nearest-Neighbor Estimator of Conditional Mutual Information.
           In Proceedings of the 21st International Conference on Artificial
           Intelligence and Statistics.
           http://proceedings.mlr.press/v84/runge18a.html

    Parameters
    ----------
    knn : int or float, optional (default: 0.2)
        Number of nearest-neighbors which determines the size of hyper-cubes
        around each (high-dimensional) sample point. If smaller than 1, this
        is computed as a fraction of T, hence knn=knn*T. For knn larger or
        equal to 1, this is the absolute number.

    shuffle_neighbors : int, optional (default: 5)
        Number of nearest-neighbors within Z for the shuffle surrogates which
        determines the size of hyper-cubes around each (high-dimensional)
        sample point.

    transform : {'ranks', 'standardize', 'uniform', False}, optional
        (default: 'ranks')
        Whether to transform the array beforehand by standardizing or
        transforming to uniform marginals.

    workers : int, optional (default: -1)
        Number of workers to use for parallel processing. If -1 is given,
        all processors are used.

    null_fit : {None, 'normal', 'gamma'}, optional (default: None)
        If None, the empirical surrogate distribution is used to compute the
        p-value (default behavior). If 'normal' or 'gamma', a parametric
        distribution is fit to the null samples:

        * 'normal': Fit a Gaussian N(μ, σ²) to the null distribution.
        * 'gamma' : Fit a three-parameter Gamma(a, loc, scale).

        This can reduce the number of required surrogate samples for
        significance testing, but may lead to miscalibrated p-values if the
        parametric family is a poor fit.

    permute : {'Y', 'X'}, optional (default: 'Y')
        Which variable to permute in the restricted shuffle test.

        - 'Y': shuffle Y within Z-neighborhoods (default). This is often
          preferable when Z is chosen as (approximate) parents of Y.
        - 'X': shuffle X within Z-neighborhoods. (This is the version
          described in the AISTATS paper J. Runge (2018).)

    model_selection_folds : int, optional (default: 3)
        Number of folds in the cross-validation used for model selection.

    significance : str, optional (default: 'shuffle_test')
        Type of significance test to use. For CMIknn only 'fixed_thres' and
        'shuffle_test' are available.

    **kwargs :
        Arguments passed on to parent class CondIndTest.
    """

    @property
    def measure(self):
        """Concrete property to return the measure of the independence test."""
        return self._measure

    def __init__(self,
                 knn=0.2,
                 shuffle_neighbors=5,
                 significance='shuffle_test',
                 transform='ranks',
                 workers=-1,
                 model_selection_folds=3,
                 null_fit=None,
                 permute='Y',
                 **kwargs):
        # Set the member variables
        self.knn = knn
        self.shuffle_neighbors = shuffle_neighbors
        self.transform = transform
        self._measure = 'cmi_knn'
        self.two_sided = False
        self.residual_based = False
        self.recycle_residuals = False
        self.workers = workers
        self.null_fit = null_fit
        self.permute = permute
        self.model_selection_folds = model_selection_folds
        # Call the parent constructor
        CondIndTest.__init__(self, significance=significance, **kwargs)
        # Print some information about construction
        if self.verbosity > 0:
            if self.knn < 1:
                print("knn/T = %s" % self.knn)
            else:
                print("knn = %s" % self.knn)
            print("shuffle_neighbors = %d\n" % self.shuffle_neighbors)
            print(f"Restricted shuffle permutes: {self.permute}")
            if self.null_fit is not None:
                print("Using parametric null fit:", self.null_fit)

    @jit(forceobj=True)
    def _get_nearest_neighbors(self, array, xyz, knn):
        """Returns nearest neighbors according to Frenzel and Pompe (2007).

        Retrieves the distances eps to the k-th nearest neighbors for every
        sample in joint space XYZ and returns the numbers of nearest neighbors
        within eps in subspaces Z, XZ, YZ.

        Parameters
        ----------
        array : array-like
            Data array with X, Y, Z in rows and observations in columns.

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        knn : int or float
            Number of nearest-neighbors which determines the size of
            hyper-cubes around each (high-dimensional) sample point. If
            smaller than 1, this is computed as a fraction of T, hence
            knn=knn*T. For knn larger or equal to 1, this is the absolute
            number.

        Returns
        -------
        k_xz, k_yz, k_z : tuple of arrays of shape (T,)
            Nearest neighbors in subspaces.
        """
        array = array.astype(np.float64)
        xyz = xyz.astype(np.int32)

        dim, T = array.shape

        # Add small noise to destroy ties
        array += (1E-6 * array.std(axis=1).reshape(dim, 1)
                  * self.random_state.random((array.shape[0], array.shape[1])))

        if self.transform == 'standardize':
            # Standardize
            array = array.astype(np.float64)
            array -= array.mean(axis=1).reshape(dim, 1)
            std = array.std(axis=1)
            nonzero = std != 0.
            if np.any(nonzero):
                array[nonzero] /= std[nonzero, np.newaxis]
            # FIXME: If the time series is constant, return nan rather than
            # raising an exception
            if np.any(std == 0.) and self.verbosity > 0:
                warnings.warn("Possibly constant array!")
        elif self.transform == 'uniform':
            array = self._trafo2uniform(array)
        elif self.transform == 'ranks':
            array = array.argsort(axis=1).argsort(axis=1).astype(np.float64)

        array = array.T

        # Compute distance to the k-th nearest neighbor, excluding the point
        # itself, hence k=[knn+1]
        tree_xyz = spatial.cKDTree(array)
        epsarray = tree_xyz.query(array, k=[knn+1], p=np.inf,
                                  workers=self.workers)[0][:, 0].astype(np.float64)

        # To search neighbors < eps instead of <= eps, reduce eps slightly
        epsarray = np.multiply(epsarray, 0.999999999)

        # Subspace indices
        x_indices = np.where(xyz == 0)[0]
        y_indices = np.where(xyz == 1)[0]
        z_indices = np.where(xyz == 2)[0]

        # Find nearest neighbors in subspaces
        xz = array[:, np.concatenate((x_indices, z_indices))]
        tree_xz = spatial.cKDTree(xz)
        k_xz = tree_xz.query_ball_point(xz, r=epsarray, p=np.inf,
                                        workers=self.workers, return_length=True)

        yz = array[:, np.concatenate((y_indices, z_indices))]
        tree_yz = spatial.cKDTree(yz)
        k_yz = tree_yz.query_ball_point(yz, r=epsarray, p=np.inf,
                                        workers=self.workers, return_length=True)

        if len(z_indices) > 0:
            z = array[:, z_indices]
            tree_z = spatial.cKDTree(z)
            k_z = tree_z.query_ball_point(z, r=epsarray, p=np.inf,
                                          workers=self.workers, return_length=True)
        else:
            # The number of neighbors is T when Z is empty
            k_z = np.full(T, T, dtype=np.float64)

        return k_xz, k_yz, k_z

    def get_dependence_measure(self, array, xyz, data_type=None):
        """Returns CMI estimate as described in Frenzel and Pompe PRL (2007).

        Parameters
        ----------
        array : array-like
            Data array with X, Y, Z in rows and observations in columns.

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        Returns
        -------
        val : float
            Conditional mutual information estimate.
        """
        dim, T = array.shape

        if self.knn < 1:
            knn_here = max(1, int(self.knn*T))
        else:
            knn_here = max(1, int(self.knn))

        k_xz, k_yz, k_z = self._get_nearest_neighbors(array=array,
                                                      xyz=xyz,
                                                      knn=knn_here)

        val = special.digamma(knn_here) - (special.digamma(k_xz) +
                                           special.digamma(k_yz) -
                                           special.digamma(k_z)).mean()

        return val
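
    # Quick numerical check (illustrative, not part of the package): for
    # bivariate Gaussians with correlation rho the true MI is
    # -0.5 * log(1 - rho**2), e.g. rho = 0.6 gives about 0.223 nats, which
    # the estimate should approach for large T:
    #
    #     rng = np.random.default_rng(0)
    #     xy = rng.multivariate_normal([0., 0.], [[1., .6], [.6, 1.]], size=2000)
    #     cmi = CMIknn(knn=0.1, seed=42)
    #     print(cmi.get_dependence_measure(xy.T, xyz=np.array([0, 1])))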

    def get_shuffle_significance(self, array, xyz, value,
                                 return_null_dist=False,
                                 data_type=None):
        """Returns p-value for nearest-neighbor shuffle significance test.

        For non-empty Z, overrides get_shuffle_significance from the parent
        class, which implements a block shuffle test that does not preserve
        the dependence of X and Y on Z. Here the parameter shuffle_neighbors
        is used to permute only those values :math:`y_i` and :math:`y_j` for
        which :math:`z_j` is among the nearest neighbors of :math:`z_i`. If Z
        is empty, the block-shuffle test is used. In the paper X is permuted,
        but permuting Y is preferable when Z is chosen as (approximate)
        parents of Y.

        Parameters
        ----------
        array : array-like
            Data array with X, Y, Z in rows and observations in columns.

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        value : number
            Value of test statistic for unshuffled estimate.

        Returns
        -------
        pval : float
            p-value.
        """
        dim, T = array.shape

        x_indices = np.where(xyz == 0)[0]
        y_indices = np.where(xyz == 1)[0]
        z_indices = np.where(xyz == 2)[0]

        if len(z_indices) > 0 and self.shuffle_neighbors < T:
            if self.verbosity > 2:
                print("            nearest-neighbor shuffle significance "
                      "test with n = %d and %d surrogates" % (
                          self.shuffle_neighbors, self.sig_samples))

            # Get nearest neighbors around each sample point in Z
            z_array = array[z_indices, :].copy()
            dim_z = len(z_indices)

            if self.transform == 'standardize':
                # Standardize
                z_array = z_array.astype(np.float64)
                z_array -= z_array.mean(axis=1).reshape(dim_z, 1)
                std = z_array.std(axis=1)
                nonzero = std != 0.
                if np.any(nonzero):
                    z_array[nonzero] /= std[nonzero, np.newaxis]
                # FIXME: If the time series is constant, return nan rather
                # than raising an exception
                if np.any(std == 0.) and self.verbosity > 0:
                    warnings.warn("Possibly constant array!")
            elif self.transform == 'uniform':
                z_array = self._trafo2uniform(z_array)
            elif self.transform == 'ranks':
                z_array = z_array.argsort(axis=1).argsort(axis=1).astype(np.float64)

            z_array = z_array.T

            tree_z = spatial.cKDTree(z_array)
            neighbors = tree_z.query(z_array,
                                     k=self.shuffle_neighbors,
                                     p=np.inf,
                                     eps=0.)[1].astype(np.int32)

            null_dist = np.zeros(self.sig_samples)
            for sam in range(self.sig_samples):

                # Generate random order in which to go through the indices
                # loop in the next step
                order = self.random_state.permutation(T).astype(np.int32)

                # Shuffle neighbor indices for each sample index
                for i in range(len(neighbors)):
                    self.random_state.shuffle(neighbors[i])

                # Select a series of neighbor indices that contains as few
                # duplicates as possible
                restricted_permutation = self.get_restricted_permutation(
                    T=T,
                    shuffle_neighbors=self.shuffle_neighbors,
                    neighbors=neighbors,
                    order=order)

                array_shuffled = np.copy(array)
                if self.permute == 'X':
                    array_shuffled[x_indices] = array[np.ix_(x_indices, restricted_permutation)]
                else:  # permute Y
                    array_shuffled[y_indices] = array[np.ix_(y_indices, restricted_permutation)]

                null_dist[sam] = self.get_dependence_measure(array_shuffled,
                                                             xyz)

        else:
            null_dist = \
                self._get_shuffle_dist(array, xyz,
                                       self.get_dependence_measure,
                                       sig_samples=self.sig_samples,
                                       sig_blocklength=self.sig_blocklength,
                                       verbosity=self.verbosity)

        if self.null_fit == 'normal':
            mu, sigma = stats.norm.fit(null_dist)
            pval = 1.0 - stats.norm.cdf(value, loc=mu, scale=sigma)
        elif self.null_fit == 'gamma':
            try:
                a, loc, scale = stats.gamma.fit(null_dist)
                pval = 1.0 - stats.gamma.cdf(value, a, loc=loc, scale=scale)
            except Exception as e:
                warnings.warn(f"Gamma fit failed, falling back to empirical: {e}")
                pval = float(np.sum(null_dist >= value) + 1) / (self.sig_samples + 1)
        else:
            # Default: empirical Monte Carlo p-value
            pval = float(np.sum(null_dist >= value) + 1) / (self.sig_samples + 1)

        if return_null_dist:
            # Sort
            null_dist.sort()
            return pval, null_dist
        return pval
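
    # Illustrative calibration check (not part of the package): before
    # trusting a parametric null_fit with few surrogates, one can compare
    # the parametric against the empirical p-value on the same null samples:
    #
    #     pval_emp = (np.sum(null_dist >= value) + 1.) / (len(null_dist) + 1.)
    #     a, loc, scale = stats.gamma.fit(null_dist)
    #     pval_par = 1. - stats.gamma.cdf(value, a, loc=loc, scale=scale)
    #     # A large |pval_emp - pval_par| signals a poor parametric fit.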

    def get_conditional_entropy(self, array, xyz):
        """Returns the nearest-neighbor conditional entropy estimate of H(X|Y).

        Parameters
        ----------
        array : array-like
            Data array with X, Y in rows and observations in columns.

        xyz : array of ints
            XYZ identifier array of shape (dim,). Here only 0 is used for X
            and 1 for Y.

        Returns
        -------
        val : float
            Entropy estimate.
        """
        dim, T = array.shape

        if self.knn < 1:
            knn_here = max(1, int(self.knn*T))
        else:
            knn_here = max(1, int(self.knn))

        array = array.astype(np.float64)

        # Add small noise to destroy ties
        array += (1E-6 * array.std(axis=1).reshape(dim, 1)
                  * self.random_state.random((array.shape[0], array.shape[1])))

        if self.transform == 'standardize':
            # Standardize
            array = array.astype(np.float64)
            array -= array.mean(axis=1).reshape(dim, 1)
            std = array.std(axis=1)
            nonzero = std != 0.
            if np.any(nonzero):
                array[nonzero] /= std[nonzero, np.newaxis]
            # FIXME: If the time series is constant, return nan rather than
            # raising an exception
            if np.any(std == 0.) and self.verbosity > 0:
                warnings.warn("Possibly constant array!")
        elif self.transform == 'uniform':
            array = self._trafo2uniform(array)
        elif self.transform == 'ranks':
            array = array.argsort(axis=1).argsort(axis=1).astype(np.float64)

        # Compute conditional entropy as H(X|Y) = H(X) - I(X;Y)

        # First compute H(X): use cKDTree to get the distances eps to the
        # k-th nearest neighbors for every sample in X with the maximum norm
        x_indices = np.where(xyz == 0)[0]
        y_indices = np.where(xyz == 1)[0]

        dim_x = int(np.where(xyz == 0)[0][-1] + 1)
        if 1 in xyz:
            dim_y = int(np.where(xyz == 1)[0][-1] + 1 - dim_x)
        else:
            dim_y = 0

        x_array = array[x_indices, :].T.copy()
        tree_x = spatial.cKDTree(x_array)
        epsarray = tree_x.query(x_array, k=[knn_here+1], p=np.inf,
                                eps=0., workers=self.workers)[0][:, 0].astype(np.float64)

        # Kozachenko-Leonenko estimate of H(X)
        h_x = - special.digamma(knn_here) + special.digamma(T) + dim_x * np.log(2.*epsarray).mean()

        # Then compute I(X;Y)
        if dim_y > 0:
            xyz_here = np.array([index for index in xyz if index == 0 or index == 1])
            array_xy = array[list(x_indices) + list(y_indices), :]
            i_xy = self.get_dependence_measure(array_xy, xyz_here)
        else:
            i_xy = 0.

        h_x_y = h_x - i_xy

        return h_x_y
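
    # Sanity check (illustrative): for a single standard-normal X and empty Y,
    # the estimate should be close to the analytic differential entropy
    # 0.5 * log(2 * pi * e), about 1.4189 nats. Note that the default
    # transform='ranks' changes the marginals and hence the value of H(X).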

    @jit(forceobj=True)
    def get_restricted_permutation(self, T, shuffle_neighbors, neighbors, order):

        restricted_permutation = np.zeros(T, dtype=np.int32)
        used = set()

        for sample_index in order:
            m = 0
            use = neighbors[sample_index, m]

            # Fall back to the next-nearest neighbor if this index was
            # already used, trying up to shuffle_neighbors candidates
            while ((use in used) and (m < shuffle_neighbors - 1)):
                m += 1
                use = neighbors[sample_index, m]

            restricted_permutation[sample_index] = use
            used.add(use)

        return restricted_permutation
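
    # Worked toy example (illustrative): with T=4, shuffle_neighbors=2 and
    # neighbors = [[1, 2], [1, 3], [3, 0], [3, 1]], processed in order
    # [0, 1, 2, 3]: sample 0 maps to 1; sample 1 falls back to 3 because 1 is
    # already used; sample 2 falls back to 0; sample 3 exhausts both
    # candidates and accepts the duplicate 1. The result [1, 3, 0, 1] is a
    # permutation-like map with as few duplicates as the neighbor lists allow.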

    def get_model_selection_criterion(self, j, parents, tau_max=0):
        """Returns a cross-validation-based score for nearest-neighbor estimates.

        Fits a nearest-neighbor model of the parents to variable j and returns
        the score. The lower, the better the fit. Here used to determine
        optimal hyperparameters in PCMCI (pc_alpha or fixed threshold).

        Parameters
        ----------
        j : int
            Index of target variable in data array.

        parents : list
            List of form [(0, -1), (3, -2), ...] containing parents.

        tau_max : int, optional (default: 0)
            Maximum time lag. This may be used to make sure that estimates
            for different lags in X and Z all have the same sample size.

        Returns
        -------
        score : float
            Model score.
        """
        from sklearn.neighbors import KNeighborsRegressor
        from sklearn.model_selection import cross_val_score

        Y = [(j, 0)]
        X = [(j, 0)]  # dummy variable here
        Z = parents
        array, xyz, _ = self.dataframe.construct_array(X=X, Y=Y, Z=Z,
                                                       tau_max=tau_max,
                                                       mask_type=self.mask_type,
                                                       return_cleaned_xyz=False,
                                                       do_checks=True,
                                                       verbosity=self.verbosity)
        dim, T = array.shape

        # Standardize
        array = array.astype(np.float64)
        array -= array.mean(axis=1).reshape(dim, 1)
        std = array.std(axis=1)
        nonzero = std != 0.
        if np.any(nonzero):
            array[nonzero] /= std[nonzero, np.newaxis]
        if np.any(std == 0.) and self.verbosity > 0:
            warnings.warn("Possibly constant array!")

        predictor_indices = list(np.where(xyz == 2)[0])
        predictor_array = array[predictor_indices, :].T
        # Target is only the first entry of Y, i.e. [y]
        target_array = array[np.where(xyz == 1)[0][0], :]

        if predictor_array.size == 0:
            # Regress on a constant if the parent set is empty
            predictor_array = np.ones(T).reshape(T, 1)

        if self.knn < 1:
            knn_here = max(1, int(self.knn*T))
        else:
            knn_here = max(1, int(self.knn))

        knn_model = KNeighborsRegressor(n_neighbors=knn_here)

        # cross_val_score returns the R^2 score for regressors by default;
        # negate the mean so that lower values indicate a better fit
        scores = cross_val_score(estimator=knn_model,
                                 X=predictor_array, y=target_array,
                                 cv=self.model_selection_folds,
                                 n_jobs=self.workers)

        return -scores.mean()


if __name__ == '__main__':

    from tigramite.data_processing import DataFrame
    import tigramite.toymodels.structural_causal_processes as toys

    seed = 42

    def lin_f(x): return x

    T = 200
    auto = 0.
    # With auto = 0. and no cross-links, X and Y are independent, so the
    # fraction of p-values <= 0.05 printed below estimates the false
    # positive rate, which should be close to 0.05 for a calibrated test.
    links = {0: [((0, -1), auto, lin_f)],
             1: [((1, -1), auto, lin_f)]}
    knn = 10
    maxlag = 1
    cmi = CMIknn(seed=seed, knn=knn,
                 sig_samples=50,
                 null_fit='gamma',
                 permute='X')

    realizations = 100
    realizations_data = toys.structural_causal_process_ensemble(
        realizations=realizations,
        ensemble_seed=seed,
        links=links, T=T, noises=None,
        intervention=None, intervention_type='hard',
        transient_fraction=0.2)

    rate = np.zeros(realizations)
    for r in range(realizations):
        data = realizations_data[0][r]

        cmi.set_dataframe(dataframe=DataFrame(data=data))
        val, pval = cmi.run_test(
            X=[(0, -lag) for lag in range(1, maxlag + 1)],
            Y=[(1, 0)],
            Z=[(1, -lag) for lag in range(1, maxlag + 1)])
        rate[r] = pval

    print((rate <= 0.05).mean())