tigramite_fast-5.2.10.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. tigramite/__init__.py +0 -0
  2. tigramite/causal_effects.py +1525 -0
  3. tigramite/causal_mediation.py +1592 -0
  4. tigramite/data_processing.py +1574 -0
  5. tigramite/graphs.py +1509 -0
  6. tigramite/independence_tests/LBFGS.py +1114 -0
  7. tigramite/independence_tests/__init__.py +0 -0
  8. tigramite/independence_tests/cmiknn.py +661 -0
  9. tigramite/independence_tests/cmiknn_mixed.py +1397 -0
  10. tigramite/independence_tests/cmisymb.py +286 -0
  11. tigramite/independence_tests/gpdc.py +664 -0
  12. tigramite/independence_tests/gpdc_torch.py +820 -0
  13. tigramite/independence_tests/gsquared.py +190 -0
  14. tigramite/independence_tests/independence_tests_base.py +1310 -0
  15. tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
  16. tigramite/independence_tests/pairwise_CI.py +383 -0
  17. tigramite/independence_tests/parcorr.py +369 -0
  18. tigramite/independence_tests/parcorr_mult.py +485 -0
  19. tigramite/independence_tests/parcorr_wls.py +451 -0
  20. tigramite/independence_tests/regressionCI.py +403 -0
  21. tigramite/independence_tests/robust_parcorr.py +403 -0
  22. tigramite/jpcmciplus.py +966 -0
  23. tigramite/lpcmci.py +3649 -0
  24. tigramite/models.py +2257 -0
  25. tigramite/pcmci.py +3935 -0
  26. tigramite/pcmci_base.py +1218 -0
  27. tigramite/plotting.py +4735 -0
  28. tigramite/rpcmci.py +467 -0
  29. tigramite/toymodels/__init__.py +0 -0
  30. tigramite/toymodels/context_model.py +261 -0
  31. tigramite/toymodels/non_additive.py +1231 -0
  32. tigramite/toymodels/structural_causal_processes.py +1201 -0
  33. tigramite/toymodels/surrogate_generator.py +319 -0
  34. tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
  35. tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
  36. tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
  37. tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
  38. tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
tigramite/independence_tests/cmisymb.py
@@ -0,0 +1,286 @@
+"""Tigramite causal discovery for time series."""
+
+# Author: Sagar Nagaraj Simha, Jakob Runge <jakob@jakob-runge.com>
+#
+# License: GNU General Public License v3.0
+
+from __future__ import print_function
+import warnings
+import numpy as np
+from scipy.stats.contingency import crosstab
+# from joblib import Parallel, delayed
+# import dask
+from numba import jit
+
+from .independence_tests_base import CondIndTest
+
+class CMIsymb(CondIndTest):
+    r"""Conditional mutual information test for discrete/categorical data.
+
+    Conditional mutual information is the most general dependency measure
+    coming from an information-theoretic framework. It makes no assumptions
+    about the parametric form of the dependencies by directly estimating the
+    underlying joint density. The test here is based on directly estimating
+    the joint distribution assuming symbolic input, combined with a
+    local shuffle test to generate the distribution under the null hypothesis
+    of independence. This estimator is suitable only for discrete variables.
+    For continuous variables use the CMIknn class, and for datasets mixing
+    discrete and continuous variables use the CMIknnMixed class.
+
+    Allows for multi-dimensional X, Y.
+
+    Notes
+    -----
+    CMI and its estimator are given by
+
+    .. math:: I(X;Y|Z) = \sum_z p(z) \sum_x \sum_y p(x,y|z) \log
+                \frac{p(x,y|z)}{p(x|z)\cdot p(y|z)}
+
+    Parameters
+    ----------
+    n_symbs : int, optional (default: None)
+        Number of symbols in input data. Should be at least as large as the
+        maximum array entry + 1. If None, n_symbs is inferred by scipy's
+        crosstab.
+
+    significance : str, optional (default: 'shuffle_test')
+        Type of significance test to use. For CMIsymb only 'fixed_thres' and
+        'shuffle_test' are available.
+
+    sig_blocklength : int, optional (default: 1)
+        Block length for block-shuffle significance test.
+
+    conf_blocklength : int, optional (default: 1)
+        Block length for block-bootstrap.
+
+    **kwargs :
+        Arguments passed on to parent class CondIndTest.
+    """
+    @property
+    def measure(self):
+        """
+        Concrete property to return the measure of the independence test
+        """
+        return self._measure
+
+    def __init__(self,
+                 n_symbs=None,
+                 significance='shuffle_test',
+                 sig_blocklength=1,
+                 conf_blocklength=1,
+                 **kwargs):
+        # Setup the member variables
+        self._measure = 'cmi_symb'
+        self.two_sided = False
+        self.residual_based = False
+        self.recycle_residuals = False
+        self.n_symbs = n_symbs
+        # Call the parent constructor
+        CondIndTest.__init__(self,
+                             significance=significance,
+                             sig_blocklength=sig_blocklength,
+                             conf_blocklength=conf_blocklength,
+                             **kwargs)
+
+        if self.verbosity > 0:
+            print("n_symbs = %s" % self.n_symbs)
+            print("")
+
+        if self.conf_blocklength is None or self.sig_blocklength is None:
+            warnings.warn("Automatic block-length estimations from decay of "
+                          "autocorrelation may not be correct for discrete "
+                          "data")
+
+    def get_dependence_measure(self, array, xyz, data_type=None):
+        """Returns CMI estimate based on contingency table from scipy's crosstab
+        to approximate probability mass.
+
+        Parameters
+        ----------
+        array : array-like
+            data array with X, Y, Z in rows and observations in columns
+
+        xyz : array of ints
+            XYZ identifier array of shape (dim,).
+
+        Returns
+        -------
+        val : float
+            Conditional mutual information estimate.
+        """
+
+        _, T = array.shape
+
+        if self.n_symbs is None:
+            levels = None
+        else:
+            # Assuming same list of levels for (z, y, x).
+            levels = np.tile(np.arange(self.n_symbs), (len(xyz), 1))
+
+        # High-dimensional contingency table
+        _, hist = crosstab(*(np.asarray(np.split(array, len(xyz), axis=0)).reshape((-1, T))), levels=levels,
+                           sparse=False)
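+        # `hist` is an n-dimensional count table with one axis per row of
+        # `array`, ordered as (X dims, Y dims, Z dims); e.g. for 1-D binary
+        # X, Y and a single binary Z it is a 2 x 2 x 2 table of counts.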
+
+        def _plogp_vector(T):
+            """Precalculation of p*log(p) needed for entropies."""
+            gfunc = np.zeros(T + 1)
+            data = np.arange(1, T + 1, 1)
+            gfunc[1:] = data * np.log(data)
+            def plogp_func(time):
+                return gfunc[time]
+            return np.vectorize(plogp_func)
+
+        # Dimensions of hist are (X, Y, Z^1, ..., Z^dz)
+        # plogp = _plogp_vector(T)
+        # hxyz = (-(plogp(hist)).sum() + plogp(T)) / float(T)
+        # hxz = (-(plogp(hist.sum(axis=1))).sum() + plogp(T)) / float(T)
+        # hyz = (-(plogp(hist.sum(axis=0))).sum() + plogp(T)) / float(T)
+        # hz = (-(plogp(hist.sum(axis=0).sum(axis=0))).sum() + plogp(T)) / float(T)
+
+        # Multivariate X, Y version
+        plogp = _plogp_vector(T)
+        hxyz = (-(plogp(hist)).sum() + plogp(T)) / float(T)
+        hxz = (-(plogp(hist.sum(axis=tuple(np.where(xyz==1)[0])))).sum() + plogp(T)) / float(T)
+        hyz = (-(plogp(hist.sum(axis=tuple(np.where(xyz==0)[0])))).sum() + plogp(T)) / float(T)
+        hz = (-(plogp(hist.sum(axis=tuple(np.where((xyz==0) | (xyz==1))[0])))).sum() + plogp(T)) / float(T)
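+        # CMI via the entropy decomposition
+        #     I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z),
+        # with each joint entropy estimated from the contingency table above.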
+        val = hxz + hyz - hz - hxyz
+
+        return val
+
+    def get_shuffle_significance(self, array, xyz, value,
+                                 return_null_dist=False,
+                                 data_type=None):
+        """Returns p-value for shuffle significance test.
+
+        Performs a local permutation test: x_i values are only permuted with
+        those x_j for which z_i = z_j. Samples are drawn without replacement
+        as much as possible.
+
+        Parameters
+        ----------
+        array : array-like
+            data array with X, Y, Z in rows and observations in columns.
+
+        xyz : array of ints
+            XYZ identifier array of shape (dim,).
+
+        value : number
+            Value of test statistic for original (unshuffled) estimate.
+
+        Returns
+        -------
+        pval : float
+            p-value.
+        """
+
+        dim, T = array.shape
+        x_indices = np.where(xyz == 0)[0]
+        z_indices = np.where(xyz == 2)[0]
+
+        if len(z_indices) > 0:
+            # Get neighbors around each sample point in z
+            z_array = array[z_indices, :].T
+            # Unique combinations of z in the data (z1, z2, z3 ...)
+            z_comb = np.unique(z_array, axis=0)
+
+            # Create neighbor indices of length z_comb with default as -1.
+            neighbors = np.full((len(z_comb), T), -1)
+            # Neighborhood indices for each unique combination in z_comb.
+            for i in range(len(z_comb)):
+                neighbor_indices = np.where((z_array == z_comb[i]).all(axis=1))[0]
+                neighbors[i, :len(neighbor_indices)] = neighbor_indices
+
+            random_seeds = self.random_state.integers(np.iinfo(np.int32).max, size=self.sig_samples)
+            # null_dist = Parallel(n_jobs=-1)(
+            #     delayed(self.parallelize_shuffles)(array, xyz, z_indices, x_indices, T, z_comb, neighbors, seed=seed) for seed in random_seeds)
+            # dask_jobs = [dask.delayed(self.parallelize_shuffles)(array, xyz, z_indices, x_indices, T, z_comb, neighbors, seed=seed) for seed in random_seeds]
+            # null_dist = dask.compute(dask_jobs)
+            # null_dist = np.asarray(null_dist)
+
+            null_dist = np.zeros(self.sig_samples)
+            for i, seed in enumerate(random_seeds):
+                null_dist[i] = self.parallelize_shuffles(array, xyz, z_indices, x_indices, T, z_comb, neighbors, seed=seed)
+
+        else:
+            null_dist = \
+                self._get_shuffle_dist(array, xyz,
+                                       self.get_dependence_measure,
+                                       sig_samples=self.sig_samples,
+                                       sig_blocklength=self.sig_blocklength,
+                                       verbosity=self.verbosity)
+
+        # pval = (null_dist >= value).mean()
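+        # Permutation p-value with add-one correction so that the smallest
+        # attainable p-value is 1/(sig_samples + 1) rather than 0.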
+        pval = float(np.sum(null_dist >= value) + 1) / (self.sig_samples + 1)
+
+        if return_null_dist:
+            return pval, null_dist
+        return pval
+
+    @jit(forceobj=True)
+    def parallelize_shuffles(self, array, xyz, z_indices, x_indices, T, z_comb, neighbors, seed=None):
+        # Generate random order in which to go through samples.
+        # order = self.random_state.permutation(T).astype('int32')
+        rng = np.random.default_rng(seed)
+        order = rng.permutation(T).astype('int32')
+
+        restricted_permutation = np.zeros(T, dtype='int32')
+        # A global list of used indices across time samples and combinations.
+        # Since there are no repetitive (z) indices across combinations, a global list can be used.
+        used = np.array([], dtype='int32')
+        for sample_index in order:
+            # Get the index of the z combination for sample_index in z_comb
+            z_choice_index = np.where((z_comb == array[z_indices, sample_index]).all(axis=1))[0][0]
+            neighbors_choices = neighbors[z_choice_index][neighbors[z_choice_index] > -1]
+            # Shuffle neighbors in-place to randomize the choice of indices
+            # self.random_state.shuffle(neighbors_choices)
+            rng.shuffle(neighbors_choices)
+
+            # Permuting indices
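+            # Draw without replacement as far as possible: walk through the
+            # shuffled neighbor list and take the first index not yet used;
+            # if all neighbors of this z-combination are already used, fall
+            # back to the last candidate (a repeated index).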
+            m = 0
+            use = neighbors_choices[m]
+            while ((use in used) and (m < len(neighbors_choices) - 1)):
+                m += 1
+                use = neighbors_choices[m]
+
+            restricted_permutation[sample_index] = use
+            used = np.append(used, use)
+
+        array_shuffled = np.copy(array)
+        for i in x_indices:
+            array_shuffled[i] = array[i, restricted_permutation]
+
+        return self.get_dependence_measure(array_shuffled,
+                                           xyz)
+
+
+if __name__ == '__main__':
+
+    import tigramite
+    from tigramite.data_processing import DataFrame
+    import tigramite.data_processing as pp
+    import numpy as np
+    # from dask.distributed import Client
+
+    # client = dask.distributed.Client(processes=True)
+    seed = 42
+    random_state = np.random.default_rng(seed=seed)
+    cmi = CMIsymb(sig_samples=200, seed=seed)
+
+    T = 1000
+    dimz = 5
+    z = random_state.binomial(n=1, p=0.5, size=(T, dimz)).reshape(T, dimz)
+    x = np.empty(T).reshape(T, 1)
+    y = np.empty(T).reshape(T, 1)
+    for t in range(T):
+        val = z[t, 0].squeeze()
+        prob = 0.2 + val*0.6
+        x[t] = random_state.choice([0, 1], p=[prob, 1.-prob])
+        y[t] = random_state.choice([0, 1, 2], p=[prob, (1.-prob)/2., (1.-prob)/2.])
+
+    print('start')
+    # print(client.dashboard_link)
+    # print(cmi.run_test_raw(x, y, z=None))
+    print(cmi.run_test_raw(x, y, z=z))
+
+    # client.close()
+