tigramite-fast 5.2.10.1 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tigramite/__init__.py +0 -0
- tigramite/causal_effects.py +1525 -0
- tigramite/causal_mediation.py +1592 -0
- tigramite/data_processing.py +1574 -0
- tigramite/graphs.py +1509 -0
- tigramite/independence_tests/LBFGS.py +1114 -0
- tigramite/independence_tests/__init__.py +0 -0
- tigramite/independence_tests/cmiknn.py +661 -0
- tigramite/independence_tests/cmiknn_mixed.py +1397 -0
- tigramite/independence_tests/cmisymb.py +286 -0
- tigramite/independence_tests/gpdc.py +664 -0
- tigramite/independence_tests/gpdc_torch.py +820 -0
- tigramite/independence_tests/gsquared.py +190 -0
- tigramite/independence_tests/independence_tests_base.py +1310 -0
- tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
- tigramite/independence_tests/pairwise_CI.py +383 -0
- tigramite/independence_tests/parcorr.py +369 -0
- tigramite/independence_tests/parcorr_mult.py +485 -0
- tigramite/independence_tests/parcorr_wls.py +451 -0
- tigramite/independence_tests/regressionCI.py +403 -0
- tigramite/independence_tests/robust_parcorr.py +403 -0
- tigramite/jpcmciplus.py +966 -0
- tigramite/lpcmci.py +3649 -0
- tigramite/models.py +2257 -0
- tigramite/pcmci.py +3935 -0
- tigramite/pcmci_base.py +1218 -0
- tigramite/plotting.py +4735 -0
- tigramite/rpcmci.py +467 -0
- tigramite/toymodels/__init__.py +0 -0
- tigramite/toymodels/context_model.py +261 -0
- tigramite/toymodels/non_additive.py +1231 -0
- tigramite/toymodels/structural_causal_processes.py +1201 -0
- tigramite/toymodels/surrogate_generator.py +319 -0
- tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
- tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
- tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
- tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
- tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
tigramite/independence_tests/cmisymb.py
@@ -0,0 +1,286 @@
"""Tigramite causal discovery for time series."""

# Author: Sagar Nagaraj Simha, Jakob Runge <jakob@jakob-runge.com>
#
# License: GNU General Public License v3.0

from __future__ import print_function
import warnings
import numpy as np
from scipy.stats.contingency import crosstab
# from joblib import Parallel, delayed
# import dask
from numba import jit

from .independence_tests_base import CondIndTest

class CMIsymb(CondIndTest):
    r"""Conditional mutual information test for discrete/categorical data.

    Conditional mutual information is the most general dependency measure
    coming from an information-theoretic framework. It makes no assumptions
    about the parametric form of the dependencies by directly estimating the
    underlying joint density. The test here is based on directly estimating
    the joint distribution assuming symbolic input, combined with a local
    shuffle test to generate the distribution under the null hypothesis of
    independence. This estimator is suitable only for discrete variables.
    For continuous variables use the CMIknn class, and for datasets with
    mixed (discrete and continuous) variables use the CMIknnMixed class.

    Allows for multi-dimensional X, Y.

    Notes
    -----
    CMI and its estimator are given by

    .. math:: I(X;Y|Z) = \sum_z p(z) \sum_x \sum_y p(x,y|z) \log
                \frac{p(x,y|z)}{p(x|z)\cdot p(y|z)}

    Parameters
    ----------
    n_symbs : int, optional (default: None)
        Number of symbols in input data. Should be at least as large as the
        maximum array entry + 1. If None, n_symbs is inferred by scipy's crosstab.

    significance : str, optional (default: 'shuffle_test')
        Type of significance test to use. For CMIsymb only 'fixed_thres' and
        'shuffle_test' are available.

    sig_blocklength : int, optional (default: 1)
        Block length for block-shuffle significance test.

    conf_blocklength : int, optional (default: 1)
        Block length for block-bootstrap.

    **kwargs :
        Arguments passed on to parent class CondIndTest.
    """
    @property
    def measure(self):
        """
        Concrete property to return the measure of the independence test
        """
        return self._measure

    def __init__(self,
                 n_symbs=None,
                 significance='shuffle_test',
                 sig_blocklength=1,
                 conf_blocklength=1,
                 **kwargs):
        # Setup the member variables
        self._measure = 'cmi_symb'
        self.two_sided = False
        self.residual_based = False
        self.recycle_residuals = False
        self.n_symbs = n_symbs
        # Call the parent constructor
        CondIndTest.__init__(self,
                             significance=significance,
                             sig_blocklength=sig_blocklength,
                             conf_blocklength=conf_blocklength,
                             **kwargs)

        if self.verbosity > 0:
            print("n_symbs = %s" % self.n_symbs)
            print("")

        if self.conf_blocklength is None or self.sig_blocklength is None:
            warnings.warn("Automatic block-length estimations from decay of "
                          "autocorrelation may not be correct for discrete "
                          "data")
    def get_dependence_measure(self, array, xyz, data_type=None):
        """Returns CMI estimate based on contingency table from scipy's crosstab
        to approximate probability mass.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        Returns
        -------
        val : float
            Conditional mutual information estimate.
        """

        _, T = array.shape

        if self.n_symbs is None:
            levels = None
        else:
            # Assuming same list of levels for (z, y, x).
            levels = np.tile(np.arange(self.n_symbs), (len(xyz), 1))

        # High-dimensional contingency table
        _, hist = crosstab(*(np.asarray(np.split(array, len(xyz), axis=0)).reshape((-1, T))),
                           levels=levels, sparse=False)

        def _plogp_vector(T):
            """Precalculation of p*log(p) needed for entropies."""
            gfunc = np.zeros(T + 1)
            data = np.arange(1, T + 1, 1)
            gfunc[1:] = data * np.log(data)
            def plogp_func(time):
                return gfunc[time]
            return np.vectorize(plogp_func)

        # Dimensions of hist are (X, Y, Z^1, ..., Z^dz)
        # plogp = _plogp_vector(T)
        # hxyz = (-(plogp(hist)).sum() + plogp(T)) / float(T)
        # hxz = (-(plogp(hist.sum(axis=1))).sum() + plogp(T)) / float(T)
        # hyz = (-(plogp(hist.sum(axis=0))).sum() + plogp(T)) / float(T)
        # hz = (-(plogp(hist.sum(axis=0).sum(axis=0))).sum() + plogp(T)) / float(T)

        # Multivariate X, Y version: CMI from the entropy decomposition
        # I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z)
        plogp = _plogp_vector(T)
        hxyz = (-(plogp(hist)).sum() + plogp(T)) / float(T)
        hxz = (-(plogp(hist.sum(axis=tuple(np.where(xyz==1)[0])))).sum() + plogp(T)) / float(T)
        hyz = (-(plogp(hist.sum(axis=tuple(np.where(xyz==0)[0])))).sum() + plogp(T)) / float(T)
        hz = (-(plogp(hist.sum(axis=tuple(np.where((xyz==0) | (xyz==1))[0])))).sum() + plogp(T)) / float(T)
        val = hxz + hyz - hz - hxyz

        return val
    def get_shuffle_significance(self, array, xyz, value,
                                 return_null_dist=False,
                                 data_type=None):
        """Returns p-value for shuffle significance test.

        Performs a local permutation test: x_i values are only permuted with
        those x_j for which z_i = z_j. Samples are drawn without replacement
        as much as possible.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns.

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        value : number
            Value of test statistic for original (unshuffled) estimate.

        Returns
        -------
        pval : float
            p-value.
        """

        dim, T = array.shape
        x_indices = np.where(xyz == 0)[0]
        z_indices = np.where(xyz == 2)[0]

        if len(z_indices) > 0:
            # Get neighbors around each sample point in z
            z_array = array[z_indices, :].T
            # Unique combinations of z in the data (z1, z2, z3 ...)
            z_comb = np.unique(z_array, axis=0)

            # Create neighbor indices of length z_comb with default as -1.
            neighbors = np.full((len(z_comb), T), -1)
            # Neighborhood indices for each unique combination in z_comb.
            for i in range(len(z_comb)):
                neighbor_indices = np.where((z_array == z_comb[i]).all(axis=1))[0]
                neighbors[i, :len(neighbor_indices)] = neighbor_indices

            random_seeds = self.random_state.integers(np.iinfo(np.int32).max, size=self.sig_samples)
            # null_dist = Parallel(n_jobs=-1)(
            #     delayed(self.parallelize_shuffles)(array, xyz, z_indices, x_indices, T, z_comb, neighbors, seed=seed) for seed in random_seeds)
            # dask_jobs = [dask.delayed(self.parallelize_shuffles)(array, xyz, z_indices, x_indices, T, z_comb, neighbors, seed=seed) for seed in random_seeds]
            # null_dist = dask.compute(dask_jobs)
            # null_dist = np.asarray(null_dist)

            null_dist = np.zeros(self.sig_samples)
            for i, seed in enumerate(random_seeds):
                null_dist[i] = self.parallelize_shuffles(array, xyz, z_indices, x_indices, T, z_comb, neighbors, seed=seed)

        else:
            null_dist = \
                self._get_shuffle_dist(array, xyz,
                                       self.get_dependence_measure,
                                       sig_samples=self.sig_samples,
                                       sig_blocklength=self.sig_blocklength,
                                       verbosity=self.verbosity)

        # pval = (null_dist >= value).mean()
        pval = float(np.sum(null_dist >= value) + 1) / (self.sig_samples + 1)

        if return_null_dist:
            return pval, null_dist
        return pval
    @jit(forceobj=True)
    def parallelize_shuffles(self, array, xyz, z_indices, x_indices, T, z_comb, neighbors, seed=None):
        # Generate random order in which to go through samples.
        # order = self.random_state.permutation(T).astype('int32')
        rng = np.random.default_rng(seed)
        order = rng.permutation(T).astype('int32')

        restricted_permutation = np.zeros(T, dtype='int32')
        # A global list of used indices across time samples and combinations.
        # Since there are no repetitive (z) indices across combinations, a global list can be used.
        used = np.array([], dtype='int32')
        for sample_index in order:
            # Get the index of the z combination for sample_index in z_comb
            z_choice_index = np.where((z_comb == array[z_indices, sample_index]).all(axis=1))[0][0]
            neighbors_choices = neighbors[z_choice_index][neighbors[z_choice_index] > -1]
            # Shuffle neighbors in-place to randomize the choice of indices
            # self.random_state.shuffle(neighbors_choices)
            rng.shuffle(neighbors_choices)

            # Permuting indices
            m = 0
            use = neighbors_choices[m]
            while ((use in used) and (m < len(neighbors_choices))):
                m += 1
                use = neighbors_choices[m]

            restricted_permutation[sample_index] = use
            used = np.append(used, use)

        array_shuffled = np.copy(array)
        for i in x_indices:
            array_shuffled[i] = array[i, restricted_permutation]

        return self.get_dependence_measure(array_shuffled,
                                           xyz)
if __name__ == '__main__':

    import tigramite
    from tigramite.data_processing import DataFrame
    import tigramite.data_processing as pp
    import numpy as np
    # from dask.distributed import Client

    # client = dask.distributed.Client(processes=True)
    seed = 42
    random_state = np.random.default_rng(seed=seed)
    cmi = CMIsymb(sig_samples=200, seed=seed)

    T = 1000
    dimz = 5
    z = random_state.binomial(n=1, p=0.5, size=(T, dimz)).reshape(T, dimz)
    x = np.empty(T).reshape(T, 1)
    y = np.empty(T).reshape(T, 1)
    for t in range(T):
        val = z[t, 0].squeeze()
        prob = 0.2 + val*0.6
        x[t] = random_state.choice([0, 1], p=[prob, 1. - prob])
        y[t] = random_state.choice([0, 1, 2], p=[prob, (1. - prob)/2., (1. - prob)/2.])

    print('start')
    # print(client.dashboard_link)
    # print(cmi.run_test_raw(x, y, z=None))
    print(cmi.run_test_raw(x, y, z=z))

    # client.close()
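
For reference, get_dependence_measure above obtains the CMI estimate from the contingency table via the entropy decomposition I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z) (the hxz + hyz - hz - hxyz line). The following is a minimal standalone sketch of that decomposition, not part of the packaged file; plugin_cmi and the toy data are illustrative only. It can be used to check that conditionally independent discrete variables yield a CMI near zero:

import numpy as np

def plugin_cmi(x, y, z):
    # Plug-in estimate of I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z),
    # the same decomposition used in get_dependence_measure, restricted to
    # one-dimensional discrete arrays for clarity.
    def entropy(*cols):
        _, counts = np.unique(np.column_stack(cols), axis=0, return_counts=True)
        p = counts / counts.sum()
        return -(p * np.log(p)).sum()
    return entropy(x, z) + entropy(y, z) - entropy(z) - entropy(x, y, z)

rng = np.random.default_rng(0)
T = 50000
z = rng.integers(0, 2, size=T)
x = np.where(rng.random(T) < 0.25, 1 - z, z)  # noisy copy of z
y = np.where(rng.random(T) < 0.25, 1 - z, z)  # another noisy copy of z

print(plugin_cmi(x, y, z))                   # near 0 (up to plug-in bias): X independent of Y given Z
print(plugin_cmi(x, y, np.zeros_like(z)))    # clearly positive: X and Y are marginally dependent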
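Beyond the run_test_raw call in the module's __main__ block, CMIsymb is typically passed as the conditional-independence test to PCMCI, which this wheel also ships (pcmci.py, data_processing.py). A minimal usage sketch, assuming the standard tigramite interface (DataFrame, PCMCI, run_pcmci); the data, variable names, and parameter values are illustrative only:

import numpy as np
from tigramite.data_processing import DataFrame
from tigramite.pcmci import PCMCI
from tigramite.independence_tests.cmisymb import CMIsymb

# Toy discrete dataset: three binary variables observed over 500 time steps.
rng = np.random.default_rng(42)
data = rng.integers(0, 2, size=(500, 3))

dataframe = DataFrame(data, var_names=['X0', 'X1', 'X2'])
pcmci = PCMCI(dataframe=dataframe, cond_ind_test=CMIsymb(sig_samples=200))
results = pcmci.run_pcmci(tau_max=2, pc_alpha=0.05)
print(results['p_matrix'].shape)  # (N, N, tau_max + 1)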