tigramite-fast 5.2.10.1 (tigramite_fast-5.2.10.1-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. tigramite/__init__.py +0 -0
  2. tigramite/causal_effects.py +1525 -0
  3. tigramite/causal_mediation.py +1592 -0
  4. tigramite/data_processing.py +1574 -0
  5. tigramite/graphs.py +1509 -0
  6. tigramite/independence_tests/LBFGS.py +1114 -0
  7. tigramite/independence_tests/__init__.py +0 -0
  8. tigramite/independence_tests/cmiknn.py +661 -0
  9. tigramite/independence_tests/cmiknn_mixed.py +1397 -0
  10. tigramite/independence_tests/cmisymb.py +286 -0
  11. tigramite/independence_tests/gpdc.py +664 -0
  12. tigramite/independence_tests/gpdc_torch.py +820 -0
  13. tigramite/independence_tests/gsquared.py +190 -0
  14. tigramite/independence_tests/independence_tests_base.py +1310 -0
  15. tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
  16. tigramite/independence_tests/pairwise_CI.py +383 -0
  17. tigramite/independence_tests/parcorr.py +369 -0
  18. tigramite/independence_tests/parcorr_mult.py +485 -0
  19. tigramite/independence_tests/parcorr_wls.py +451 -0
  20. tigramite/independence_tests/regressionCI.py +403 -0
  21. tigramite/independence_tests/robust_parcorr.py +403 -0
  22. tigramite/jpcmciplus.py +966 -0
  23. tigramite/lpcmci.py +3649 -0
  24. tigramite/models.py +2257 -0
  25. tigramite/pcmci.py +3935 -0
  26. tigramite/pcmci_base.py +1218 -0
  27. tigramite/plotting.py +4735 -0
  28. tigramite/rpcmci.py +467 -0
  29. tigramite/toymodels/__init__.py +0 -0
  30. tigramite/toymodels/context_model.py +261 -0
  31. tigramite/toymodels/non_additive.py +1231 -0
  32. tigramite/toymodels/structural_causal_processes.py +1201 -0
  33. tigramite/toymodels/surrogate_generator.py +319 -0
  34. tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
  35. tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
  36. tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
  37. tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
  38. tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
tigramite/independence_tests/regressionCI.py
@@ -0,0 +1,403 @@
+ """Tigramite causal discovery for time series."""
+
+ # Author: Tom Hochsprung <tom.hochsprung@dlr.de>, Jakob Runge <jakob@jakob-runge.com>
+ #
+ # License: GNU General Public License v3.0
+
+ import warnings
+
+ import numpy as np
+ from scipy.stats import chi2, normaltest
+ from sklearn.linear_model import LinearRegression, LogisticRegression
+ from sklearn import metrics
+ from sklearn.dummy import DummyClassifier
+
+ import tigramite
+ from tigramite.independence_tests.independence_tests_base import CondIndTest
+
+
+ class RegressionCI(CondIndTest):
+     r"""Flexible parametric conditional independence tests for continuous, categorical, or mixed data.
+
+     Asymptotically equivalent to the tests for mixed data suggested in
+
+     Tsagris, Michail, et al. "Constraint-based causal discovery with mixed
+     data." International Journal of Data Science and Analytics 6
+     (2018): 19-30.
+
+     For linear regression RegressionCI implements a likelihood ratio test,
+     while the above employs an F-statistic. Furthermore, while our
+     implementation utilizes the Chi^2 null distribution, theirs uses the
+     F-distribution.
+
+     Assumes one-dimensional X, Y, but can be combined with PairwiseMultCI to
+     obtain a test for multivariate X, Y.
+
+     Notes
+     -----
+     To test :math:`X \perp Y | Z`, the regressions Y|XZ vs Y|Z or, depending
+     on which variable is better suited as the dependent variable, X|YZ vs
+     X|Z are compared via their deviance. If the fits of the respective
+     regressions do not differ significantly (measured by the deviance
+     difference), the null hypothesis of conditional independence is
+     "accepted". X and Y must be univariate; Z can be empty, univariate, or
+     multivariate. The approach works for all combinations of "discrete" and
+     "continuous" X, Y, and respective columns of Z; depending on the case,
+     linear or multinomial logistic regression is employed.
+
+     Parameters
+     ----------
+     **kwargs :
+         Arguments passed on to parent class CondIndTest.
+     """
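+     # Minimal usage sketch (illustrative; see also the demo under
+     # __main__ at the bottom of this file). run_test_raw is inherited from
+     # CondIndTest; x, y, z are arrays of shape (T, dim) and the *_type arrays
+     # mark each sample as continuous (0) or discrete (1):
+     #
+     #     ci = RegressionCI()
+     #     val, pval = ci.run_test_raw(x, y, z=z, x_type=x_type,
+     #                                 y_type=y_type, z_type=z_type)
+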
+     @property
+     def measure(self):
+         """Concrete property to return the measure of the independence test."""
+         return self._measure
+
+     def __init__(self, **kwargs):
+
+         # Setup the member variables
+         self._measure = 'regression_ci'
+         self.two_sided = False
+         self.residual_based = False
+         self.recycle_residuals = False
+
+         CondIndTest.__init__(self, **kwargs)
+
+     def set_dataframe(self, dataframe):
+         """Initialize and check the dataframe.
+
+         Parameters
+         ----------
+         dataframe : data object
+             Set tigramite dataframe object. It must have the attributes
+             dataframe.values yielding a numpy array of shape (observations T,
+             variables N) and optionally a mask of the same shape and a missing
+             values flag.
+         """
+         self.dataframe = dataframe
+
+         if self.mask_type is not None:
+             if dataframe.mask is None:
+                 raise ValueError("mask_type is not None, but no mask in dataframe.")
+             dataframe._check_mask(dataframe.mask)
+
+         if dataframe.data_type is None:
+             raise ValueError("data_type cannot be None for RegressionCI.")
+         dataframe._check_mask(dataframe.data_type)
+
+     def get_dependence_measure(self, array, xyz, data_type):
+         """Returns test statistic.
+
+         Parameters
+         ----------
+         array : array-like
+             Data array with X, Y, Z in rows and observations in columns.
+
+         xyz : array of ints
+             XYZ identifier array of shape (dim,).
+
+         data_type : array-like
+             Array of the same shape as array that describes whether samples
+             are continuous (0) or discrete (1).
+
+         Returns
+         -------
+         val : float
+             The test statistic.
+         """
+
+         def convert_to_one_hot(data, nb_classes):
+             """Convert an iterable of indices to one-hot encoded labels."""
+             targets = np.array(data).reshape(-1).tolist()
+             # categories need to be mapped to 0, 1, ... in this function
+             s = sorted(set(targets))
+             targets = np.array([s.index(i) for i in targets])
+             return np.eye(nb_classes)[targets]
+
+         def do_componentwise_one_hot_encoding(X, var_type):
+             """One-hot encode all categorical components of X."""
+             T, dim = X.shape
+             X_new = np.empty([T, 0])
+             # componentwise dummy-encoding (if necessary; otherwise keep the component as is)
+             for i in range(len(var_type)):
+                 if var_type[i] == 1:
+                     nb_classes = len(set(X[:, i]))
+                     X_new = np.hstack((X_new, convert_to_one_hot(X[:, i].astype(int), nb_classes=nb_classes)))
+                 elif var_type[i] == 0:
+                     X_new = np.hstack((X_new, X[:, i].reshape((T, 1))))
+                 else:
+                     raise ValueError("data_type only allows entries in {0, 1}")
+             return X_new
+
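+         # For illustration: convert_to_one_hot([0, 2, 1], nb_classes=3) maps
+         # the sorted categories {0, 1, 2} to rows of the 3x3 identity matrix,
+         # yielding [[1, 0, 0], [0, 0, 1], [0, 1, 0]].
+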
+         def calc_deviance_logistic(X, y, var_type):
+             """Calculates the deviance (i.e., -2 * log-likelihood) for a
+             multinomial logistic regression (with standard regression assumptions).
+             """
+
+             # one-hot encode all categorical columns
+             X = do_componentwise_one_hot_encoding(X, var_type=var_type)
+             y = np.ravel(y).astype('int')
+             # do logistic regression; if y only contains one class, return zero
+             if len(np.unique(y)) < 2:
+                 model = DummyClassifier(strategy="constant", constant=y[0])
+                 model.fit(X, y)
+                 deviance = 0.
+                 warnings.warn("Constant array detected, CI test ill-defined!")
+             else:
+                 model = LogisticRegression(solver='newton-cholesky')  # was 'lbfgs' before
+                 model.fit(X, y)
+                 # log_loss is the negative log-likelihood, so this is -2 * log-likelihood
+                 deviance = 2 * metrics.log_loss(y, model.predict_proba(X), normalize=False)
+
+             # dof: number of features + 1 for the intercept
+             # (constant offsets cancel out in the deviance difference anyway)
+             dof = model.n_features_in_ + 1
+
+             return deviance, dof
+
+         def calc_deviance_linear(X, y, var_type):
+             """Calculates the deviance (i.e., -2 * log-likelihood) for a
+             linear regression (with standard regression assumptions).
+             """
+
+             n = X.shape[0]
+             # one-hot encode all categorical columns
+             X = do_componentwise_one_hot_encoding(X, var_type=var_type)
+             y = np.ravel(y)
+             if y.std() == 0.:
+                 warnings.warn("Constant array detected, CI test ill-defined!")
+             # do linear regression
+             model = LinearRegression()
+             model.fit(X, y)
+             # predictions based on the fitted model
+             preds = model.predict(X)
+             # residual sum of squares
+             rss = np.sum(np.power((preds - y), 2))
+             # deviance = -2 * Gaussian log-likelihood at the MLE variance rss / n;
+             # only the rss-term matters, the rest cancels out in the difference
+             deviance = n * np.log(2 * np.pi) + n * np.log(rss / n) + n
+             # dof: number of features + 1 for the intercept
+             # (constant offsets cancel out in the deviance difference anyway)
+             dof = model.n_features_in_ + 1
+             return deviance, dof
+
+         def entropy(series):
+             """Empirical Shannon entropy (in nats) of a discrete series."""
+             value, counts = np.unique(series, return_counts=True)
+             norm_counts = counts / counts.sum()
+             return -(norm_counts * np.log(norm_counts)).sum()
+
+         x_indices = np.where(xyz == 0)[0]
+         y_indices = np.where(xyz == 1)[0]
+         z_indices = np.where(xyz == 2)[0]
+
+         x = array[x_indices].T
+         y = array[y_indices].T
+
+         x_type = data_type[x_indices]
+         y_type = data_type[y_indices]
+
+         if len(z_indices) == 0:
+             # no conditions: regress on a constant column, i.e., fit just an intercept
+             z = np.ones((array.shape[1], 1))
+             z_type = [0]
+         else:
+             z = array[z_indices].T
+             z_type = data_type[z_indices]
+             z_type = z_type.max(axis=1)
+
+         # check whether all samples within X, and all samples within Y, have the same data type
+         if (x_type.max() != x_type.min()) or (y_type.max() != y_type.min()):
+             raise ValueError("All samples of X and all samples of Y must have the same data type.")
+
+         x_type = x_type.max()
+         y_type = y_type.max()
+
+         # if z was (originally) None, just an intercept is fitted (see above).
+         # Now distinguish the cases of X discrete/continuous and Y discrete/continuous.
+
+         # Case 1: X continuous, Y continuous
+         if (x_type == 0) and (y_type == 0):
+             # Heuristic: use the more normal variable as the dependent variable
+             # (normaltest returns a larger statistic for less normal data)
+             if normaltest(x)[0] >= normaltest(y)[0]:
+                 dep_var = y
+                 rest = np.hstack((x, z))
+                 rest_type = np.hstack((x_type, z_type))
+             else:
+                 dep_var = x
+                 rest = np.hstack((y, z))
+                 rest_type = np.hstack((y_type, z_type))
+
+             # Fit dep_var | Z
+             dev1, dof1 = calc_deviance_linear(z, dep_var, var_type=z_type)
+             # Fit dep_var | rest (i.e., the other variable and Z)
+             dev2, dof2 = calc_deviance_linear(rest, dep_var, var_type=rest_type)
+
+         # Case 2: X discrete, Y continuous
+         elif (x_type == 1) and (y_type == 0):
+             xz = np.hstack((x, z))
+             # Fit Y | Z
+             dev1, dof1 = calc_deviance_linear(z, y, var_type=z_type)
+             # Fit Y | XZ
+             dev2, dof2 = calc_deviance_linear(xz, y, var_type=np.hstack((x_type, z_type)))
+
+         # Case 3: X continuous, Y discrete
+         elif (x_type == 0) and (y_type == 1):
+             yz = np.hstack((y, z))
+             # Fit X | Z
+             dev1, dof1 = calc_deviance_linear(z, x, var_type=z_type)
+             # Fit X | YZ
+             dev2, dof2 = calc_deviance_linear(yz, x, var_type=np.hstack((y_type, z_type)))
+
+         # Case 4: X discrete, Y discrete
+         elif (x_type == 1) and (y_type == 1):
+             # Heuristic: use the variable with smaller entropy as the dependent variable
+             if entropy(x) >= entropy(y):
+                 dep_var = y
+                 rest = np.hstack((x, z))
+                 rest_type = np.hstack((x_type, z_type))
+             else:
+                 dep_var = x
+                 rest = np.hstack((y, z))
+                 rest_type = np.hstack((y_type, z_type))
+             # Fit dep_var | Z
+             dev1, dof1 = calc_deviance_logistic(z, dep_var, var_type=z_type)
+             # Fit dep_var | rest
+             dev2, dof2 = calc_deviance_logistic(rest, dep_var, var_type=rest_type)
+
+         # likelihood-ratio statistic: the deviance of the smaller model minus
+         # the deviance of the larger model, with the corresponding difference
+         # in degrees of freedom
+         stat = dev1 - dev2
+         dof = dof2 - dof1
+
+         self._temp_dof = dof
+         return stat
+
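+     # Note: under the null hypothesis the statistic dev1 - dev2 is
+     # asymptotically chi-square distributed with dof2 - dof1 degrees of
+     # freedom; for instance, chi2.sf(3.84, 1) is roughly 0.05. The method
+     # below turns the statistic into a p-value accordingly.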
+     def get_analytic_significance(self, value, T, dim, xyz):
+         """Return the p-value of the test statistic.
+
+         The statistic is referred to a chi-square distribution with the
+         degrees of freedom stored by get_dependence_measure.
+         """
+
+         # calculate the p-value
+         p_value = chi2.sf(value, self._temp_dof)
+         del self._temp_dof
+
+         return p_value
+
+
+ if __name__ == '__main__':
+
+     # Small simulation: estimate the rejection rate of RegressionCI for mixed
+     # data; under conditional independence it should be close to 0.05.
+     seed = 43
+     random_state = np.random.default_rng(seed=seed)
+     ci = RegressionCI()
+
+     T = 100
+     reals = 100
+     rate = np.zeros(reals)
+
+     x_example = "discrete"
+     y_example = "discrete"
+     dimz = 1
+     z_example = ["continuous"]  # e.g. ["discrete", "continuous"]; use dimz = 0 for no conditions
+
+     for i in range(reals):
+         if dimz > 0:
+             z = np.zeros((T, dimz))
+             for k in range(len(z_example)):
+                 if z_example[k] == "discrete":
+                     z[:, k] = random_state.binomial(n=1, p=0.5, size=T)
+                 else:
+                     z[:, k] = random_state.uniform(low=0, high=1, size=T)
+         else:
+             z = None
+         x = np.empty(T).reshape(T, 1)
+         y = np.empty(T).reshape(T, 1)
+         for t in range(T):
+             if dimz > 0:
+                 if z_example[0] == "discrete":
+                     val = z[t, 0].squeeze()
+                     prob = 0.2 + val * 0.6
+                 else:
+                     prob = z[t, 0].squeeze()
+             else:
+                 prob = 0.2
+             if x_example == "discrete":
+                 x[t] = random_state.choice([0, 1], p=[prob, 1. - prob])
+             else:
+                 x[t] = 0.1 * random_state.random()
+             if y_example == "discrete":
+                 y[t] = random_state.choice([0, 1], p=[prob, 1. - prob])  # + x[t] for dependence
+             else:
+                 y[t] = random_state.normal(prob, 1) + 0.5 * x[t]
+
+         if x_example == "discrete":
+             x_type = np.ones(T)
+         else:
+             x_type = np.zeros(T)
+         if y_example == "discrete":
+             y_type = np.ones(T)
+         else:
+             y_type = np.zeros(T)
+         if dimz > 0:
+             z_type = np.zeros((T, dimz))
+             for j in range(len(z_example)):
+                 if z_example[j] == "discrete":
+                     z_type[:, j] = np.ones(T)
+         else:
+             z_type = None
+
+         val, pval = ci.run_test_raw(x, y, z=z, x_type=x_type, y_type=y_type, z_type=z_type)
+         rate[i] = pval
+
+     # Alternatively, wrap the data in a tigramite DataFrame with data_type set,
+     # call ci.set_dataframe(dataframe), and use
+     # ci.run_test(X=[(0, 0)], Y=[(1, 0)], Z=[(2, 0)]).
+
+     # fraction of p-values below the 5% level
+     print((rate <= 0.05).mean())
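+     # Sketch (illustrative, assuming a tigramite DataFrame `dataframe` with
+     # data_type set): RegressionCI plugs into PCMCI like any other
+     # conditional independence test:
+     #
+     #     from tigramite.pcmci import PCMCI
+     #     pcmci = PCMCI(dataframe=dataframe, cond_ind_test=RegressionCI())
+     #     results = pcmci.run_pcmci(tau_max=2, pc_alpha=0.05)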