tigramite-fast 5.2.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. tigramite/__init__.py +0 -0
  2. tigramite/causal_effects.py +1525 -0
  3. tigramite/causal_mediation.py +1592 -0
  4. tigramite/data_processing.py +1574 -0
  5. tigramite/graphs.py +1509 -0
  6. tigramite/independence_tests/LBFGS.py +1114 -0
  7. tigramite/independence_tests/__init__.py +0 -0
  8. tigramite/independence_tests/cmiknn.py +661 -0
  9. tigramite/independence_tests/cmiknn_mixed.py +1397 -0
  10. tigramite/independence_tests/cmisymb.py +286 -0
  11. tigramite/independence_tests/gpdc.py +664 -0
  12. tigramite/independence_tests/gpdc_torch.py +820 -0
  13. tigramite/independence_tests/gsquared.py +190 -0
  14. tigramite/independence_tests/independence_tests_base.py +1310 -0
  15. tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
  16. tigramite/independence_tests/pairwise_CI.py +383 -0
  17. tigramite/independence_tests/parcorr.py +369 -0
  18. tigramite/independence_tests/parcorr_mult.py +485 -0
  19. tigramite/independence_tests/parcorr_wls.py +451 -0
  20. tigramite/independence_tests/regressionCI.py +403 -0
  21. tigramite/independence_tests/robust_parcorr.py +403 -0
  22. tigramite/jpcmciplus.py +966 -0
  23. tigramite/lpcmci.py +3649 -0
  24. tigramite/models.py +2257 -0
  25. tigramite/pcmci.py +3935 -0
  26. tigramite/pcmci_base.py +1218 -0
  27. tigramite/plotting.py +4735 -0
  28. tigramite/rpcmci.py +467 -0
  29. tigramite/toymodels/__init__.py +0 -0
  30. tigramite/toymodels/context_model.py +261 -0
  31. tigramite/toymodels/non_additive.py +1231 -0
  32. tigramite/toymodels/structural_causal_processes.py +1201 -0
  33. tigramite/toymodels/surrogate_generator.py +319 -0
  34. tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
  35. tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
  36. tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
  37. tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
  38. tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
tigramite/rpcmci.py ADDED
@@ -0,0 +1,467 @@
1
+ """Tigramite causal discovery for time series."""
2
+
3
+ # Authors: Elena Saggioro, Sagar Simha, Matthias Bruhns, Jakob Runge <jakob@jakob-runge.com>
4
+ #
5
+ # License: GNU General Public License v3.0
6
+
7
+ from copy import deepcopy
8
+ import numpy as np
9
+ import sklearn
10
+ from joblib import Parallel, delayed
11
+ from ortools.linear_solver import pywraplp
12
+ import traceback
13
+
14
+ from tigramite.independence_tests.parcorr import ParCorr
15
+ from tigramite.data_processing import DataFrame
16
+ from tigramite.models import Prediction
17
+ from tigramite.pcmci import PCMCI
18
+
19
class RPCMCI(PCMCI):
    r"""RPCMCI class for extracting causal regimes and the associated graphs
    from time series data.

    Notes
    -----
    The Regime-PCMCI causal discovery method is described in:

    Elena Saggioro, Jana de Wiljes, Marlene Kretschmer, Jakob Runge;
    Reconstructing regime-dependent causal relationships from observational
    time series. Chaos 1 November 2020; 30 (11): 113115.
    https://doi.org/10.1063/5.0020538

    The method iterates between two phases -- a regime learning phase
    (optimization-based) and a causal discovery phase (PCMCI) -- to identify
    regime dependent causal relationships. A persistent discrete regime
    variable is assumed that leads to a finite number of regimes within which
    stationarity can be assumed.

    Parameters
    ----------
    dataframe : data object
        This is the Tigramite dataframe object. It has the attributes
        dataframe.values yielding a numpy array of shape (observations T,
        variables N). For RPCMCI any mask on the dataframe is ignored: it is
        overwritten in place with an all-False mask on construction and
        re-assigned for every regime during ``run_rpcmci``. You may use the
        missing_flag to indicate missing values.
    cond_ind_test : conditional independence test object, optional
        (default: None)
        This can be ParCorr or other classes from
        ``tigramite.independence_tests`` or an external test passed as a
        callable. This test can be based on the class
        tigramite.independence_tests.CondIndTest. If None, ``ParCorr()`` is
        used. ``set_mask_type('y')`` is called on the test object.
    prediction_model : sklearn model object, optional (default: None)
        For example, sklearn.linear_model.LinearRegression() for a linear
        regression model. This should be consistent with cond_ind_test, ie,
        use ParCorr() with a linear model and, eg, GPDC() with a
        GaussianProcessRegressor model, or CMIknn with NearestNeighbors model.
        If None, ``LinearRegression()`` is used.
    seed : int, optional (default: None)
        Random seed for annealing step. If None, a seed is drawn with
        ``np.random.randint(0, 1000)``.
    verbosity : int, optional (default: -1)
        Verbose levels -1, 0, 1, ...
    """

    def __init__(self, dataframe, cond_ind_test=None,
                 prediction_model=None, seed=None, verbosity=-1):

        self.verbosity = verbosity

        self.seed = seed
        if self.seed is None:
            self.seed = np.random.randint(0, 1000)

        # Set prediction model to be used in optimization
        self.prediction_model = prediction_model
        if self.prediction_model is None:
            # Import the submodule explicitly: a bare ``import sklearn`` does
            # not by itself make ``sklearn.linear_model`` available.
            from sklearn.linear_model import LinearRegression
            self.prediction_model = LinearRegression()

        # Set conditional independence test. The mask type is set to 'y'
        # for both the default and a user-supplied test.
        if cond_ind_test is None:
            cond_ind_test = ParCorr()
        cond_ind_test.set_mask_type('y')

        if dataframe.analysis_mode != 'single':
            raise ValueError("Only single time series data allowed for RPCMCI.")

        if dataframe.has_vector_data:
            raise ValueError("Only scalar data allowed for RPCMCI.")

        # Masking is not available in RPCMCI, but missing values can be
        # specified. Note that this replaces the mask of the passed dataframe.
        dataframe.mask = {0: np.zeros(dataframe.values[0].shape, dtype='bool')}
        self.missing_flag = dataframe.missing_flag

        # Init base class
        # NOTE(review): the base initializer receives verbosity=0; if it
        # assigns self.verbosity, the value stored above is overwritten and
        # the ``self.verbosity > 0`` messages in run_rpcmci never print --
        # confirm against PCMCI.__init__.
        PCMCI.__init__(self, dataframe=dataframe,
                       cond_ind_test=cond_ind_test,
                       verbosity=0)

    def run_rpcmci(self,
                   num_regimes,
                   max_transitions,
                   switch_thres=0.05,
                   num_iterations=20,
                   max_anneal=10,
                   tau_min=1,
                   tau_max=1,
                   pc_alpha=0.2,
                   alpha_level=0.01,
                   n_jobs=-1,
                   ):
        """Run RPCMCI method for extracting causal regimes and the associated
        graphs from time series data.

        Parameters
        ----------
        num_regimes : int
            Number of assumed regimes.
        max_transitions : int
            Maximum number of transitions within a single regime
            (persistency parameter).
        switch_thres : float, optional (default: 0.05)
            Switch threshold. A sample is assigned to regime k for the causal
            discovery step if its gamma value for k exceeds this threshold.
        num_iterations : int, optional (default: 20)
            Optimization iterations.
        max_anneal : int, optional (default: 10)
            Maximum annealing runs.
        tau_min : int, optional (default: 1)
            Minimum time lag to test.
        tau_max : int, optional (default: 1)
            Maximum time lag. Must be larger or equal to tau_min.
        pc_alpha : float, optional (default: 0.2)
            Significance level in PCMCI.
        alpha_level : float, optional (default: 0.01)
            Significance level in PCMCI at which the p_matrix is thresholded
            to get graph.
        n_jobs : int, optional (default: -1)
            Number of CPUs to use in joblib parallelization. Default n_jobs=-1
            uses all available.

        Returns
        -------
        final_results : dict or None
            None if no annealing run finished without error (a message is
            printed in that case). Otherwise a dictionary with the keys:

            'regimes' : array of shape (num_regimes, T)
                Regime assignment (gamma) of the best annealing run.
            'causal_results' : dict
                For each regime index k, the result of run_pcmci() of the
                best annealing run.
            'diff_g_f' : tuple
                Differences between two consecutive optimizations for all
                annealings (None for failed ones) and for the optimal one
                with minimum objective value (see paper).
            'error_free_annealings' : int
                Number of annealings that finished without error.
        """

        # Explicit submodule import; ``import sklearn`` alone does not
        # guarantee that ``sklearn.preprocessing`` is loaded.
        from sklearn.preprocessing import StandardScaler

        # Per-annealing storage, indexed by annealing number a. Entries of
        # failed annealings stay None.
        objmip_ann = [None] * max_anneal         # objective values of the LP
        causal_prediction = [None] * max_anneal  # PCMCI results per regime
        gamma_ann = [None] * max_anneal          # optimized regime variables
        diff_g_ann = [None] * max_anneal         # convergence traces

        # Earliest iteration at which the "gammas too similar" stop applies
        q_break_cycle = 5

        data = self.dataframe.values[0]

        def _pcmci(tau_min, tau_max, pc_alpha, alpha_level):
            """Wrapper around running PCMCI.

            Returns the run_pcmci() results, the graph and the parents
            dictionary derived from the graph.
            """
            results = self.run_pcmci(tau_min=tau_min, tau_max=tau_max,
                                     pc_alpha=pc_alpha,
                                     alpha_level=alpha_level)
            graph = results['graph']
            pcmci_parents = self.return_parents_dict(
                graph=graph, val_matrix=results['val_matrix'])
            return results, graph, pcmci_parents

        def _optimize_gamma(resid_sq, max_transitions):
            r"""
            Solves the following optimization problem :

            minimize c * x

            where c = resid_sq , flattened along num_regimes dimension
                  x = Gamma , flattened along num_regimes dimension

            with Constraints:
                (1) [\sum_{k=1,num_regimes}gamma^k(t) ]= 1
                    forall t : uniqueness
                (2) [\sum_{t=1:T-1} | gamma^k(t+1) - gamma^k(t) | ] <= max_transitions
                    forall k : persistence

            The absolute values in (2) are linearized with auxiliary
            variables eta >= 0 satisfying
            -eta^k(t) <= gamma^k(t+1) - gamma^k(t) <= eta^k(t).

            Inputs:
                resid_sq ( np.shape = (num_regimes,T) )
                max_transitions = max number of switchings allowed

            Returns:
                Gamma_updated ( np.shape = (num_regimes,T) )
                obj_value = objective value at the optimum

            Raises:
                ValueError if the solver cannot be created or no optimal
                solution is found.
            """

            num_regimes, T = resid_sq.shape

            # Create the linear solver with the GLOP backend.
            solver = pywraplp.Solver.CreateSolver("GLOP")
            if solver is None:
                # CreateSolver returns None when the backend is unavailable.
                raise ValueError("The GLOP solver could not be created.")
            infinity = solver.infinity()

            # Continuous variables in the interval [0, 1] (GLOP is an LP
            # solver); G[k * T + t] is gamma^k(t).
            G = [solver.NumVar(0, 1, f"x_{i}") for i in range(num_regimes * T)]

            # Define eta, auxiliary vars for constr. (2);
            # E[k * T + t] bounds |gamma^k(t+1) - gamma^k(t)|.
            E = [solver.NumVar(0, infinity, f"eta_{i}")
                 for i in range(num_regimes * T - 1)]

            solver.Minimize(
                sum([resid_sq[k, t] * G[k * T + t]
                     for k in range(num_regimes) for t in range(T)])
            )

            # (1) uniqueness: the regime weights sum to one at every time step
            for t in range(T):
                solver.Add(
                    sum([G[k * T + t] for k in range(num_regimes)]) == 1)

            for k in range(num_regimes):
                for t in range(T - 1):
                    # (2.1)
                    solver.Add(
                        G[k * T + t + 1] - G[k * T + t] - E[k * T + t] <= 0)
                    # (2.2)
                    solver.Add(
                        -1 * G[k * T + t + 1] + G[k * T + t] - E[k * T + t]
                        <= 0)
                # (2.3)
                solver.Add(
                    sum([E[k * T + t] for t in range(T - 1)])
                    <= max_transitions)

            status = solver.Solve()
            if status != pywraplp.Solver.OPTIMAL:
                raise ValueError("The problem does not have an optimal solution. Please change hyperparameters.")

            if self.verbosity > 0:
                print("\nOptimal objective: reached.")
            gamma = np.reshape([g.solution_value() for g in G],
                               (num_regimes, T))
            obj_value = solver.Objective().Value()
            return gamma, obj_value

        def one_annealing_step(a):
            """Executes one annealing step. The random seed is self.seed + a.

            Returns None if the step failed, otherwise the tuple
            (a, objective, parents, results, links, gamma, diff_g).
            """

            if self.verbosity > 0:
                print(f"\n################# Annealing iteration a = {a} ####################\n")

            T = self.dataframe.T[0]

            # Initialise gamma_0 as random matrix with entries in [0, 1)
            random_state = np.random.default_rng(self.seed + a)
            gamma_opt = random_state.uniform(0, 1, size=(num_regimes, T))

            parents_opt = {}
            results_opt = {}
            links_opt = {}
            objective_opt = 0

            # Difference between two consecutive optimizations
            diff_g = []

            #
            # Iteration over 1. causal discovery and 2. constrained
            # optimization
            #
            error_flag = False
            for q in range(num_iterations):
                if self.verbosity > 0:
                    print(f"\n###### Optimization step q = {q}")

                # Initialize to 0
                residuals = np.zeros((num_regimes, T, self.N))

                gamma_temp = deepcopy(gamma_opt)

                #
                # 1. Causal discovery and prediction
                #

                # Iterate over regimes
                for k in range(num_regimes):
                    if self.verbosity > 0:
                        print(f"{16 * '#'} Regime k = {k}")

                    # Select sample according to gamma_opt, is a bool vector
                    selected_samples_k = (gamma_temp[k, :] > switch_thres)

                    # True marks a masked-out sample; only the selected time
                    # steps of this regime stay unmasked.
                    mask_of_k = np.ones(data.shape, dtype="bool")
                    mask_of_k[selected_samples_k] = False

                    # Change mask in dataframe for this step
                    self.dataframe.mask[0] = mask_of_k

                    # Give up on this annealing if a regime keeps 5 or fewer
                    # samples.
                    if np.any((~mask_of_k).sum(axis=0) <= 5):
                        error_flag = True
                        if self.verbosity > 0:
                            print(f"*****Regime with too few samples in annealing a = {a} at iteration q = {q}.*****\n")
                            print("***** Break k-loop of regimes *****\n ")
                        break  # from k-loop

                    try:
                        results_temp, link_temp, parents_temp = _pcmci(
                            tau_max=int(tau_max),
                            pc_alpha=pc_alpha,
                            alpha_level=alpha_level,
                            tau_min=tau_min,)
                    except Exception:
                        traceback.print_exc()
                        error_flag = True
                        print(f"*****Value error in causal discovery for annealing a = {a} at iteration q = {q}.*****\n")
                        print("***** Break k-loop of regimes *****\n ")
                        break  # from k-loop

                    parents_opt[k] = parents_temp
                    results_opt[k] = results_temp
                    links_opt[k] = link_temp

                    try:
                        # Prediction with causal parents, fitted on the
                        # currently masked dataframe
                        pred = Prediction(
                            dataframe=self.dataframe,
                            prediction_model=self.prediction_model,
                            data_transform=StandardScaler(),
                            train_indices=range(T),
                            test_indices=range(T),
                            verbosity=0,
                        )

                        pred.fit(
                            target_predictors=parents_temp,
                            selected_targets=range(self.N),
                            tau_max=int(tau_max),
                        )

                        # Predict every variable on the unmasked data
                        predicted = pred.predict(
                            target=list(range(self.N)),
                            new_data=DataFrame(data, missing_flag=self.missing_flag)
                        )

                        original_data = np.zeros(predicted.shape)
                        for target in range(self.N):
                            original_data[:, target] = pred.get_test_array(target)[0].flatten()

                    except Exception:
                        traceback.print_exc()
                        error_flag = True
                        print(f"*****Value error in prediction for annealing a = {a} at iteration q = {q}.*****\n")
                        print("***** Break k-loop of regimes *****\n ")
                        break  # from k-loop

                    # Get residuals; the first tau_max time steps stay zero
                    residuals[k, int(tau_max):, :] = original_data - predicted

                if error_flag:
                    if self.verbosity > 0:
                        print(f"***** Break q-loop of optimization iterations for Annealing a = {a} at iteration q = {q}."
                              " Go to next annealing step. *****\n")
                    break

                #
                # 2. Regime optimization step with side constraints
                #

                # Compute the squared residuals summed over variables
                res_sq = np.square(residuals).sum(axis=-1)

                try:
                    # Optimization
                    gamma_opt, objective_opt = _optimize_gamma(res_sq, max_transitions)

                except Exception:
                    traceback.print_exc()
                    error_flag = True
                    print(f"*****Value error in optimization for annealing a = {a} at iteration q = {q}.*****\n")
                    break

                diff_g.append(np.sum(np.abs(gamma_opt - gamma_temp)))

                if self.verbosity > 0:
                    print(f"Difference in abs value between the previous and current gamma "
                          f"(shape num_regimesxT) : {diff_g[-1]}")

                # Break conditions
                if diff_g[-1] == 0:
                    if self.verbosity > 0:
                        print("Two consecutive gammas are equal: (local) minimum reached. "
                              "Go to next annealing.\n")
                    break

                if (q >= q_break_cycle) and (diff_g[-1] <= (2 * num_regimes * T // 100)):
                    if self.verbosity > 0:
                        print(f"Iteration larger than {q_break_cycle} and two consecutive gammas are too similar. "
                              f"Go to next annealing.\n")
                    break

            if error_flag:
                if self.verbosity > 0:
                    print(f"*****Annealing a = {a} failed****\n")

                return None

            return a, objective_opt, parents_opt, results_opt, links_opt, gamma_opt, diff_g

        # Parallelizing over annealing steps
        all_results = Parallel(n_jobs=n_jobs)(
            delayed(one_annealing_step)(a) for a in range(max_anneal))

        error_free_annealings = 0
        for result in all_results:
            if result is None:
                continue
            error_free_annealings += 1
            a, objective_opt, _, results_opt, _, gamma_opt, diff_g = result

            # Save annealing results
            objmip_ann[a] = objective_opt
            causal_prediction[a] = results_opt
            gamma_ann[a] = gamma_opt
            diff_g_ann[a] = diff_g

        if error_free_annealings == 0:
            print("No annealings have converged. Run failed.")
            return None

        # Pick the first annealing with the smallest objective value
        min_obj_val = np.min([obj for obj in objmip_ann if obj is not None])
        i_best = objmip_ann.index(min_obj_val)

        # Final results based on best
        results_f = causal_prediction[i_best]
        gamma_f = gamma_ann[i_best]
        # Convergence optimization
        diff_g_f = diff_g_ann, diff_g_ann[i_best]

        final_results = {'regimes': gamma_f,
                         'causal_results': results_f,
                         'diff_g_f': diff_g_f,
                         'error_free_annealings': error_free_annealings}

        return final_results
File without changes