tigramite-fast 5.2.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. tigramite/__init__.py +0 -0
  2. tigramite/causal_effects.py +1525 -0
  3. tigramite/causal_mediation.py +1592 -0
  4. tigramite/data_processing.py +1574 -0
  5. tigramite/graphs.py +1509 -0
  6. tigramite/independence_tests/LBFGS.py +1114 -0
  7. tigramite/independence_tests/__init__.py +0 -0
  8. tigramite/independence_tests/cmiknn.py +661 -0
  9. tigramite/independence_tests/cmiknn_mixed.py +1397 -0
  10. tigramite/independence_tests/cmisymb.py +286 -0
  11. tigramite/independence_tests/gpdc.py +664 -0
  12. tigramite/independence_tests/gpdc_torch.py +820 -0
  13. tigramite/independence_tests/gsquared.py +190 -0
  14. tigramite/independence_tests/independence_tests_base.py +1310 -0
  15. tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
  16. tigramite/independence_tests/pairwise_CI.py +383 -0
  17. tigramite/independence_tests/parcorr.py +369 -0
  18. tigramite/independence_tests/parcorr_mult.py +485 -0
  19. tigramite/independence_tests/parcorr_wls.py +451 -0
  20. tigramite/independence_tests/regressionCI.py +403 -0
  21. tigramite/independence_tests/robust_parcorr.py +403 -0
  22. tigramite/jpcmciplus.py +966 -0
  23. tigramite/lpcmci.py +3649 -0
  24. tigramite/models.py +2257 -0
  25. tigramite/pcmci.py +3935 -0
  26. tigramite/pcmci_base.py +1218 -0
  27. tigramite/plotting.py +4735 -0
  28. tigramite/rpcmci.py +467 -0
  29. tigramite/toymodels/__init__.py +0 -0
  30. tigramite/toymodels/context_model.py +261 -0
  31. tigramite/toymodels/non_additive.py +1231 -0
  32. tigramite/toymodels/structural_causal_processes.py +1201 -0
  33. tigramite/toymodels/surrogate_generator.py +319 -0
  34. tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
  35. tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
  36. tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
  37. tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
  38. tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1218 @@
1
+ """Tigramite causal discovery for time series."""
2
+
3
+ # Author: Jakob Runge <jakob@jakob-runge.com>
4
+ #
5
+ # License: GNU General Public License v3.0
6
+
7
+ from __future__ import print_function
8
+ import warnings
9
+ import itertools
10
+ from collections import defaultdict
11
+ from copy import deepcopy
12
+ import numpy as np
13
+ import scipy.stats
14
+ import math
15
+ from joblib import Parallel, delayed
16
+
17
+ class PCMCIbase():
18
+ r"""PCMCI base class.
19
+
20
+ Parameters
21
+ ----------
22
+ dataframe : data object
23
+ This is the Tigramite dataframe object. Among others, it has the
24
+ attributes dataframe.values yielding a numpy array of shape (
25
+ observations T, variables N) and optionally a mask of the same shape.
26
+ cond_ind_test : conditional independence test object
27
+ This can be ParCorr or other classes from
28
+ ``tigramite.independence_tests`` or an external test passed as a
29
+ callable. This test can be based on the class
30
+ tigramite.independence_tests.CondIndTest.
31
+ verbosity : int, optional (default: 0)
32
+ Verbose levels 0, 1, ...
33
+
34
+ Attributes
35
+ ----------
36
+ all_parents : dictionary
37
+ Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
38
+ the conditioning-parents estimated with PC algorithm.
39
+ val_min : dictionary
40
+ Dictionary of form val_min[j][(i, -tau)] = float
41
+ containing the minimum test statistic value for each link estimated in
42
+ the PC algorithm.
43
+ pval_max : dictionary
44
+ Dictionary of form pval_max[j][(i, -tau)] = float containing the maximum
45
+ p-value for each link estimated in the PC algorithm.
46
+ iterations : dictionary
47
+ Dictionary containing further information on algorithm steps.
48
+ N : int
49
+ Number of variables.
50
+ T : dict
51
+ Time series sample length of dataset(s).
52
+ """
53
+
54
+ def __init__(self, dataframe,
55
+ cond_ind_test,
56
+ verbosity=0):
57
+ # Set the data for this iteration of the algorithm
58
+ self.dataframe = dataframe
59
+ # Set the conditional independence test to be used
60
+ self.cond_ind_test = deepcopy(cond_ind_test)
61
+ if isinstance(self.cond_ind_test, type):
62
+ raise ValueError("PCMCI requires that cond_ind_test "
63
+ "is instantiated, e.g. cond_ind_test = "
64
+ "ParCorr().")
65
+ self.cond_ind_test.set_dataframe(self.dataframe)
66
+ # Set the verbosity for debugging/logging messages
67
+ self.verbosity = verbosity
68
+ # Set the variable names
69
+ self.var_names = self.dataframe.var_names
70
+
71
+ # Store the shape of the data in the T and N variables
72
+ self.T = self.dataframe.T
73
+ self.N = self.dataframe.N
74
+
75
+
76
+ def _reverse_link(self, link):
77
+ """Reverse a given link, taking care to replace > with < and vice versa."""
78
+
79
+ if link == "":
80
+ return ""
81
+
82
+ if link[2] == ">":
83
+ left_mark = "<"
84
+ else:
85
+ left_mark = link[2]
86
+
87
+ if link[0] == "<":
88
+ right_mark = ">"
89
+ else:
90
+ right_mark = link[0]
91
+
92
+ return left_mark + link[1] + right_mark
93
+
94
+ def _check_cyclic(self, link_dict):
95
+ """Return True if the link_dict has a contemporaneous cycle.
96
+
97
+ """
98
+
99
+ path = set()
100
+ visited = set()
101
+
102
+ def visit(vertex):
103
+ if vertex in visited:
104
+ return False
105
+ visited.add(vertex)
106
+ path.add(vertex)
107
+ for itaui in link_dict.get(vertex, ()):
108
+ i, taui = itaui
109
+ link_type = link_dict[vertex][itaui]
110
+ if taui == 0 and link_type in ['-->', '-?>']:
111
+ if i in path or visit(i):
112
+ return True
113
+ path.remove(vertex)
114
+ return False
115
+
116
+ return any(visit(v) for v in link_dict)
117
+
118
    def _set_link_assumptions(self, link_assumptions, tau_min, tau_max,
                              remove_contemp=False):
        """Helper function to set and check the link_assumptions argument

        Parameters
        ----------
        link_assumptions : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        tau_min : int
            Minimum time delay to test.
        tau_max : int
            Maximum time delay to test.
        remove_contemp : bool
            Whether contemporaneous links (at lag zero) should be removed.

        Returns
        -------
        link_assumptions : dict
            Cleaned links.

        Raises
        ------
        ValueError
            If assumptions are inconsistent, refer to out-of-range
            variables/lags, use invalid link types, or contain a
            contemporaneous cycle.
        """
        # Copy and pass into the function
        # Shallow nested copy suffices: keys are tuples (immutable), values are strings (immutable)
        _int_link_assumptions = {j: dict(link_assumptions[j]) for j in link_assumptions} if link_assumptions is not None else None
        # Set the default selected links if none are set
        _vars = list(range(self.N))
        _lags = list(range(-(tau_max), -tau_min + 1, 1))
        if _int_link_assumptions is None:
            _int_link_assumptions = {}
            # Set the default as all combinations: every pair and lag is a
            # candidate link with uncertain existence (middle mark '?')
            for j in _vars:
                _int_link_assumptions[j] = {}
                for i in _vars:
                    for lag in range(tau_min, tau_max + 1):
                        if not (i == j and lag == 0):
                            if lag == 0:
                                # Contemporaneous default: unoriented, uncertain
                                _int_link_assumptions[j][(i, 0)] = 'o?o'
                            else:
                                # Lagged default: oriented by time, uncertain
                                _int_link_assumptions[j][(i, -lag)] = '-?>'

        else:

            if remove_contemp:
                # Drop all lag-zero entries before consistency handling
                for j in _int_link_assumptions.keys():
                    _int_link_assumptions[j] = {link:_int_link_assumptions[j][link]
                                                for link in _int_link_assumptions[j]
                                                if link[1] != 0}

            # Make contemporaneous assumptions consistent and orient lagged links
            for j in _vars:
                for link in _int_link_assumptions[j]:
                    i, tau = link
                    link_type = _int_link_assumptions[j][link]
                    if tau == 0:
                        # A contemporaneous link must appear in both j's and
                        # i's entries with mirrored marks; add the mirror if
                        # missing, complain if present but inconsistent.
                        if (j, 0) in _int_link_assumptions[i]:
                            if _int_link_assumptions[j][link] != self._reverse_link(_int_link_assumptions[i][(j, 0)]):
                                raise ValueError("Inconsistent link assumptions for indices %d - %d " %(i, j))
                        else:
                            _int_link_assumptions[i][(j, 0)] = self._reverse_link(_int_link_assumptions[j][link])
                    else:
                        # Orient lagged links by time order while leaving the middle mark
                        new_link_type = '-' + link_type[1] + '>'
                        _int_link_assumptions[j][link] = new_link_type

        # Otherwise, check that our assumptions are sane
        # Check that the link_assumptions refer to links that are inside the
        # data range and types
        _key_set = set(_int_link_assumptions.keys())
        valid_entries = _key_set == set(range(self.N))

        valid_types = [
            'o-o',
            'o?o',
            '-->',
            '-?>',
            '<--',
            '<?-',
        ]

        for links in _int_link_assumptions.values():
            if isinstance(links, dict) and len(links) == 0:
                # An empty dict for a variable is valid (no links assumed)
                continue
            for var, lag in links:
                if var not in _vars or lag not in _lags:
                    valid_entries = False
                if links[(var, lag)] not in valid_types:
                    valid_entries = False


        if not valid_entries:
            raise ValueError("link_assumptions"
                             " must be dictionary with keys for all [0,...,N-1]"
                             " variables and contain only links from "
                             "these variables in range [tau_min, tau_max] "
                             "and with link types in %s" %str(valid_types))

        # Check for contemporaneous cycles
        if self._check_cyclic(_int_link_assumptions):
            raise ValueError("link_assumptions has contemporaneous cycle(s).")

        # Return the _int_link_assumptions
        return _int_link_assumptions
231
+ def _dict_to_matrix(self, val_dict, tau_max, n_vars, default=1):
232
+ """Helper function to convert dictionary to matrix format.
233
+
234
+ Parameters
235
+ ---------
236
+ val_dict : dict
237
+ Dictionary of form {0:{(0, -1):float, ...}, 1:{...}, ...}.
238
+ tau_max : int
239
+ Maximum lag.
240
+ n_vars : int
241
+ Number of variables.
242
+ default : int
243
+ Default value for entries not part of val_dict.
244
+
245
+ Returns
246
+ -------
247
+ matrix : array of shape (N, N, tau_max+1)
248
+ Matrix format of p-values and test statistic values.
249
+ """
250
+ matrix = np.ones((n_vars, n_vars, tau_max + 1))
251
+ matrix *= default
252
+
253
+ for j in val_dict.keys():
254
+ for link in val_dict[j].keys():
255
+ k, tau = link
256
+ if tau == 0:
257
+ matrix[k, j, 0] = matrix[j, k, 0] = val_dict[j][link]
258
+ else:
259
+ matrix[k, j, abs(tau)] = val_dict[j][link]
260
+ return matrix
261
+
262
+
263
+ def get_corrected_pvalues(self, p_matrix,
264
+ fdr_method='fdr_bh',
265
+ exclude_contemporaneous=True,
266
+ tau_min=0,
267
+ tau_max=1,
268
+ link_assumptions=None,
269
+ ):
270
+ """Returns p-values corrected for multiple testing.
271
+
272
+ Currently implemented is Benjamini-Hochberg False Discovery Rate
273
+ method. Correction is performed either among all links if
274
+ exclude_contemporaneous==False, or only among lagged links.
275
+
276
+ Parameters
277
+ ----------
278
+ p_matrix : array-like
279
+ Matrix of p-values. Must be of shape (N, N, tau_max + 1).
280
+ tau_min : int, default: 0
281
+ Minimum time lag. Only used as consistency check of link_assumptions.
282
+ tau_max : int, default: 1
283
+ Maximum time lag. Must be larger or equal to tau_min. Only used as
284
+ consistency check of link_assumptions.
285
+ link_assumptions : dict or None
286
+ Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
287
+ assumptions about links. This initializes the graph with entries
288
+ graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
289
+ implies that a directed link from i to j at lag 0 must exist.
290
+ Valid link types are 'o-o', '-->', '<--'. In addition, the middle
291
+ mark can be '?' instead of '-'. Then '-?>' implies that this link
292
+ may not exist, but if it exists, its orientation is '-->'. Link
293
+ assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
294
+ requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
295
+ does not appear in the dictionary, it is assumed absent. That is,
296
+ if link_assumptions is not None, then all links have to be specified
297
+ or the links are assumed absent.
298
+ fdr_method : str, optional (default: 'fdr_bh')
299
+ Correction method, currently implemented is Benjamini-Hochberg
300
+ False Discovery Rate method.
301
+ exclude_contemporaneous : bool, optional (default: True)
302
+ Whether to include contemporaneous links in correction.
303
+
304
+ Returns
305
+ -------
306
+ q_matrix : array-like
307
+ Matrix of shape (N, N, tau_max + 1) containing corrected p-values.
308
+ """
309
+
310
+ def _ecdf(x):
311
+ """No frills empirical cdf used in fdr correction.
312
+ """
313
+ nobs = len(x)
314
+ return np.arange(1, nobs + 1) / float(nobs)
315
+
316
+ # Get the shape parameters from the p_matrix
317
+ _, N, tau_max_plusone = p_matrix.shape
318
+ # Check the limits on tau
319
+ self._check_tau_limits(tau_min, tau_max)
320
+ # Include only link_assumptions if given
321
+ if link_assumptions != None:
322
+ # Create a mask for these values
323
+ mask = np.zeros((N, N, tau_max_plusone), dtype='bool')
324
+ _int_link_assumptions = self._set_link_assumptions(link_assumptions, tau_min, tau_max)
325
+ for j, links_ in _int_link_assumptions.items():
326
+ for link in links_:
327
+ i, lag = link
328
+ if _int_link_assumptions[j][link] not in ["<--", "<?-"]:
329
+ mask[i, j, abs(lag)] = True
330
+ else:
331
+ # Create a mask for these values
332
+ mask = np.ones((N, N, tau_max_plusone), dtype='bool')
333
+ # Ignore values from lag-zero 'autocorrelation' indices
334
+ mask[range(N), range(N), 0] = False
335
+ # Exclude all contemporaneous values if requested
336
+ if exclude_contemporaneous:
337
+ mask[:, :, 0] = False
338
+ # Create the return value
339
+ q_matrix = np.array(p_matrix)
340
+ # Use the multiple tests function
341
+ if fdr_method is None or fdr_method == 'none':
342
+ pass
343
+ elif fdr_method == 'fdr_bh':
344
+ pvs = p_matrix[mask]
345
+ pvals_sortind = np.argsort(pvs)
346
+ pvals_sorted = np.take(pvs, pvals_sortind)
347
+
348
+ ecdffactor = _ecdf(pvals_sorted)
349
+
350
+ pvals_corrected_raw = pvals_sorted / ecdffactor
351
+ pvals_corrected = np.minimum.accumulate(
352
+ pvals_corrected_raw[::-1])[::-1]
353
+ del pvals_corrected_raw
354
+
355
+ pvals_corrected[pvals_corrected > 1] = 1
356
+ pvals_corrected_ = np.empty_like(pvals_corrected)
357
+ pvals_corrected_[pvals_sortind] = pvals_corrected
358
+ del pvals_corrected
359
+
360
+ q_matrix[mask] = pvals_corrected_
361
+
362
+ else:
363
+ raise ValueError('Only FDR method fdr_bh implemented')
364
+
365
+ # Return the new matrix
366
+ return q_matrix
367
+
368
+
369
+ def _get_adj_time_series(self, graph, include_conflicts=True, sort_by=None):
370
+ """Helper function that returns dictionary of adjacencies from graph.
371
+
372
+ Parameters
373
+ ----------
374
+ graph : array of shape [N, N, tau_max+1]
375
+ Resulting causal graph, see description above for interpretation.
376
+ include_conflicts : bool, optional (default: True)
377
+ Whether conflicting links (marked as 2 in graph) should be returned.
378
+ sort_by : dict or none, optional (default: None)
379
+ If not None, the adjacencies are sorted by the absolute values of
380
+ the corresponding entries.
381
+
382
+ Returns
383
+ -------
384
+ adjt : dictionary
385
+ Adjacency dictionary.
386
+ """
387
+ N, N, tau_max_plusone = graph.shape
388
+ adjt = {}
389
+ if include_conflicts:
390
+ for j in range(N):
391
+ where = np.where(graph[:, j, :] != "")
392
+ adjt[j] = list(zip(*(where[0], -where[1])))
393
+ else:
394
+ for j in range(N):
395
+ where = np.where(np.logical_and.reduce((graph[:,j,:] != "",
396
+ graph[:,j,:] != "x-x",
397
+ graph[:,j,:] != "x?x")))
398
+ # where = np.where(graph[:, j, :] == 1)
399
+ adjt[j] = list(zip(*(where[0], -where[1])))
400
+
401
+ if sort_by is not None:
402
+ for j in range(N):
403
+ # Get the absolute value for all the test statistics
404
+ abs_values = {k: np.abs(sort_by[j][k]) for k in list(sort_by[j])
405
+ if k in adjt[j]}
406
+ adjt[j] = sorted(abs_values, key=abs_values.get, reverse=True)
407
+
408
+ return adjt
409
+
410
+ def _get_adj_time_series_contemp(self, graph, include_conflicts=True,
411
+ sort_by=None):
412
+ """Helper function that returns dictionary of contemporaneous
413
+ adjacencies from graph.
414
+
415
+ Parameters
416
+ ----------
417
+ graph : array of shape [N, N, tau_max+1]
418
+ Resulting causal graph, see description above for interpretation.
419
+ include_conflicts : bool, optional (default: True)
420
+ Whether conflicting links (marked as 2 in graph) should be returned.
421
+ sort_by : dict or none, optional (default: None)
422
+ If not None, the adjacencies are sorted by the absolute values of
423
+ the corresponding entries.
424
+
425
+ Returns
426
+ -------
427
+ adjt : dictionary
428
+ Contemporaneous adjacency dictionary.
429
+ """
430
+ N, N, tau_max_plusone = graph.shape
431
+ adjt = self._get_adj_time_series(graph,
432
+ include_conflicts=include_conflicts,
433
+ sort_by=sort_by)
434
+ for j in range(N):
435
+ adjt[j] = [a for a in adjt[j] if a[1] == 0]
436
+ # adjt[j] = list(np.where(graph[:,j,0] != 0)[0])
437
+
438
+ return adjt
439
+
440
+
441
+ def _get_simplicial_node(self, circle_cpdag, variable_order):
442
+ """Find simplicial nodes in circle component CPDAG.
443
+
444
+ A vertex V is simplicial if all vertices adjacent to V are also adjacent
445
+ to each other (form a clique).
446
+
447
+ Parameters
448
+ ----------
449
+ circle_cpdag : array of shape (N, N, tau_max+1)
450
+ Circle component of PCMCIplus graph.
451
+ variable_order : list of length N
452
+ Order of variables in which to search for simplicial nodes.
453
+
454
+ Returns
455
+ -------
456
+ (j, adj_j) or None
457
+ First found simplicial node and its adjacencies.
458
+ """
459
+
460
+ for j in variable_order:
461
+ adj_j = np.where(np.logical_or(circle_cpdag[:,j,0] == "o-o",
462
+ circle_cpdag[:,j,0] == "o?o"))[0].tolist()
463
+
464
+ # Make sure the node has any adjacencies
465
+ all_adjacent = len(adj_j) > 0
466
+
467
+ # If it has just one adjacency, it's also simplicial
468
+ if len(adj_j) == 1:
469
+ return (j, adj_j)
470
+ else:
471
+ for (var1, var2) in itertools.combinations(adj_j, 2):
472
+ if circle_cpdag[var1, var2, 0] == "":
473
+ all_adjacent = False
474
+ break
475
+
476
+ if all_adjacent:
477
+ return (j, adj_j)
478
+
479
+ return None
480
+
481
    def _get_dag_from_cpdag(self, cpdag_graph, variable_order):
        """Yields one member of the Markov equivalence class of a CPDAG.

        Removes conflicting edges.

        Used in PCMCI to run model selection on the output of PCMCIplus in order
        to, e.g., optimize pc_alpha.

        Based on Zhang 2008, Theorem 2 (simplified for CPDAGs): Let H be the
        graph resulting from the following procedure applied to a CPDAG:

        Consider the circle component of the CPDAG (sub graph consisting of all
        (o-o edges, i.e., only for contemporaneous links), CPDAG^C and turn into
        a DAG with no unshielded colliders. Then (H is a member of the Markov
        equivalence class of the CPDAG.

        We use the approach mentioned in Colombo and Maathuis (2015) Lemma 7.6:
        First note that CPDAG^C is chordal, that is, any cycle of length four or
        more has a chord, which is an edge joining two vertices that are not
        adjacent in the cycle; see the proof of Lemma 4.1 of Zhang (2008b). Any
        chordal graph with more than one vertex has two simplicial vertices,
        that is, vertices V such that all vertices adjacent to V are also
        adjacent to each other. We choose such a vertex V1 and orient any edges
        incident to V1 into V1. Since V1 is simplicial, this does not create
        unshielded colliders. We then remove V1 and these edges from the graph.
        The resulting graph is again chordal and therefore again has at least
        two simplicial vertices. Choose such a vertex V2 , and orient any edges
        incident to V2 into V2. We continue this procedure until all edges are
        oriented. The resulting ordering is called a perfect elimination scheme
        for CPDAG^C. Then the combined graph with the directed edges already
        contained in the CPDAG is returned.

        Parameters
        ----------
        cpdag_graph : array of shape (N, N, tau_max+1)
            Result of PCMCIplus, a CPDAG.
        variable_order : list of length N
            Order of variables in which to search for simplicial nodes.

        Returns
        -------
        dag : array of shape (N, N, tau_max+1)
            One member of the Markov equivalence class of the CPDAG.

        Raises
        ------
        ValueError
            If a remaining contemporaneous link still carries the
            uncertain middle mark '?'.
        """

        # TODO: Check whether CPDAG is chordal

        # Initialize resulting MAG
        dag = np.copy(cpdag_graph)

        # Turn circle component CPDAG^C into a DAG with no unshielded colliders.
        circle_cpdag = np.copy(cpdag_graph)
        # All lagged links are directed by time, remove them here
        circle_cpdag[:,:,1:] = ""
        # Also remove conflicting links
        circle_cpdag[circle_cpdag=="x-x"] = ""
        # Find undirected links, remove directed links
        # NOTE: middle mark '?' must not survive to this stage; that would
        # indicate an unfinished graph.
        for i, j, tau in zip(*np.where(circle_cpdag != "")):
            if circle_cpdag[i,j,0][1] == '?':
                raise ValueError("Invalid middle mark.")
            if circle_cpdag[i,j,0] == "-->":
                circle_cpdag[i,j,0] = ""

        # Iterate through simplicial nodes (perfect elimination scheme)
        simplicial_node = self._get_simplicial_node(circle_cpdag,
                                                    variable_order)
        while simplicial_node is not None:

            # Choose such a vertex V1 and orient any edges incident to V1 into
            # V1 in the MAG And remove V1 and these edges from the circle
            # component PAG
            (j, adj_j) = simplicial_node
            for var in adj_j:
                dag[var, j, 0] = "-->"
                dag[j, var, 0] = "<--"
                # Removing both directions deletes the edge from the
                # remaining circle component.
                circle_cpdag[var, j, 0] = circle_cpdag[j, var, 0] = ""

            # Iterate until no simplicial node (i.e., no circle edge) is left
            simplicial_node = self._get_simplicial_node(circle_cpdag,
                                                        variable_order)

        return dag
564
+ def convert_to_string_graph(self, graph_bool):
565
+ """Converts the 0,1-based graph returned by PCMCI to a string array
566
+ with links '-->'.
567
+
568
+ Parameters
569
+ ----------
570
+ graph_bool : array
571
+ 0,1-based graph array output by PCMCI.
572
+
573
+ Returns
574
+ -------
575
+ graph : array
576
+ graph as string array with links '-->'.
577
+ """
578
+
579
+ graph = np.zeros(graph_bool.shape, dtype='<U3')
580
+ graph[:] = ""
581
+ # Lagged links
582
+ graph[:,:,1:][graph_bool[:,:,1:]==1] = "-->"
583
+ # Unoriented contemporaneous links
584
+ graph[:,:,0][np.logical_and(graph_bool[:,:,0]==1,
585
+ graph_bool[:,:,0].T==1)] = "o-o"
586
+ # Conflicting contemporaneous links
587
+ graph[:,:,0][np.logical_and(graph_bool[:,:,0]==2,
588
+ graph_bool[:,:,0].T==2)] = "x-x"
589
+ # Directed contemporaneous links
590
+ for (i,j) in zip(*np.where(
591
+ np.logical_and(graph_bool[:,:,0]==1, graph_bool[:,:,0].T==0))):
592
+ graph[i,j,0] = "-->"
593
+ graph[j,i,0] = "<--"
594
+
595
+ return graph
596
+
597
    def symmetrize_p_and_val_matrix(self, p_matrix, val_matrix, link_assumptions, conf_matrix=None):
        """Symmetrizes the p_matrix, val_matrix, and conf_matrix based on link_assumptions
        and the larger p-value.

        The matrices are modified in place at their lag-zero entries: for
        unoriented links ('o-o'/'o?o') the larger of the two p-values wins
        for both directions; for oriented links ('-->'/'-?>') the (i, j)
        values are copied onto (j, i).

        Parameters
        ----------
        val_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of test statistic values.
        p_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of p-values. Set to 1 if val_only=True.
        conf_matrix : array of shape [N, N, tau_max+1,2]
            Estimated matrix of confidence intervals of test statistic values.
            Only computed if set in cond_ind_test, where also the percentiles
            are set.
        link_assumptions : dict or None
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.

        Returns
        -------
        results : dict
            Dictionary with keys 'val_matrix', 'p_matrix', 'conf_matrix'
            holding the (in-place symmetrized) input arrays.
        """

        # Symmetrize p_matrix and val_matrix and conf_matrix
        for i in range(self.N):
            for j in range(self.N):
                # If both the links are present in link_assumptions, symmetrize using maximum p-value
                # if ((i, 0) in link_assumptions[j] and (j, 0) in link_assumptions[i]):
                if (i, 0) in link_assumptions[j]:
                    if link_assumptions[j][(i, 0)] in ["o-o", 'o?o']:
                        # Unoriented: keep the more conservative (larger)
                        # p-value in both directions.
                        if (p_matrix[i, j, 0]
                                >= p_matrix[j, i, 0]):
                            p_matrix[j, i, 0] = p_matrix[i, j, 0]
                            val_matrix[j, i, 0] = val_matrix[i, j, 0]
                            if conf_matrix is not None:
                                conf_matrix[j, i, 0] = conf_matrix[i, j, 0]

                    # If only one of the links is present in link_assumptions, symmetrize using the p-value of the link present
                    # elif ((i, 0) in link_assumptions[j] and (j, 0) not in link_assumptions[i]):
                    elif link_assumptions[j][(i, 0)] in ["-->", '-?>']:
                        p_matrix[j, i, 0] = p_matrix[i, j, 0]
                        val_matrix[j, i, 0] = val_matrix[i, j, 0]
                        if conf_matrix is not None:
                            conf_matrix[j, i, 0] = conf_matrix[i, j, 0]
                    else:
                        # Links not present in link_assumptions
                        # NOTE(review): this branch is also reached for links
                        # that ARE present with other types (e.g. '<--'); those
                        # appear handled when the mirrored (j, i) pair is
                        # visited — confirm intended.
                        pass

        # Return the values as a dictionary and store in class
        results = {'val_matrix': val_matrix,
                   'p_matrix': p_matrix,
                   'conf_matrix': conf_matrix}
        return results
667
    def run_sliding_window_of(self, method, method_args,
                              window_step,
                              window_length,
                              conf_lev = 0.9,
                              ):
        """Runs chosen method on sliding windows taken from DataFrame.

        The function returns summary_results and all_results (containing the
        individual window results). summary_results contains val_matrix_mean
        and val_matrix_interval, the latter containing the confidence bounds for
        conf_lev. If the method also returns a graph, then 'most_frequent_links'
        containing the most frequent link outcome (either 0 or 1 or a specific
        link type) in each entry of graph, as well as 'link_frequency',
        containing the occurrence frequency of the most frequent link outcome,
        are returned.

        Parameters
        ----------
        method : str
            Chosen method among valid functions in PCMCI.
        method_args : dict
            Arguments passed to method.
        window_step : int
            Time step of windows.
        window_length : int
            Length of sliding window.
        conf_lev : float, optional (default: 0.9)
            Two-sided confidence interval for summary results.

        Returns
        -------
        dict
            Dictionary with keys 'summary_results' and 'window_results'
            (results for every sliding window).

        Raises
        ------
        ValueError
            If method is invalid, reference points are set on the
            dataframe, cond_ind_test recycles residuals, or no window
            fits the data.
        """

        # Only these estimation methods are supported on windows.
        valid_methods = ['run_pc_stable',
                         'run_mci',
                         'get_lagged_dependencies',
                         'run_fullci',
                         'run_bivci',
                         'run_pcmci',
                         'run_pcalg',
                         'run_lpcmci',
                         'run_jpcmciplus',
                         # 'run_pcalg_non_timeseries_data',
                         'run_pcmciplus',]

        if method not in valid_methods:
            raise ValueError("method must be one of %s" % str(valid_methods))

        # Windows are implemented via reference_points, so user-set
        # reference points would be overwritten and are rejected.
        if self.dataframe.reference_points_is_none is False:
            raise ValueError("Reference points are not accepted in "
                             "sliding windows analysis, align data before and use masking"
                             " and/or missing values.")

        T = self.dataframe.largest_time_step

        if self.cond_ind_test.recycle_residuals:
            # recycle_residuals clashes with sliding windows...
            raise ValueError("cond_ind_test.recycle_residuals must be False.")

        if self.verbosity > 0:
            print("\n##\n## Running sliding window analysis of %s " % method +
                  "\n##\n" +
                  "\nwindow_step = %s \n" % window_step +
                  "\nwindow_length = %s \n" % window_length
                  )

        # Saved so the dataframe can be restored after the analysis.
        original_reference_points = deepcopy(self.dataframe.reference_points)

        window_start_points = np.arange(0, T - window_length, window_step)
        n_windows = len(window_start_points)

        if len(window_start_points) == 0:
            raise ValueError("Empty list of windows, check window_length and window_step!")

        window_results = {}
        for iw, w in enumerate(window_start_points):
            if self.verbosity > 0:
                print("\n# Window start %s (%d/%d) \n" %(w, iw+1, len(window_start_points)))
            # Construct reference_points from window
            time_window = np.arange(w, w + window_length, 1)
            # Remove points beyond T
            time_window = time_window[time_window < T]

            # Restrict the dataframe to the current window and dispatch to
            # the chosen method; deepcopy decouples the result from any
            # state the method may mutate later.
            self.dataframe.reference_points = time_window
            window_res = deepcopy(getattr(self, method)(**method_args))

            # Aggregate val_matrix and other arrays to new arrays with
            # windows as first dimension. Lists and other objects
            # are stored in dictionary
            for key in window_res:
                res_item = window_res[key]
                if iw == 0:
                    if type(res_item) is np.ndarray:
                        window_results[key] = np.empty((n_windows,)
                                                       + res_item.shape,
                                                       dtype=res_item.dtype)
                    else:
                        window_results[key] = {}

                window_results[key][iw] = res_item

        # Reset to original_reference_points data for further analyses
        # self.dataframe.values[0] = original_data
        self.dataframe.reference_points = original_reference_points

        # Generate summary results
        summary_results = self.return_summary_results(results=window_results,
                                                      conf_lev=conf_lev)

        return {'summary_results': summary_results,
                'window_results': window_results}
780
def run_bootstrap_of(self, method, method_args,
                     boot_samples=100,
                     boot_blocklength=1,
                     conf_lev=0.9, aggregation="majority", seed=None):
    """Runs chosen method on bootstrap samples drawn from DataFrame.

    Bootstraps for tau=0 are drawn from [2xtau_max, ..., T] and all lagged
    variables constructed in DataFrame.construct_array are consistently
    shifted with respect to this bootstrap sample to ensure that lagged
    relations in the bootstrap sample are preserved.

    The function returns summary_results and all_results (containing the
    individual bootstrap results). summary_results contains
    val_matrix_mean and val_matrix_interval, the latter containing the
    confidence bounds for conf_lev. If the method also returns a graph,
    then 'most_frequent_links' containing the most frequent link outcome
    (specific link type) in each entry of graph, as well
    as 'link_frequency', containing the occurence frequency of the most
    frequent link outcome, are returned. Two aggregation methods are
    available for 'most_frequent_links'. By default, "majority"
    provides the most frequent link outcome. Alternatively
    "no_edge_majority" provides an alternative aggregation strategy.
    As explained in Debeire et al. (2024), in the first step of this
    alternative approach, the orientation of edges is ignored, and the
    focus is only on determining the adjacency of each pair of vertices.
    This is done through majority voting between no edge and all other
    edge types. In the second step, the adjacencies identified in the
    first step are oriented based on majority voting. This alternative
    approach ensures that no edge can only be voted on if it appears
    in more than half of the bootstrap ensemble of graphs.

    Assumes that method uses cond_ind_test.run_test() function with cut_off
    = '2xtau_max'.

    Utilizes parallelization via joblib.

    Parameters
    ----------
    method : str
        Chosen method among valid functions in PCMCI.
    method_args : dict
        Arguments passed to method.
    boot_samples : int
        Number of bootstrap samples to draw.
    boot_blocklength : int, optional (default: 1)
        Block length for block-bootstrap.
    conf_lev : float, optional (default: 0.9)
        Two-sided confidence interval for summary results.
    aggregation : str, optional (default: "majority")
        Chosen aggregation strategy: "majority" or "no_edge_majority".
    seed : int, optional(default = None)
        Seed for RandomState (default_rng)

    Returns
    -------
    Dictionary of summary results and results for every bootstrap sample.
    """

    valid_methods = ['run_pc_stable',
                     'run_mci',
                     'get_lagged_dependencies',
                     'run_fullci',
                     'run_bivci',
                     'run_pcmci',
                     'run_pcalg',
                     'run_pcalg_non_timeseries_data',
                     'run_pcmciplus',
                     'run_lpcmci',
                     'run_jpcmciplus',
                     ]
    if method not in valid_methods:
        raise ValueError("method must be one of %s" % str(valid_methods))

    # Root seed sequence; spawned below into one independent child seed
    # per bootstrap realization so that parallel workers do not share
    # random streams.
    seed_sequence = np.random.SeedSequence(seed)

    # tau_max is required by DataFrame to construct consistent bootstrap
    # draws of all lagged variables (the value itself is consumed inside
    # the called method).
    if 'tau_max' not in method_args:
        raise ValueError("tau_max must be explicitely set in method_args.")

    if self.cond_ind_test.recycle_residuals:
        # Cached residuals would leak information between bootstrap draws.
        raise ValueError("cond_ind_test.recycle_residuals must be False.")

    if self.verbosity > 0:
        print("\n##\n## Running Bootstrap of %s " % method +
              "\n##\n" +
              "\nboot_samples = %s \n" % boot_samples +
              "\nboot_blocklength = %s \n" % boot_blocklength
              )

    # Bootstrap configuration read by the dataframe when constructing
    # arrays; the per-sample random state is set in
    # parallelized_bootstraps.
    self.dataframe.bootstrap = {'boot_blocklength': boot_blocklength}

    # One independent child seed per bootstrap sample.
    child_seeds = seed_sequence.spawn(boot_samples)

    aggregated_results = Parallel(n_jobs=-1)(
        delayed(self.parallelized_bootstraps)(
            method, method_args, boot_seed=child_seeds[b])
        for b in range(boot_samples))

    # Stack array-valued results into new arrays with boot_samples as
    # first dimension; lists and other objects are stored per-sample in
    # dictionaries.
    boot_results = {}
    for b, boot_res in enumerate(aggregated_results):
        for key, res_item in boot_res.items():
            # NOTE(review): exact type check (not isinstance) keeps
            # ndarray subclasses (e.g. masked arrays) in the dict path.
            if type(res_item) is np.ndarray:
                if b == 0:
                    boot_results[key] = np.empty(
                        (boot_samples,) + res_item.shape,
                        dtype=res_item.dtype)
                boot_results[key][b] = res_item
            else:
                boot_results.setdefault(key, {})[b] = res_item

    # Generate summary results
    summary_results = self.return_summary_results(results=boot_results,
                                                  conf_lev=conf_lev,
                                                  aggregation=aggregation)

    # Reset bootstrap to None so further analyses use the original data.
    self.dataframe.bootstrap = None

    return {'summary_results': summary_results,
            'boot_results': boot_results}
919
+
920
+ def parallelized_bootstraps(self, method, method_args, boot_seed):
921
+ # Pass seed sequence for this boot and set it in dataframe
922
+ # which will generate a draw with replacement
923
+ boot_random_state = np.random.default_rng(boot_seed)
924
+ self.dataframe.bootstrap['random_state'] = boot_random_state
925
+ boot_res = getattr(self, method)(**method_args)
926
+ return boot_res
927
+
928
@staticmethod
def return_summary_results(results, conf_lev=0.9, aggregation="majority"):
    """Return summary results for causal graphs.

    The function returns summary_results of an array of PCMCI(+) results.
    Summary_results contains val_matrix_mean and val_matrix_interval, the
    latter containing the confidence bounds for conf_lev. If the method
    also returns a graph, then 'most_frequent_links' containing the most
    frequent link outcome (either 0 or 1 or a specific link type) in each
    entry of graph, as well as 'link_frequency', containing the occurence
    frequency of the most frequent link outcome, are returned. Two
    aggregation methods are available for 'most_frequent_links'. By
    default, "majority" provides the most frequent link outcome.
    Alternatively "no_edge_majority" provides an alternative aggregation
    strategy. As explained in Debeire et al. (2024), in the first step of
    this alternative approach, the orientation of edges is ignored, and
    the focus is only on determining the adjacency of each pair of
    vertices. This is done through majority voting between no edge and
    all other edge types. In the second step, the adjacencies identified
    in the first step are oriented based on majority voting. This
    alternative approach ensures that no edge can only be voted on if it
    appears in more than half of the bootstrap ensemble of graphs.

    Parameters
    ----------
    results : dict
        Results dictionary where the numpy arrays graph and val_matrix are
        of shape (n_results, N, N, tau_max + 1).
    conf_lev : float, optional (default: 0.9)
        Two-sided confidence interval for summary results.
    aggregation : str, optional (default: "majority")
        Chosen aggregation strategy: "majority" or "no_edge_majority".

    Returns
    -------
    Dictionary of summary results.
    """

    valid_aggregations = {"majority", "no_edge_majority"}
    if aggregation not in valid_aggregations:
        raise ValueError(f"Invalid aggregation mode: {aggregation}. Expected one of {valid_aggregations}")

    # Generate summary results
    summary_results = {}

    if 'graph' in results:
        # FIX: do not unpack both graph axes into the same name N.
        n_results, N, _, tau_max_plusone = results['graph'].shape
        tau_max = tau_max_plusone - 1
        summary_results['most_frequent_links'] = np.zeros(
            (N, N, tau_max_plusone), dtype=results['graph'][0].dtype)
        summary_results['link_frequency'] = np.zeros(
            (N, N, tau_max_plusone), dtype='float')

        # Preferred order in case of ties, in the spirit of keeping the
        # least assertive and most cautious claims in the presence of
        # ties. In case of ties between other link types, a conflicting
        # link "x-x" is assigned.
        preferred_order = [
            "",     # No link (most conservative)
            "x-x",  # Conflict (used to break <--> vs --> vs <-- ties)
            "o-o",  # Undirected link (lag 0 only)
            # rest is solved by conflict
        ]

        def _break_tie(candidates):
            # One clear winner: take it; otherwise the most cautious
            # candidate by preferred_order, falling back to conflict.
            if len(candidates) == 1:
                return candidates[0]
            ordered = [link for link in preferred_order
                       if link in candidates]
            return ordered[0] if ordered else "x-x"

        for (i, j) in itertools.product(range(N), range(N)):
            for abstau in range(0, tau_max + 1):
                links, counts = np.unique(results['graph'][:, i, j, abstau],
                                          return_counts=True)
                if aggregation == "majority":
                    most_freq = links[counts == counts.max()]
                    summary_results['most_frequent_links'][i, j, abstau] = \
                        _break_tie(most_freq)
                    # Frequency sums over all tied winners.
                    summary_results['link_frequency'][i, j, abstau] = \
                        counts[counts == counts.max()].sum() / float(n_results)
                else:  # "no_edge_majority" (validated above)
                    # Count of "" outcomes (0 when "" never occurs).
                    if counts[links == ""].size == 0:
                        freq_of_no_edge = 0
                    else:
                        freq_of_no_edge = int(counts[links == ""].sum())

                    freq_of_adjacency = n_results - freq_of_no_edge
                    if freq_of_adjacency > freq_of_no_edge:
                        # Step 2: orient among actual-edge outcomes only.
                        adja_links = np.delete(links, np.where(links == ""))
                        adja_counts = np.delete(counts, np.where(links == ""))
                        most_freq_adja = adja_links[
                            adja_counts == adja_counts.max()]
                        summary_results['most_frequent_links'][i, j, abstau] = \
                            _break_tie(most_freq_adja)
                        summary_results['link_frequency'][i, j, abstau] = \
                            adja_counts[adja_counts == adja_counts.max()].sum() \
                            / float(n_results)
                    else:
                        # No-edge wins the adjacency vote.
                        summary_results['most_frequent_links'][i, j, abstau] = ""
                        summary_results['link_frequency'][i, j, abstau] = \
                            freq_of_no_edge / float(n_results)

    # Confidence intervals for val_matrix; interval is two-sided
    c_int = (1. - (1. - conf_lev) / 2.)
    summary_results['val_matrix_mean'] = np.mean(
        results['val_matrix'], axis=0)

    summary_results['val_matrix_interval'] = np.stack(np.percentile(
        results['val_matrix'], axis=0,
        q=[100 * (1. - c_int), 100 * c_int]), axis=3)
    return summary_results
1055
+
1056
@staticmethod
def graph_to_dict(graph):
    """Convert a string graph array into a dictionary of links.

    Parameters
    ---------
    graph : array of shape (N, N, tau_max+1)
        Matrix format of graph in string format.

    Returns
    -------
    links : dict
        Dictionary of form {0:{(0, -1): o-o, ...}, 1:{...}, ...}.
    """
    n_vars = graph.shape[0]

    # Start with an (empty) entry for every target variable.
    links = {j: {} for j in range(n_vars)}

    # Every non-empty cell (i, j, tau) is a link into variable j at
    # lag -tau, labeled with its string link type.
    for i, j, tau in np.argwhere(graph != ''):
        links[j][(i, -tau)] = graph[i, j, tau]

    return links
1078
+
1079
+ # @staticmethod
1080
+ def _dict_to_graph(self, links, tau_max=None):
1081
+ """Helper function to convert dictionary of links to graph.
1082
+
1083
+ Parameters
1084
+ ---------
1085
+ links : dict
1086
+ Dictionary of form {0:{(0, -1): 'o-o'}, ...}, 1:{...}, ...}.
1087
+
1088
+ Returns
1089
+ -------
1090
+ graph : array of shape (N, N, tau_max+1)
1091
+ Matrix format of graph in string format.
1092
+ """
1093
+
1094
+ N = len(links)
1095
+
1096
+ # Get maximum time lag
1097
+ max_lag = 0
1098
+ for j in range(N):
1099
+ for link in links[j]:
1100
+ var, lag = link
1101
+ if isinstance(links[j], dict):
1102
+ link_type = links[j][link]
1103
+ if link_type != "":
1104
+ max_lag = max(max_lag, abs(lag))
1105
+ else:
1106
+ max_lag = max(max_lag, abs(lag))
1107
+
1108
+ if tau_max is None:
1109
+ tau_max = max_lag
1110
+ else:
1111
+ if tau_max < max_lag:
1112
+ raise ValueError("maxlag(links) > tau_max")
1113
+
1114
+ graph = np.zeros((N, N, tau_max + 1), dtype='<U3')
1115
+ graph[:] = ""
1116
+ for j in range(N):
1117
+ for link in links[j]:
1118
+ i, tau = link
1119
+ if isinstance(links[j], dict):
1120
+ link_type = links[j][link]
1121
+ graph[i, j, abs(tau)] = link_type
1122
+ else:
1123
+ graph[i, j, abs(tau)] = '-->'
1124
+
1125
+ return graph
1126
+
1127
@staticmethod
def get_graph_from_dict(links, tau_max=None):
    """Convert a dictionary of links into graph array format.

    Parameters
    ---------
    links : dict
        Dictionary of form {0:[((0, -1), coeff, func), ...], 1:[...], ...}.
        Also format {0:[(0, -1), ...], 1:[...], ...} is allowed.
    tau_max : int or None
        Maximum lag. If None, the maximum lag in links is used.

    Returns
    -------
    graph : array of shape (N, N, tau_max+1)
        Matrix format of graph with 1 for true links and 0 else.
    """

    def _minmax_lag(link_dict):
        """Return (min_lag, max_lag) over all links; links given in the
        ((var, lag), coeff, func) format only count when coeff != 0."""
        lo, hi = np.inf, 0
        for j in range(len(link_dict)):
            for props in link_dict[j]:
                if len(props) > 2:
                    # ((var, lag), coeff, func) format
                    (_, lag), coeff = props[0], props[1]
                    if coeff == 0.:
                        continue
                else:
                    # plain (var, lag) format
                    _, lag = props
                lo = min(lo, abs(lag))
                hi = max(hi, abs(lag))
        return lo, hi

    num_vars = len(links)

    _, max_lag = _minmax_lag(links)

    # Determine / validate the maximum lag of the output array.
    if tau_max is None:
        tau_max = max_lag
    elif max_lag > tau_max:
        raise ValueError("tau_max is smaller than maximum lag = %d "
                         "found in links, use tau_max=None or larger "
                         "value" % max_lag)

    graph = np.zeros((num_vars, num_vars, tau_max + 1), dtype='<U3')
    for j in links.keys():
        for props in links[j]:
            if len(props) > 2:
                (var, lag), coeff = props[0], props[1]
                if coeff == 0.:
                    continue
            else:
                var, lag = props
            graph[var, j, abs(lag)] = "-->"
            # Contemporaneous links get the mirrored arrow head.
            if lag == 0:
                graph[j, var, 0] = "<--"

    return graph
1201
+
1202
@staticmethod
def build_link_assumptions(link_assumptions_absent_link_means_no_knowledge,
                           n_component_time_series,
                           tau_max,
                           tau_min=0):
    """Expand partial link assumptions into a fully specified dictionary.

    Starts from complete ignorance for every candidate link ("o?>" for
    lagged, "o?o" for contemporaneous pairs, self-links at lag 0
    excluded), then overlays the given assumptions: an explicit "" entry
    removes the candidate link, any other entry replaces the default.
    """
    # Default: no knowledge about any admissible link.
    out = {}
    for j in range(n_component_time_series):
        out[j] = {}
        for i in range(n_component_time_series):
            for tau_i in range(tau_min, tau_max + 1):
                if tau_i == 0 and i == j:
                    continue  # no self-link at lag 0
                out[j][(i, -tau_i)] = "o?>" if tau_i > 0 else "o?o"

    # Overlay user-provided assumptions.
    for j, links_j in link_assumptions_absent_link_means_no_knowledge.items():
        for (i, lag_i), link_ij in links_j.items():
            if link_ij == "":
                del out[j][(i, lag_i)]
            else:
                out[j][(i, lag_i)] = link_ij
    return out