tigramite-fast 5.2.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. tigramite/__init__.py +0 -0
  2. tigramite/causal_effects.py +1525 -0
  3. tigramite/causal_mediation.py +1592 -0
  4. tigramite/data_processing.py +1574 -0
  5. tigramite/graphs.py +1509 -0
  6. tigramite/independence_tests/LBFGS.py +1114 -0
  7. tigramite/independence_tests/__init__.py +0 -0
  8. tigramite/independence_tests/cmiknn.py +661 -0
  9. tigramite/independence_tests/cmiknn_mixed.py +1397 -0
  10. tigramite/independence_tests/cmisymb.py +286 -0
  11. tigramite/independence_tests/gpdc.py +664 -0
  12. tigramite/independence_tests/gpdc_torch.py +820 -0
  13. tigramite/independence_tests/gsquared.py +190 -0
  14. tigramite/independence_tests/independence_tests_base.py +1310 -0
  15. tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
  16. tigramite/independence_tests/pairwise_CI.py +383 -0
  17. tigramite/independence_tests/parcorr.py +369 -0
  18. tigramite/independence_tests/parcorr_mult.py +485 -0
  19. tigramite/independence_tests/parcorr_wls.py +451 -0
  20. tigramite/independence_tests/regressionCI.py +403 -0
  21. tigramite/independence_tests/robust_parcorr.py +403 -0
  22. tigramite/jpcmciplus.py +966 -0
  23. tigramite/lpcmci.py +3649 -0
  24. tigramite/models.py +2257 -0
  25. tigramite/pcmci.py +3935 -0
  26. tigramite/pcmci_base.py +1218 -0
  27. tigramite/plotting.py +4735 -0
  28. tigramite/rpcmci.py +467 -0
  29. tigramite/toymodels/__init__.py +0 -0
  30. tigramite/toymodels/context_model.py +261 -0
  31. tigramite/toymodels/non_additive.py +1231 -0
  32. tigramite/toymodels/structural_causal_processes.py +1201 -0
  33. tigramite/toymodels/surrogate_generator.py +319 -0
  34. tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
  35. tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
  36. tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
  37. tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
  38. tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
tigramite/pcmci.py ADDED
@@ -0,0 +1,3935 @@
1
+ """Tigramite causal discovery for time series."""
2
+
3
+ # Author: Jakob Runge <jakob@jakob-runge.com>
4
+ #
5
+ # License: GNU General Public License v3.0
6
+
7
+ from __future__ import print_function
8
+ import warnings
9
+ import itertools
10
+ from collections import defaultdict
11
+ from copy import deepcopy
12
+ import numpy as np
13
+ import scipy.stats
14
+
15
+ from tigramite.pcmci_base import PCMCIbase
16
+
17
+ def _create_nested_dictionary(depth=0, lowest_type=dict):
18
+ """Create a series of nested dictionaries to a maximum depth. The first
19
+ depth - 1 nested dictionaries are defaultdicts, the last is a normal
20
+ dictionary.
21
+
22
+ Parameters
23
+ ----------
24
+ depth : int
25
+ Maximum depth argument.
26
+ lowest_type: callable (optional)
27
+ Type contained in leaves of tree. Ex: list, dict, tuple, int, float ...
28
+ """
29
+ new_depth = depth - 1
30
+ if new_depth <= 0:
31
+ return defaultdict(lowest_type)
32
+ return defaultdict(lambda: _create_nested_dictionary(new_depth))
33
+
34
+
35
+ def _nested_to_normal(nested_dict):
36
+ """Transforms the nested default dictionary into a standard dictionaries
37
+
38
+ Parameters
39
+ ----------
40
+ nested_dict : default dictionary of default dictionaries of ... etc.
41
+ """
42
+ if isinstance(nested_dict, defaultdict):
43
+ nested_dict = {k: _nested_to_normal(v) for k, v in nested_dict.items()}
44
+ return nested_dict
45
+
46
+
47
+ class PCMCI(PCMCIbase):
48
+ r"""PCMCI causal discovery for time series datasets.
49
+
50
+ PCMCI is a causal discovery framework for large-scale time series
51
+ datasets. This class contains several methods. The standard PCMCI method
52
+ addresses time-lagged causal discovery and is described in Ref [1] where
53
+ also further sub-variants are discussed. Lagged as well as contemporaneous
54
+ causal discovery is addressed with PCMCIplus and described in [5]. See the
55
+ tutorials for guidance in applying these methods.
56
+
57
+ PCMCI has:
58
+
59
+ * different conditional independence tests adapted to linear or
60
+ nonlinear dependencies, and continuously-valued or discrete data (
61
+ implemented in ``tigramite.independence_tests``)
62
+ * (mostly) hyperparameter optimization
63
+ * easy parallelization (separate script)
64
+ * handling of masked time series data
65
+ * false discovery control and confidence interval estimation
66
+
67
+
68
+ Notes
69
+ -----
70
+
71
+ .. image:: mci_schematic.*
72
+ :width: 200pt
73
+
74
+ In the PCMCI framework, the dependency structure of a set of time series
75
+ variables is represented in a *time series graph* as shown in the Figure.
76
+ The nodes of a time series graph are defined as the variables at
77
+ different times and a link indicates a conditional dependency that can be
78
+ interpreted as a causal dependency under certain assumptions (see paper).
79
+ Assuming stationarity, the links are repeated in time. The parents
80
+ :math:`\mathcal{P}` of a variable are defined as the set of all nodes
81
+ with a link towards it (blue and red boxes in Figure).
82
+
83
+ The different PCMCI methods estimate causal links by iterative
84
+ conditional independence testing. PCMCI can be flexibly combined with
85
+ any kind of conditional independence test statistic adapted to the kind
86
+ of data (continuous or discrete) and its assumed dependency types.
87
+ These are available in ``tigramite.independence_tests``.
88
+
89
+ NOTE: MCI test statistic values define a particular measure of causal
90
+ strength depending on the test statistic used. For example, ParCorr()
91
+ results in normalized values between -1 and 1. However, if you are
92
+ interested in quantifying causal effects, i.e., the effect of
93
+ hypothetical interventions, you may better look at the causal effect
94
+ estimation functionality of Tigramite.
95
+
96
+ References
97
+ ----------
98
+
99
+ [1] J. Runge, P. Nowack, M. Kretschmer, S. Flaxman, D. Sejdinovic,
100
+ Detecting and quantifying causal associations in large nonlinear time
101
+ series datasets. Sci. Adv. 5, eaau4996 (2019)
102
+ https://advances.sciencemag.org/content/5/11/eaau4996
103
+
104
+ [5] J. Runge,
105
+ Discovering contemporaneous and lagged causal relations in
106
+ autocorrelated nonlinear time series datasets
107
+ http://www.auai.org/~w-auai/uai2020/proceedings/579_main_paper.pdf
108
+
109
+ Parameters
110
+ ----------
111
+ dataframe : data object
112
+ This is the Tigramite dataframe object. Among others, it has the
113
+ attributes dataframe.values yielding a numpy array of shape (
114
+ observations T, variables N) and optionally a mask of the same shape.
115
+ cond_ind_test : conditional independence test object
116
+ This can be ParCorr or other classes from
117
+ ``tigramite.independence_tests`` or an external test passed as a
118
+ callable. This test can be based on the class
119
+ tigramite.independence_tests.CondIndTest.
120
+ verbosity : int, optional (default: 0)
121
+ Verbose levels 0, 1, ...
122
+
123
+ Attributes
124
+ ----------
125
+ all_parents : dictionary
126
+ Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
127
+ the conditioning-parents estimated with PC algorithm.
128
+ val_min : dictionary
129
+ Dictionary of form val_min[j][(i, -tau)] = float
130
+ containing the minimum absolute test statistic value for each link estimated in
131
+ the PC algorithm.
132
+ pval_max : dictionary
133
+ Dictionary of form pval_max[j][(i, -tau)] = float containing the maximum
134
+ p-value for each link estimated in the PC algorithm.
135
+ iterations : dictionary
136
+ Dictionary containing further information on algorithm steps.
137
+ N : int
138
+ Number of variables.
139
+ T : dict
140
+ Time series sample length of dataset(s).
141
+ """
142
+
143
def __init__(self, dataframe,
             cond_ind_test,
             verbosity=0):
    """Initialize PCMCI by forwarding all arguments to ``PCMCIbase``.

    Parameters
    ----------
    dataframe : data object
        Tigramite dataframe object, passed through to the base class.
    cond_ind_test : conditional independence test object
        Test object, passed through to the base class.
    verbosity : int, optional (default: 0)
        Verbose level, passed through to the base class.
    """

    # Init base class
    PCMCIbase.__init__(self, dataframe=dataframe,
                    cond_ind_test=cond_ind_test,
                    verbosity=verbosity)
151
+
152
+
153
+ def _iter_conditions(self, parent, conds_dim, all_parents):
154
+ """Yield next condition.
155
+
156
+ Yields next condition from lexicographically ordered conditions.
157
+
158
+ Parameters
159
+ ----------
160
+ parent : tuple
161
+ Tuple of form (i, -tau).
162
+ conds_dim : int
163
+ Cardinality in current step.
164
+ all_parents : list
165
+ List of form [(0, -1), (3, -2), ...].
166
+
167
+ Yields
168
+ -------
169
+ cond : list
170
+ List of form [(0, -1), (3, -2), ...] for the next condition.
171
+ """
172
+ all_parents_excl_current = [p for p in all_parents if p != parent]
173
+ for cond in itertools.combinations(all_parents_excl_current, conds_dim):
174
+ yield list(cond)
175
+
176
+ def _sort_parents(self, parents_vals):
177
+ """Sort current parents according to test statistic values.
178
+
179
+ Sorting is from strongest to weakest absolute values.
180
+
181
+ Parameters
182
+ ---------
183
+ parents_vals : dict
184
+ Dictionary of form {(0, -1):float, ...} containing the minimum test
185
+ statistic value of a link.
186
+
187
+ Returns
188
+ -------
189
+ parents : list
190
+ List of form [(0, -1), (3, -2), ...] containing sorted parents.
191
+ """
192
+ if self.verbosity > 1:
193
+ print("\n Sorting parents in decreasing order with "
194
+ "\n weight(i-tau->j) = min_{iterations} |val_{ij}(tau)| ")
195
+ # Get the absolute value for all the test statistics
196
+ abs_values = {k: np.abs(parents_vals[k]) for k in list(parents_vals)}
197
+ return sorted(abs_values, key=abs_values.get, reverse=True)
198
+
199
+ def _print_link_info(self, j, index_parent, parent, num_parents,
200
+ already_removed=False):
201
+ """Print info about the current link being tested.
202
+
203
+ Parameters
204
+ ----------
205
+ j : int
206
+ Index of current node being tested.
207
+ index_parent : int
208
+ Index of the current parent.
209
+ parent : tuple
210
+ Standard (i, tau) tuple of parent node id and time delay
211
+ num_parents : int
212
+ Total number of parents.
213
+ already_removed : bool
214
+ Whether parent was already removed.
215
+ """
216
+ link_marker = {True:"o?o", False:"-?>"}
217
+
218
+ abstau = abs(parent[1])
219
+ if self.verbosity > 1:
220
+ print("\n Link (%s % d) %s %s (%d/%d):" % (
221
+ self.var_names[parent[0]], parent[1], link_marker[abstau==0],
222
+ self.var_names[j],
223
+ index_parent + 1, num_parents))
224
+
225
+ if already_removed:
226
+ print(" Already removed.")
227
+
228
+ def _print_cond_info(self, Z, comb_index, pval, val):
229
+ """Print info about the condition
230
+
231
+ Parameters
232
+ ----------
233
+ Z : list
234
+ The current condition being tested.
235
+ comb_index : int
236
+ Index of the combination yielding this condition.
237
+ pval : float
238
+ p-value from this condition.
239
+ val : float
240
+ value from this condition.
241
+ """
242
+ var_name_z = ""
243
+ for i, tau in Z:
244
+ var_name_z += "(%s % .2s) " % (self.var_names[i], tau)
245
+ if len(Z) == 0: var_name_z = "()"
246
+ print(" Subset %d: %s gives pval = %.5f / val = % .3f" %
247
+ (comb_index, var_name_z, pval, val))
248
+
249
+ def _print_a_pc_result(self, nonsig, conds_dim, max_combinations):
250
+ """Print the results from the current iteration of conditions.
251
+
252
+ Parameters
253
+ ----------
254
+ nonsig : bool
255
+ Indicate non-significance.
256
+ conds_dim : int
257
+ Cardinality of the current step.
258
+ max_combinations : int
259
+ Maximum number of combinations of conditions of current cardinality
260
+ to test.
261
+ """
262
+ # Start with an indent
263
+ print_str = " "
264
+ # Determine the body of the text
265
+ if nonsig:
266
+ print_str += "Non-significance detected."
267
+ elif conds_dim > max_combinations:
268
+ print_str += "Still subsets of dimension" + \
269
+ " %d left," % (conds_dim) + \
270
+ " but q_max = %d reached." % (max_combinations)
271
+ else:
272
+ print_str += "No conditions of dimension %d left." % (conds_dim)
273
+ # Print the message
274
+ print(print_str)
275
+
276
+ def _print_converged_pc_single(self, converged, j, max_conds_dim):
277
+ """
278
+ Print statement about the convergence of the pc_stable_single algorithm.
279
+
280
+ Parameters
281
+ ----------
282
+ convergence : bool
283
+ true if convergence was reached.
284
+ j : int
285
+ Variable index.
286
+ max_conds_dim : int
287
+ Maximum number of conditions to test.
288
+ """
289
+ if converged:
290
+ print("\nAlgorithm converged for variable %s" %
291
+ self.var_names[j])
292
+ else:
293
+ print(
294
+ "\nAlgorithm not yet converged, but max_conds_dim = %d"
295
+ " reached." % max_conds_dim)
296
+
297
def _run_pc_stable_single(self, j,
                          link_assumptions_j=None,
                          tau_min=1,
                          tau_max=1,
                          save_iterations=False,
                          pc_alpha=0.2,
                          max_conds_dim=None,
                          max_combinations=1):
    """Lagged PC algorithm for estimating lagged parents of single variable.

    Starting from the candidate parents allowed by ``link_assumptions_j``,
    the method tests each candidate against condition sets of increasing
    cardinality and drops candidates found independent, re-sorting the
    survivors by their minimum absolute test statistic after each
    cardinality.

    Parameters
    ----------
    j : int
        Variable index.
    link_assumptions_j : dict
        Dictionary of form {(i, -tau): link_type, ...} holding the link
        assumptions for variable j only (it is indexed directly by
        (i, -tau) tuples here). Entries equal to (j, 0) and entries of
        type '<--' or '<?-' are not used as candidate parents. Links of
        type '-->' are never tested; they are recorded with val = 1 and
        pval = 0. The default None is not handled: the dictionary is
        iterated unconditionally.
    tau_min : int, optional (default: 1)
        Minimum time lag to test. Useful for variable selection in
        multi-step ahead predictions. Values below 1 are raised to 1.
    tau_max : int, optional (default: 1)
        Maximum time lag. Must be larger or equal to tau_min.
    save_iterations : bool, optional (default: False)
        Whether to save iteration step results such as conditions used.
    pc_alpha : float, optional (default: 0.2)
        Significance level in algorithm. Must lie in [0, 1]; it is
        compared numerically and passed to the test as ``alpha_or_thres``.
    max_conds_dim : int, optional (default: None)
        Maximum number of conditions to test. If None is passed, the
        default of ``_set_max_condition_dim`` is used.
    max_combinations : int, optional (default: 1)
        Maximum number of combinations of conditions of current cardinality
        to test in PC1 step.

    Returns
    -------
    results : dict
        Dictionary with the keys

        * 'parents' : list of estimated parents,
        * 'val_min' : dict {(0, -1):float, ...} with the minimum absolute
          test statistic value of each remaining link,
        * 'val_dict' : dict {(0, -1):float, ...} with the test statistic
          value recorded together with the maximum p-value of each link,
        * 'pval_max' : dict {(0, -1):float, ...} with the maximum p-value
          of each link across different conditions,
        * 'iterations' : dict with further information on algorithm steps
          (only filled if save_iterations is True).

    Raises
    ------
    ValueError
        If pc_alpha is outside [0, 1].
    """

    if pc_alpha < 0. or pc_alpha > 1.:
        raise ValueError("Choose 0 <= pc_alpha <= 1")

    # Initialize the dictionaries for the pval_max, val_dict, val_min
    # results
    pval_max = dict()
    val_dict = dict()
    val_min = dict()
    # Collect the candidate parents from the link assumptions into a new
    # list, so that the argument itself is left unchanged.
    parents = []
    for itau in link_assumptions_j:
        link_type = link_assumptions_j[itau]
        if itau != (j, 0) and link_type not in ['<--', '<?-']:
            parents.append(itau)

    # Replace the empty dictionaries from above: every candidate starts
    # with an explicit "not tested yet" (None) entry.
    val_dict = {(p[0], p[1]): None for p in parents}
    pval_max = {(p[0], p[1]): None for p in parents}

    # Define a nested defaultdict of depth 4 to save all information about
    # iterations
    iterations = _create_nested_dictionary(4)
    # Ensure tau_min is at least 1
    tau_min = max(1, tau_min)

    # Loop over all possible condition dimensions
    max_conds_dim = self._set_max_condition_dim(max_conds_dim,
                                                tau_min, tau_max)
    # Iteration through increasing number of conditions, i.e. from
    # [0, max_conds_dim] inclusive
    converged = False
    for conds_dim in range(max_conds_dim + 1):
        # (Re)initialize the list of non-significant links
        nonsig_parents = list()
        # Converged once no parent has conds_dim other parents left to
        # condition on
        if len(parents) - 1 < conds_dim:
            converged = True
            break
        # Print information about the current cardinality
        if self.verbosity > 1:
            print("\nTesting condition sets of dimension %d:" % conds_dim)

        # Iterate through all possible pairs (that have not converged yet)
        for index_parent, parent in enumerate(parents):
            # Print info about this link
            if self.verbosity > 1:
                self._print_link_info(j, index_parent, parent, len(parents))
            # Iterate through all possible combinations
            nonsig = False
            for comb_index, Z in \
                    enumerate(self._iter_conditions(parent, conds_dim,
                                                    parents)):
                # Break if we try too many combinations
                if comb_index >= max_combinations:
                    break
                # Perform independence test
                if link_assumptions_j[parent] == '-->':
                    # Link is assumed to exist: skip the test and record
                    # it as dependent
                    val = 1.
                    pval = 0.
                    dependent = True
                else:
                    val, pval, dependent = self.cond_ind_test.run_test(X=[parent],
                                                Y=[(j, 0)],
                                                Z=Z,
                                                tau_max=tau_max,
                                                alpha_or_thres=pc_alpha,
                                                )
                # Print some information if needed
                if self.verbosity > 1:
                    self._print_cond_info(Z, comb_index, pval, val)
                # Keep track of maximum p-value and minimum estimated value
                # for each pair (across any condition)
                val_min[parent] = \
                    min(np.abs(val), val_min.get(parent,
                                                 float("inf")))

                # val_dict stores the statistic that belongs to the
                # largest p-value seen so far
                if pval_max[parent] is None or pval > pval_max[parent]:
                    pval_max[parent] = pval
                    val_dict[parent] = val

                # Save the iteration if we need to
                if save_iterations:
                    a_iter = iterations['iterations'][conds_dim][parent]
                    a_iter[comb_index]['conds'] = list(Z)
                    a_iter[comb_index]['val'] = val
                    a_iter[comb_index]['pval'] = pval
                # Delete link later and stop testing further condition
                # sets for this parent if non-significant
                if not dependent: #pval > pc_alpha:
                    nonsig_parents.append((j, parent))
                    nonsig = True
                    break

            # Print the results if needed
            if self.verbosity > 1:
                self._print_a_pc_result(nonsig,
                                        conds_dim, max_combinations)

        # Remove non-significant links only after all parents of this
        # cardinality were tested, so every test in the pass sees the
        # same parent list
        for _, parent in nonsig_parents:
            del val_min[parent]
        # Return the parents list sorted by the test metric so that the
        # updated parents list is given to the next cond_dim loop
        parents = self._sort_parents(val_min)
        # Print information about the change in possible parents
        if self.verbosity > 1:
            print("\nUpdating parents:")
            self._print_parents_single(j, parents, val_min, pval_max)

    # Print information about if convergence was reached
    if self.verbosity > 1:
        self._print_converged_pc_single(converged, j, max_conds_dim)
    # Return the results
    return {'parents': parents,
            'val_min': val_min,
            'val_dict': val_dict,
            'pval_max': pval_max,
            'iterations': _nested_to_normal(iterations)}
474
+
475
+ def _print_pc_params(self, link_assumptions, tau_min, tau_max, pc_alpha,
476
+ max_conds_dim, max_combinations):
477
+ """Print the setup of the current pc_stable run.
478
+
479
+ Parameters
480
+ ----------
481
+ link_assumptions : dict or None
482
+ Dictionary of form specifying which links should be tested.
483
+ tau_min : int, default: 1
484
+ Minimum time lag to test.
485
+ tau_max : int, default: 1
486
+ Maximum time lag to test.
487
+ pc_alpha : float or list of floats
488
+ Significance level in algorithm.
489
+ max_conds_dim : int
490
+ Maximum number of conditions to test.
491
+ max_combinations : int
492
+ Maximum number of combinations of conditions to test.
493
+ """
494
+ print("\n##\n## Step 1: PC1 algorithm for selecting lagged conditions\n##"
495
+ "\n\nParameters:")
496
+ if link_assumptions is not None:
497
+ print("link_assumptions = %s" % str(link_assumptions))
498
+ print("independence test = %s" % self.cond_ind_test.measure
499
+ + "\ntau_min = %d" % tau_min
500
+ + "\ntau_max = %d" % tau_max
501
+ + "\npc_alpha = %s" % pc_alpha
502
+ + "\nmax_conds_dim = %s" % max_conds_dim
503
+ + "\nmax_combinations = %d" % max_combinations)
504
+ print("\n")
505
+
506
+ def _print_pc_sel_results(self, pc_alpha, results, j, score, optimal_alpha):
507
+ """Print the results from the pc_alpha selection.
508
+
509
+ Parameters
510
+ ----------
511
+ pc_alpha : list
512
+ Tested significance levels in algorithm.
513
+ results : dict
514
+ Results from the tested pc_alphas.
515
+ score : array of floats
516
+ scores from each pc_alpha.
517
+ j : int
518
+ Index of current variable.
519
+ optimal_alpha : float
520
+ Optimal value of pc_alpha.
521
+ """
522
+ print("\n# Condition selection results:")
523
+ for iscore, pc_alpha_here in enumerate(pc_alpha):
524
+ names_parents = "[ "
525
+ for pari in results[pc_alpha_here]['parents']:
526
+ names_parents += "(%s % d) " % (
527
+ self.var_names[pari[0]], pari[1])
528
+ names_parents += "]"
529
+ print(" pc_alpha=%s got score %.4f with parents %s" %
530
+ (pc_alpha_here, score[iscore], names_parents))
531
+ print("\n==> optimal pc_alpha for variable %s is %s" %
532
+ (self.var_names[j], optimal_alpha))
533
+
534
+ def _check_tau_limits(self, tau_min, tau_max):
535
+ """Check the tau limits adhere to 0 <= tau_min <= tau_max.
536
+
537
+ Parameters
538
+ ----------
539
+ tau_min : float
540
+ Minimum tau value.
541
+ tau_max : float
542
+ Maximum tau value.
543
+ """
544
+ if not 0 <= tau_min <= tau_max:
545
+ raise ValueError("tau_max = %d, " % (tau_max) + \
546
+ "tau_min = %d, " % (tau_min) + \
547
+ "but 0 <= tau_min <= tau_max")
548
+
549
+ def _set_max_condition_dim(self, max_conds_dim, tau_min, tau_max):
550
+ """
551
+ Set the maximum dimension of the conditions. Defaults to self.N*tau_max.
552
+
553
+ Parameters
554
+ ----------
555
+ max_conds_dim : int
556
+ Input maximum condition dimension.
557
+ tau_max : int
558
+ Maximum tau.
559
+
560
+ Returns
561
+ -------
562
+ max_conds_dim : int
563
+ Input maximum condition dimension or default.
564
+ """
565
+ # Check if an input was given
566
+ if max_conds_dim is None:
567
+ max_conds_dim = self.N * (tau_max - tau_min + 1)
568
+ # Check this is a valid
569
+ if max_conds_dim < 0:
570
+ raise ValueError("maximum condition dimension must be >= 0")
571
+ return max_conds_dim
572
+
573
def run_pc_stable(self,
                  selected_links=None,
                  link_assumptions=None,
                  tau_min=1,
                  tau_max=1,
                  save_iterations=False,
                  pc_alpha=0.2,
                  max_conds_dim=None,
                  max_combinations=1):
    """Lagged PC algorithm for estimating lagged parents of all variables.

    Runs ``_run_pc_stable_single`` for every variable and every candidate
    pc_alpha and keeps, per variable, the result of the selected pc_alpha.

    Parents are made available as self.all_parents. The method also sets
    self.val_matrix, self.p_matrix, self.iterations, self.val_min and
    self.pval_max.

    Parameters
    ----------
    selected_links : dict or None
        Deprecated, replaced by link_assumptions. Passing anything other
        than None raises a ValueError.
    link_assumptions : dict
        Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
        assumptions about links. This initializes the graph with entries
        graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
        implies that a directed link from i to j at lag 0 must exist.
        Valid link types are 'o-o', '-->', '<--'. In addition, the middle
        mark can be '?' instead of '-'. Then '-?>' implies that this link
        may not exist, but if it exists, its orientation is '-->'. Link
        assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
        requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
        does not appear in the dictionary, it is assumed absent. That is,
        if link_assumptions is not None, then all links have to be specified
        or the links are assumed absent.
    tau_min : int, default: 1
        Minimum time lag to test. Useful for multi-step ahead predictions.
        Must satisfy 0 <= tau_min <= tau_max; values below 1 are raised
        to 1 after that check.
    tau_max : int, default: 1
        Maximum time lag. Must be larger or equal to tau_min.
    save_iterations : bool, default: False
        Whether to save iteration step results such as conditions used.
    pc_alpha : float or list of floats, default: 0.2
        Significance level in algorithm. If a list, tuple, array or None
        is passed, the pc_alpha level is optimized for every variable
        across the given pc_alpha values using the score computed in
        cond_ind_test.get_model_selection_criterion(); the value with the
        smallest score is selected. None stands for
        [0.05, 0.1, 0.2, 0.3, 0.4, 0.5].
    max_conds_dim : int or None
        Maximum number of conditions to test. If None is passed, the
        default of ``_set_max_condition_dim`` is used.
    max_combinations : int, default: 1
        Maximum number of combinations of conditions of current cardinality
        to test in PC1 step. Must be > 0.

    Returns
    -------
    all_parents : dict
        Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...}
        containing estimated parents.

    Raises
    ------
    ValueError
        If selected_links is not None, if the tau limits are invalid, or
        if max_combinations <= 0.
    """
    if selected_links is not None:
        raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

    # Create an internal copy of pc_alpha
    _int_pc_alpha = deepcopy(pc_alpha)
    # Check if we are selecting an optimal alpha value
    select_optimal_alpha = True
    # Set the default values for pc_alpha
    if _int_pc_alpha is None:
        _int_pc_alpha = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
    elif not isinstance(_int_pc_alpha, (list, tuple, np.ndarray)):
        # A single value: wrap it and skip the model selection
        _int_pc_alpha = [_int_pc_alpha]
        select_optimal_alpha = False
    # Check the limits on tau_min
    self._check_tau_limits(tau_min, tau_max)
    tau_min = max(1, tau_min)
    # Check that the maximum combinations variable is correct
    if max_combinations <= 0:
        raise ValueError("max_combinations must be > 0")
    # Implement defaultdict for all pval_max, val_min, val_dict, and
    # iterations
    pval_max = defaultdict(dict)
    val_min = defaultdict(dict)
    val_dict = defaultdict(dict)
    iterations = defaultdict(dict)

    if self.verbosity > 0:
        self._print_pc_params(link_assumptions, tau_min, tau_max,
                              _int_pc_alpha, max_conds_dim,
                              max_combinations)

    # Set the selected links
    # _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max,
    #                                      remove_contemp=True)
    _int_link_assumptions = self._set_link_assumptions(link_assumptions,
        tau_min, tau_max, remove_contemp=True)

    # Initialize all parents
    all_parents = dict()
    # Set the maximum condition dimension
    max_conds_dim = self._set_max_condition_dim(max_conds_dim,
                                                tau_min, tau_max)

    # Loop through the selected variables
    for j in range(self.N):
        # Print the status of this variable
        if self.verbosity > 1:
            print("\n## Variable %s" % self.var_names[j])
            print("\nIterating through pc_alpha = %s:" % _int_pc_alpha)
        # Initialize the scores for selecting the optimal alpha
        # NOTE(review): zeros_like takes its dtype from the pc_alpha
        # values, so an all-integer pc_alpha list would presumably give
        # an integer score array - confirm this cannot truncate scores.
        score = np.zeros_like(_int_pc_alpha)
        # Initialize the result
        results = {}
        for iscore, pc_alpha_here in enumerate(_int_pc_alpha):
            # Print statement about the pc_alpha being tested
            if self.verbosity > 1:
                print("\n# pc_alpha = %s (%d/%d):" % (pc_alpha_here,
                                                      iscore + 1,
                                                      score.shape[0]))
            # Get the results for this alpha value
            results[pc_alpha_here] = \
                self._run_pc_stable_single(j,
                                           link_assumptions_j=_int_link_assumptions[j],
                                           tau_min=tau_min,
                                           tau_max=tau_max,
                                           save_iterations=save_iterations,
                                           pc_alpha=pc_alpha_here,
                                           max_conds_dim=max_conds_dim,
                                           max_combinations=max_combinations)
            # Figure out the best score if there is more than one pc_alpha
            # value
            if select_optimal_alpha:
                score[iscore] = \
                    self.cond_ind_test.get_model_selection_criterion(
                        j, results[pc_alpha_here]['parents'], tau_max)
        # Record the optimal alpha value (smallest score; with a single
        # pc_alpha the score stays zero and that value is selected)
        optimal_alpha = _int_pc_alpha[score.argmin()]
        # Only print the selection results if there is more than one
        # pc_alpha
        if self.verbosity > 1 and select_optimal_alpha:
            self._print_pc_sel_results(_int_pc_alpha, results, j,
                                       score, optimal_alpha)
        # Record the results for this variable
        all_parents[j] = results[optimal_alpha]['parents']
        val_min[j] = results[optimal_alpha]['val_min']
        val_dict[j] = results[optimal_alpha]['val_dict']
        pval_max[j] = results[optimal_alpha]['pval_max']
        iterations[j] = results[optimal_alpha]['iterations']
        # Only save the optimal alpha if there is more than one pc_alpha
        if select_optimal_alpha:
            iterations[j]['optimal_pc_alpha'] = optimal_alpha
    # Save the results in the current status of the algorithm
    self.all_parents = all_parents
    self.val_matrix = self._dict_to_matrix(val_dict, tau_max, self.N,
                                           default=0.)
    self.p_matrix = self._dict_to_matrix(pval_max, tau_max, self.N,
                                         default=1.)
    self.iterations = iterations
    self.val_min = val_min
    self.pval_max = pval_max
    # Print the results
    if self.verbosity > 0:
        print("\n## Resulting lagged parent (super)sets:")
        self._print_parents(all_parents, val_min, pval_max)
    # Return the parents
    return all_parents
733
+
734
+ def _print_parents_single(self, j, parents, val_min, pval_max):
735
+ """Print current parents for variable j.
736
+
737
+ Parameters
738
+ ----------
739
+ j : int
740
+ Index of current variable.
741
+ parents : list
742
+ List of form [(0, -1), (3, -2), ...].
743
+ val_min : dict
744
+ Dictionary of form {(0, -1):float, ...} containing the minimum absolute
745
+ test statistic value of a link.
746
+ pval_max : dict
747
+ Dictionary of form {(0, -1):float, ...} containing the maximum
748
+ p-value of a link across different conditions.
749
+ """
750
+ if len(parents) < 20 or hasattr(self, 'iterations'):
751
+ print("\n Variable %s has %d link(s):" % (
752
+ self.var_names[j], len(parents)))
753
+ if (hasattr(self, 'iterations')
754
+ and 'optimal_pc_alpha' in list(self.iterations[j])):
755
+ print(" [pc_alpha = %s]" % (
756
+ self.iterations[j]['optimal_pc_alpha']))
757
+ if val_min is None or pval_max is None:
758
+ for p in parents:
759
+ print(" (%s % .d)" % (
760
+ self.var_names[p[0]], p[1]))
761
+ else:
762
+ for p in parents:
763
+ print(" (%s % .d): max_pval = %.5f, |min_val| = % .3f" % (
764
+ self.var_names[p[0]], p[1], pval_max[p],
765
+ abs(val_min[p])))
766
+ else:
767
+ print("\n Variable %s has %d link(s):" % (
768
+ self.var_names[j], len(parents)))
769
+
770
+ def _print_parents(self, all_parents, val_min, pval_max):
771
+ """Print current parents.
772
+
773
+ Parameters
774
+ ----------
775
+ all_parents : dictionary
776
+ Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
777
+ the conditioning-parents estimated with PC algorithm.
778
+ val_min : dict
779
+ Dictionary of form {0:{(0, -1):float, ...}} containing the minimum
780
+ absolute test statistic value of a link.
781
+ pval_max : dict
782
+ Dictionary of form {0:{(0, -1):float, ...}} containing the maximum
783
+ p-value of a link across different conditions.
784
+ """
785
+ for j in [var for var in list(all_parents)]:
786
+ if val_min is None or pval_max is None:
787
+ self._print_parents_single(j, all_parents[j],
788
+ None, None)
789
+ else:
790
+ self._print_parents_single(j, all_parents[j],
791
+ val_min[j], pval_max[j])
792
+
793
+ def _mci_condition_to_string(self, conds):
794
+ """Convert the list of conditions into a string.
795
+
796
+ Parameters
797
+ ----------
798
+ conds : list
799
+ List of conditions.
800
+ """
801
+ cond_string = "[ "
802
+ for k, tau_k in conds:
803
+ cond_string += "(%s % d) " % (self.var_names[k], tau_k)
804
+ cond_string += "]"
805
+ return cond_string
806
+
807
+ def _print_mci_conditions(self, conds_y, conds_x_lagged,
808
+ j, i, tau, count, n_parents):
809
+ """Print information about the conditions for the MCI algorithm.
810
+
811
+ Parameters
812
+ ----------
813
+ conds_y : list
814
+ Conditions on node.
815
+ conds_x_lagged : list
816
+ Conditions on parent.
817
+ j : int
818
+ Current node.
819
+ i : int
820
+ Parent node.
821
+ tau : int
822
+ Parent time delay.
823
+ count : int
824
+ Index of current parent.
825
+ n_parents : int
826
+ Total number of parents.
827
+ """
828
+ # Remove the current parent from the conditions
829
+ conds_y_no_i = [node for node in conds_y if node != (i, tau)]
830
+ # Get the condition string for parent
831
+ condy_str = self._mci_condition_to_string(conds_y_no_i)
832
+ # Get the condition string for node
833
+ condx_str = self._mci_condition_to_string(conds_x_lagged)
834
+ # Formate and print the information
835
+ link_marker = {True:"o?o", False:"-?>"}
836
+ indent = "\n "
837
+ print_str = indent + "link (%s % d) " % (self.var_names[i], tau)
838
+ print_str += "%s %s (%d/%d):" % (link_marker[tau==0],
839
+ self.var_names[j], count + 1, n_parents)
840
+ print_str += indent + "with conds_y = %s" % (condy_str)
841
+ print_str += indent + "with conds_x = %s" % (condx_str)
842
+ print(print_str)
843
+
844
+ def _print_pcmciplus_conditions(self, lagged_parents, i, j, abstau,
845
+ max_conds_py, max_conds_px,
846
+ max_conds_px_lagged):
847
+ """Print information about the conditions for PCMCIplus.
848
+
849
+ Parameters
850
+ ----------
851
+ lagged_parents : dictionary of lists
852
+ Dictionary of lagged parents for each node.
853
+ j : int
854
+ Current node.
855
+ i : int
856
+ Parent node.
857
+ abstau : int
858
+ Parent time delay.
859
+ max_conds_py : int
860
+ Max number of parents for node j.
861
+ max_conds_px : int
862
+ Max number of parents for lagged node i.
863
+ max_conds_px_lagged : int
864
+ Maximum number of lagged conditions of X when X is lagged in MCI
865
+ tests. If None is passed, this number is equal to max_conds_px.
866
+ """
867
+ conds_y = lagged_parents[j][:max_conds_py]
868
+ conds_y_no_i = [node for node in conds_y if node != (i, -abstau)]
869
+ if abstau == 0:
870
+ conds_x = lagged_parents[i][:max_conds_px]
871
+ else:
872
+ if max_conds_px_lagged is None:
873
+ conds_x = lagged_parents[i][:max_conds_px]
874
+ else:
875
+ conds_x = lagged_parents[i][:max_conds_px_lagged]
876
+
877
+ # Shift the conditions for X by tau
878
+ conds_x_lagged = [(k, -abstau + k_tau) for k, k_tau in conds_x]
879
+ condy_str = self._mci_condition_to_string(conds_y_no_i)
880
+ condx_str = self._mci_condition_to_string(conds_x_lagged)
881
+ print_str = " with conds_y = %s" % (condy_str)
882
+ print_str += "\n with conds_x = %s" % (condx_str)
883
+ print(print_str)
884
+
885
+ def _get_int_parents(self, parents):
886
+ """Get the input parents dictionary.
887
+
888
+ Parameters
889
+ ----------
890
+ parents : dict or None
891
+ Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...}
892
+ specifying the conditions for each variable. If None is
893
+ passed, no conditions are used.
894
+
895
+ Returns
896
+ -------
897
+ int_parents : defaultdict of lists
898
+ Internal copy of parents, respecting default options
899
+ """
900
+ int_parents = deepcopy(parents)
901
+ if int_parents is None:
902
+ int_parents = defaultdict(list)
903
+ else:
904
+ int_parents = defaultdict(list, int_parents)
905
+ return int_parents
906
+
907
+ def _iter_indep_conds(self,
908
+ parents,
909
+ _int_link_assumptions,
910
+ max_conds_py,
911
+ max_conds_px):
912
+ """Iterate through the conditions dictated by the arguments, yielding
913
+ the needed arguments for conditional independence functions.
914
+
915
+ Parameters
916
+ ----------
917
+ parents : dict
918
+ Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...}
919
+ specifying the conditions for each variable.
920
+ _int_link_assumptions : dict
921
+ Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
922
+ assumptions about links. This initializes the graph with entries
923
+ graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
924
+ implies that a directed link from i to j at lag 0 must exist.
925
+ Valid link types are 'o-o', '-->', '<--'. In addition, the middle
926
+ mark can be '?' instead of '-'. Then '-?>' implies that this link
927
+ may not exist, but if it exists, its orientation is '-->'. Link
928
+ assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
929
+ requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
930
+ does not appear in the dictionary, it is assumed absent. That is,
931
+ if link_assumptions is not None, then all links have to be specified
932
+ or the links are assumed absent.
933
+ max_conds_py : int
934
+ Maximum number of conditions of Y to use.
935
+ max_conds_px : int
936
+ Maximum number of conditions of Z to use.
937
+
938
+ Yields
939
+ ------
940
+ i, j, tau, Z : list of tuples
941
+ (i, tau) is the parent node, (j, 0) is the current node, and Z is of
942
+ the form [(var, tau + tau')] and specifies the condition to test
943
+ """
944
+ # Loop over the selected variables
945
+ for j in range(self.N):
946
+ # Get the conditions for node j
947
+ conds_y = parents[j][:max_conds_py]
948
+ # Create a parent list from links seperated in time and by node
949
+ # parent_list = [(i, tau) for i, tau in _int_link_assumptions[j]
950
+ # if (i, tau) != (j, 0)]
951
+ parent_list = []
952
+ for itau in _int_link_assumptions[j]:
953
+ link_type = _int_link_assumptions[j][itau]
954
+ if itau != (j, 0) and link_type not in ['<--', '<?-']:
955
+ parent_list.append(itau)
956
+ # Iterate through parents (except those in conditions)
957
+ for cnt, (i, tau) in enumerate(parent_list):
958
+ # Get the conditions for node i
959
+ conds_x = parents[i][:max_conds_px]
960
+ # Shift the conditions for X by tau
961
+ conds_x_lagged = [(k, tau + k_tau) for k, k_tau in conds_x]
962
+ # Print information about the mci conditions if requested
963
+ if self.verbosity > 1:
964
+ self._print_mci_conditions(conds_y, conds_x_lagged, j, i,
965
+ tau, cnt, len(parent_list))
966
+ # Construct lists of tuples for estimating
967
+ # I(X_t-tau; Y_t | Z^Y_t, Z^X_t-tau)
968
+ # with conditions for X shifted by tau
969
+ Z = [node for node in conds_y if node != (i, tau)]
970
+ # Remove overlapped nodes between conds_x_lagged and conds_y
971
+ Z += [node for node in conds_x_lagged if node not in Z]
972
+ # Yield these list
973
+ yield j, i, tau, Z
974
+
975
+ def _run_mci_or_variants(self,
976
+ selected_links=None,
977
+ link_assumptions=None,
978
+ tau_min=0,
979
+ tau_max=1,
980
+ parents=None,
981
+ max_conds_py=None,
982
+ max_conds_px=None,
983
+ val_only=False,
984
+ alpha_level=0.05,
985
+ fdr_method='none'):
986
+ """Base function for MCI method and variants.
987
+
988
+ Returns the matrices of test statistic values, (optionally corrected)
989
+ p-values, and (optionally) confidence intervals. Also (new in 4.3)
990
+ returns graph based on alpha_level (and optional FDR-correction).
991
+
992
+ Parameters
993
+ ----------
994
+ selected_links : dict or None
995
+ Deprecated, replaced by link_assumptions
996
+ link_assumptions : dict
997
+ Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
998
+ assumptions about links. This initializes the graph with entries
999
+ graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
1000
+ implies that a directed link from i to j at lag 0 must exist.
1001
+ Valid link types are 'o-o', '-->', '<--'. In addition, the middle
1002
+ mark can be '?' instead of '-'. Then '-?>' implies that this link
1003
+ may not exist, but if it exists, its orientation is '-->'. Link
1004
+ assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
1005
+ requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
1006
+ does not appear in the dictionary, it is assumed absent. That is,
1007
+ if link_assumptions is not None, then all links have to be specified
1008
+ or the links are assumed absent.
1009
+ tau_min : int, default: 0
1010
+ Minimum time lag to test. Note that zero-lags are undirected.
1011
+ tau_max : int, default: 1
1012
+ Maximum time lag. Must be larger or equal to tau_min.
1013
+ parents : dict or None
1014
+ Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...}
1015
+ specifying the conditions for each variable. If None is
1016
+ passed, no conditions are used.
1017
+ max_conds_py : int or None
1018
+ Maximum number of conditions of Y to use. If None is passed, this
1019
+ number is unrestricted.
1020
+ max_conds_px : int or None
1021
+ Maximum number of conditions of Z to use. If None is passed, this
1022
+ number is unrestricted.
1023
+ val_only : bool, default: False
1024
+ Option to only compute dependencies and not p-values.
1025
+ alpha_level : float, optional (default: 0.05)
1026
+ Significance level at which the p_matrix is thresholded to
1027
+ get graph.
1028
+ fdr_method : str, optional (default: 'none')
1029
+ Correction method, currently implemented is Benjamini-Hochberg
1030
+ False Discovery Rate method ('fdr_bh').
1031
+
1032
+ Returns
1033
+ -------
1034
+ graph : array of shape [N, N, tau_max+1]
1035
+ Causal graph, see description above for interpretation.
1036
+ val_matrix : array of shape [N, N, tau_max+1]
1037
+ Estimated matrix of test statistic values.
1038
+ p_matrix : array of shape [N, N, tau_max+1]
1039
+ Estimated matrix of p-values, optionally adjusted if fdr_method is
1040
+ not 'none'.
1041
+ conf_matrix : array of shape [N, N, tau_max+1,2]
1042
+ Estimated matrix of confidence intervals of test statistic values.
1043
+ Only computed if set in cond_ind_test, where also the percentiles
1044
+ are set.
1045
+ """
1046
+ if selected_links is not None:
1047
+ raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")
1048
+
1049
+ # Check the limits on tau
1050
+ self._check_tau_limits(tau_min, tau_max)
1051
+ # Set the selected links
1052
+ # _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max)
1053
+ _int_link_assumptions = self._set_link_assumptions(link_assumptions, tau_min, tau_max)
1054
+
1055
+ # Set the maximum condition dimension for Y and X
1056
+ max_conds_py = self._set_max_condition_dim(max_conds_py,
1057
+ tau_min, tau_max)
1058
+ max_conds_px = self._set_max_condition_dim(max_conds_px,
1059
+ tau_min, tau_max)
1060
+ # Get the parents that will be checked
1061
+ _int_parents = self._get_int_parents(parents)
1062
+ # Initialize the return values
1063
+ val_matrix = np.zeros((self.N, self.N, tau_max + 1))
1064
+ p_matrix = np.ones((self.N, self.N, tau_max + 1))
1065
+ # Initialize the optional return of the confidance matrix
1066
+ conf_matrix = None
1067
+ if self.cond_ind_test.confidence is not None:
1068
+ conf_matrix = np.zeros((self.N, self.N, tau_max + 1, 2))
1069
+
1070
+ # Get the conditions as implied by the input arguments
1071
+ for j, i, tau, Z in self._iter_indep_conds(_int_parents,
1072
+ _int_link_assumptions,
1073
+ max_conds_py,
1074
+ max_conds_px):
1075
+ # Set X and Y (for clarity of code)
1076
+ X = [(i, tau)]
1077
+ Y = [(j, 0)]
1078
+
1079
+ if val_only is False:
1080
+ # Run the independence tests and record the results
1081
+ if ((i, -abs(tau)) in _int_link_assumptions[j]
1082
+ and _int_link_assumptions[j][(i, -abs(tau))] in ['-->', 'o-o']):
1083
+ val = 1.
1084
+ pval = 0.
1085
+ else:
1086
+ val, pval, _ = self.cond_ind_test.run_test(X, Y, Z=Z,
1087
+ tau_max=tau_max,
1088
+ alpha_or_thres=alpha_level,
1089
+ )
1090
+ val_matrix[i, j, abs(tau)] = val
1091
+ p_matrix[i, j, abs(tau)] = pval
1092
+ else:
1093
+ val = self.cond_ind_test.get_measure(X, Y, Z=Z, tau_max=tau_max)
1094
+ val_matrix[i, j, abs(tau)] = val
1095
+
1096
+ # Get the confidence value, returns None if cond_ind_test.confidence
1097
+ # is False
1098
+ conf = self.cond_ind_test.get_confidence(X, Y, Z=Z, tau_max=tau_max)
1099
+ # Record the value if the conditional independence requires it
1100
+ if self.cond_ind_test.confidence:
1101
+ conf_matrix[i, j, abs(tau)] = conf
1102
+
1103
+ if val_only:
1104
+ results = {'val_matrix':val_matrix,
1105
+ 'conf_matrix':conf_matrix}
1106
+ self.results = results
1107
+ return results
1108
+
1109
+ # Correct the p_matrix if there is a fdr_method
1110
+ if fdr_method != 'none':
1111
+ if self.cond_ind_test.significance == 'fixed_thres':
1112
+ raise ValueError("FDR-correction not compatible with significance == 'fixed_thres'")
1113
+ p_matrix = self.get_corrected_pvalues(p_matrix=p_matrix, tau_min=tau_min,
1114
+ tau_max=tau_max,
1115
+ link_assumptions=_int_link_assumptions,
1116
+ fdr_method=fdr_method)
1117
+
1118
+ # Threshold p_matrix to get graph (or val_matrix for significance == 'fixed_thres')
1119
+ if self.cond_ind_test.significance == 'fixed_thres':
1120
+ if self.cond_ind_test.two_sided:
1121
+ final_graph = np.abs(val_matrix) >= np.abs(alpha_level)
1122
+ else:
1123
+ final_graph = val_matrix >= alpha_level
1124
+ else:
1125
+ final_graph = p_matrix <= alpha_level
1126
+
1127
+ # Convert to string graph representation
1128
+ graph = self.convert_to_string_graph(final_graph)
1129
+
1130
+ # Symmetrize p_matrix and val_matrix
1131
+ symmetrized_results = self.symmetrize_p_and_val_matrix(
1132
+ p_matrix=p_matrix,
1133
+ val_matrix=val_matrix,
1134
+ link_assumptions=_int_link_assumptions,
1135
+ conf_matrix=conf_matrix)
1136
+
1137
+ if self.verbosity > 0:
1138
+ self.print_significant_links(
1139
+ graph = graph,
1140
+ p_matrix = symmetrized_results['p_matrix'],
1141
+ val_matrix = symmetrized_results['val_matrix'],
1142
+ conf_matrix = symmetrized_results['conf_matrix'],
1143
+ alpha_level = alpha_level)
1144
+
1145
+ # Return the values as a dictionary and store in class
1146
+ results = {
1147
+ 'graph': graph,
1148
+ 'p_matrix': symmetrized_results['p_matrix'],
1149
+ 'val_matrix': symmetrized_results['val_matrix'],
1150
+ 'conf_matrix': symmetrized_results['conf_matrix'],
1151
+ }
1152
+ self.results = results
1153
+ return results
1154
+
1155
def run_mci(self,
            selected_links=None,
            link_assumptions=None,
            tau_min=0,
            tau_max=1,
            parents=None,
            max_conds_py=None,
            max_conds_px=None,
            val_only=False,
            alpha_level=0.05,
            fdr_method='none'):
    """MCI conditional independence tests.

    Implements the MCI test (Algorithm 2 in [1]_) by forwarding all
    settings to ``_run_mci_or_variants``.

    Parameters
    ----------
    selected_links : dict or None
        Deprecated, replaced by link_assumptions. Any value other than
        None raises a ValueError.
    link_assumptions : dict
        Mapping {j: {(i, -tau): link_type, ...}, ...} encoding assumptions
        about links; it seeds the graph with
        graph[i, j, tau] = link_type. Valid types are 'o-o', '-->' and
        '<--'; a '?' as middle mark (e.g. '-?>') means the link may not
        exist but, if it does, has that orientation. Assumptions must be
        consistent (graph[i, j, 0] = '-->' requires
        graph[j, i, 0] = '<--') and acyclic. When a dictionary is given,
        links missing from it are assumed absent.
    tau_min : int, default: 0
        Minimum time lag to test. Note that zero-lags are undirected.
    tau_max : int, default: 1
        Maximum time lag. Must be larger or equal to tau_min.
    parents : dict or None
        Mapping {0: [(0, -1), (3, -2), ...], 1: [], ...} giving the
        conditions for each variable. If None, no conditions are used.
    max_conds_py : int or None
        Maximum number of conditions of Y to use. If None is passed, this
        number is unrestricted.
    max_conds_px : int or None
        Maximum number of conditions of X to use. If None is passed, this
        number is unrestricted.
    val_only : bool, default: False
        Option to only compute dependencies and not p-values.
    alpha_level : float, optional (default: 0.05)
        Significance level at which the p_matrix is thresholded to get
        the graph.
    fdr_method : str, optional (default: 'none')
        Correction method, currently implemented is Benjamini-Hochberg
        False Discovery Rate method ('fdr_bh').

    Returns
    -------
    results : dict
        Whatever ``_run_mci_or_variants`` returns for these settings
        (graph, val_matrix, p_matrix and conf_matrix entries).
    """
    if selected_links is not None:
        raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

    if self.verbosity > 0:
        print("\n##\n## Step 2: MCI algorithm\n##\n\nParameters:")
        settings = [
            "\nindependence test = %s" % self.cond_ind_test.measure,
            "\ntau_min = %d" % tau_min,
            "\ntau_max = %d" % tau_max,
            "\nmax_conds_py = %s" % max_conds_py,
            "\nmax_conds_px = %s" % max_conds_px,
        ]
        print("".join(settings))

    forwarded = dict(
        link_assumptions=link_assumptions,
        tau_min=tau_min,
        tau_max=tau_max,
        parents=parents,
        max_conds_py=max_conds_py,
        max_conds_px=max_conds_px,
        val_only=val_only,
        alpha_level=alpha_level,
        fdr_method=fdr_method,
    )
    return self._run_mci_or_variants(**forwarded)
1252
+
1253
def get_lagged_dependencies(self,
                            selected_links=None,
                            link_assumptions=None,
                            tau_min=0,
                            tau_max=1,
                            val_only=False,
                            alpha_level=0.05,
                            fdr_method='none'):
    """Unconditional lagged independence tests.

    Implements the unconditional lagged independence test (see [1]_) by
    calling ``_run_mci_or_variants`` without parents and with both
    condition limits set to zero.

    Parameters
    ----------
    selected_links : dict or None
        Deprecated, replaced by link_assumptions. Any value other than
        None raises a ValueError.
    link_assumptions : dict
        Mapping {j: {(i, -tau): link_type, ...}, ...} encoding assumptions
        about links; it seeds the graph with
        graph[i, j, tau] = link_type. Valid types are 'o-o', '-->' and
        '<--'; a '?' as middle mark (e.g. '-?>') means the link may not
        exist but, if it does, has that orientation. Assumptions must be
        consistent (graph[i, j, 0] = '-->' requires
        graph[j, i, 0] = '<--') and acyclic. When a dictionary is given,
        links missing from it are assumed absent.
    tau_min : int, default: 0
        Minimum time lag to test. Note that zero-lags are undirected.
    tau_max : int, default: 1
        Maximum time lag. Must be larger or equal to tau_min.
    val_only : bool, default: False
        Option to only compute dependencies and not p-values.
    alpha_level : float, optional (default: 0.05)
        Significance level at which the p_matrix is thresholded to get
        the graph.
    fdr_method : str, optional (default: 'none')
        Correction method, currently implemented is Benjamini-Hochberg
        False Discovery Rate method ('fdr_bh').

    Returns
    -------
    results : dict
        Whatever ``_run_mci_or_variants`` returns for these settings
        (graph, val_matrix, p_matrix and conf_matrix entries).
    """
    if selected_links is not None:
        raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

    if self.verbosity > 0:
        print("\n##\n## Estimating lagged dependencies \n##\n\nParameters:")
        settings = [
            "\nindependence test = %s" % self.cond_ind_test.measure,
            "\ntau_min = %d" % tau_min,
            "\ntau_max = %d" % tau_max,
        ]
        print("".join(settings))

    # No parents and zero condition limits: the tests are unconditional.
    forwarded = dict(
        link_assumptions=link_assumptions,
        tau_min=tau_min,
        tau_max=tau_max,
        parents=None,
        max_conds_py=0,
        max_conds_px=0,
        val_only=val_only,
        alpha_level=alpha_level,
        fdr_method=fdr_method,
    )
    return self._run_mci_or_variants(**forwarded)
1334
+
1335
def run_fullci(self,
               selected_links=None,
               link_assumptions=None,
               tau_min=0,
               tau_max=1,
               val_only=False,
               alpha_level=0.05,
               fdr_method='none'):
    """FullCI conditional independence tests.

    Implements the FullCI test (see [1]_): every variable is conditioned
    on the past of all variables at lags max(1, tau_min) to tau_max, and
    no extra conditions of X are added.

    Parameters
    ----------
    selected_links : dict or None
        Deprecated, replaced by link_assumptions. Any value other than
        None raises a ValueError.
    link_assumptions : dict
        Mapping {j: {(i, -tau): link_type, ...}, ...} encoding assumptions
        about links; it seeds the graph with
        graph[i, j, tau] = link_type. Valid types are 'o-o', '-->' and
        '<--'; a '?' as middle mark (e.g. '-?>') means the link may not
        exist but, if it does, has that orientation. Assumptions must be
        consistent (graph[i, j, 0] = '-->' requires
        graph[j, i, 0] = '<--') and acyclic. When a dictionary is given,
        links missing from it are assumed absent.
    tau_min : int, default: 0
        Minimum time lag to test. Note that zero-lags are undirected.
    tau_max : int, default: 1
        Maximum time lag. Must be larger or equal to tau_min.
    val_only : bool, default: False
        Option to only compute dependencies and not p-values.
    alpha_level : float, optional (default: 0.05)
        Significance level at which the p_matrix is thresholded to get
        the graph.
    fdr_method : str, optional (default: 'none')
        Correction method, currently implemented is Benjamini-Hochberg
        False Discovery Rate method ('fdr_bh').

    Returns
    -------
    results : dict
        Whatever ``_run_mci_or_variants`` returns for these settings
        (graph, val_matrix, p_matrix and conf_matrix entries).
    """
    if selected_links is not None:
        raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

    if self.verbosity > 0:
        print("\n##\n## Running Tigramite FullCI algorithm\n##\n\nParameters:")
        settings = [
            "\nindependence test = %s" % self.cond_ind_test.measure,
            "\ntau_min = %d" % tau_min,
            "\ntau_max = %d" % tau_max,
        ]
        print("".join(settings))

    # Every variable is conditioned on the whole lagged past (lag >= 1).
    first_lag = max(1, tau_min)
    full_past = {
        j: [(i, -tau)
            for i in range(self.N)
            for tau in range(first_lag, tau_max + 1)]
        for j in range(self.N)
    }

    forwarded = dict(
        link_assumptions=link_assumptions,
        tau_min=tau_min,
        tau_max=tau_max,
        parents=full_past,
        max_conds_py=None,
        max_conds_px=0,
        val_only=val_only,
        alpha_level=alpha_level,
        fdr_method=fdr_method,
    )
    return self._run_mci_or_variants(**forwarded)
1422
+
1423
def run_bivci(self,
              selected_links=None,
              link_assumptions=None,
              tau_min=0,
              tau_max=1,
              val_only=False,
              alpha_level=0.05,
              fdr_method='none'):
    """BivCI conditional independence tests.

    Implements the BivCI test (see [1]_): every variable is conditioned
    on its own past at lags max(1, tau_min) to tau_max, and no extra
    conditions of X are added.

    Parameters
    ----------
    selected_links : dict or None
        Deprecated, replaced by link_assumptions. Any value other than
        None raises a ValueError.
    link_assumptions : dict
        Mapping {j: {(i, -tau): link_type, ...}, ...} encoding assumptions
        about links; it seeds the graph with
        graph[i, j, tau] = link_type. Valid types are 'o-o', '-->' and
        '<--'; a '?' as middle mark (e.g. '-?>') means the link may not
        exist but, if it does, has that orientation. Assumptions must be
        consistent (graph[i, j, 0] = '-->' requires
        graph[j, i, 0] = '<--') and acyclic. When a dictionary is given,
        links missing from it are assumed absent.
    tau_min : int, default: 0
        Minimum time lag to test. Note that zero-lags are undirected.
    tau_max : int, default: 1
        Maximum time lag. Must be larger or equal to tau_min.
    val_only : bool, default: False
        Option to only compute dependencies and not p-values.
    alpha_level : float, optional (default: 0.05)
        Significance level at which the p_matrix is thresholded to get
        the graph.
    fdr_method : str, optional (default: 'none')
        Correction method, currently implemented is Benjamini-Hochberg
        False Discovery Rate method ('fdr_bh').

    Returns
    -------
    results : dict
        Whatever ``_run_mci_or_variants`` returns for these settings
        (graph, val_matrix, p_matrix and conf_matrix entries).
    """
    if selected_links is not None:
        raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

    if self.verbosity > 0:
        print("\n##\n## Running Tigramite BivCI algorithm\n##\n\nParameters:")
        settings = [
            "\nindependence test = %s" % self.cond_ind_test.measure,
            "\ntau_min = %d" % tau_min,
            "\ntau_max = %d" % tau_max,
        ]
        print("".join(settings))

    # Every variable is conditioned on its own lagged past only.
    first_lag = max(1, tau_min)
    auto_past = {
        j: [(j, -tau) for tau in range(first_lag, tau_max + 1)]
        for j in range(self.N)
    }

    forwarded = dict(
        link_assumptions=link_assumptions,
        tau_min=tau_min,
        tau_max=tau_max,
        parents=auto_past,
        max_conds_py=None,
        max_conds_px=0,
        val_only=val_only,
        alpha_level=alpha_level,
        fdr_method=fdr_method,
    )
    return self._run_mci_or_variants(**forwarded)
1508
+
1509
def get_graph_from_pmatrix(self, p_matrix, alpha_level,
                           tau_min, tau_max, link_assumptions=None):
    """Construct graph from thresholding the p_matrix at an alpha-level.

    Allows to take into account link_assumptions: when they are given,
    the p-value of every link that is not listed (or that is listed as
    '<--' or '<?-') is treated as 1 before thresholding. The input
    p_matrix is not modified.

    Parameters
    ----------
    p_matrix : array of shape [N, N, tau_max+1]
        Estimated matrix of p-values, optionally adjusted if fdr_method is
        not 'none'.
    alpha_level : float
        Significance level at which the p_matrix is thresholded to
        get graph.
    tau_min : int
        Minimum time delay to test.
    tau_max : int
        Maximum time delay to test.
    link_assumptions : dict or None
        Mapping {j: {(i, -tau): link_type, ...}, ...} encoding assumptions
        about links; it seeds the graph with
        graph[i, j, tau] = link_type. Valid types are 'o-o', '-->' and
        '<--'; a '?' as middle mark (e.g. '-?>') means the link may not
        exist but, if it does, has that orientation. Assumptions must be
        consistent (graph[i, j, 0] = '-->' requires
        graph[j, i, 0] = '<--') and acyclic. When a dictionary is given,
        links missing from it are assumed absent.

    Returns
    -------
    graph : array of shape [N, N, tau_max+1]
        Causal graph as produced by ``convert_to_string_graph``.
    """
    _int_link_assumptions = self._set_link_assumptions(link_assumptions, tau_min, tau_max)

    if link_assumptions is not None:
        # Mark the entries of links that are allowed to appear.
        mask = np.zeros((self.N, self.N, tau_max + 1), dtype='bool')
        for j, links_ in _int_link_assumptions.items():
            for (i, lag), link_type in links_.items():
                if link_type not in ("<--", "<?-"):
                    mask[i, j, abs(lag)] = True

        # Set all p-values of absent links to 1. np.where builds a new
        # array, so the caller's p_matrix stays untouched.
        p_matrix = np.where(mask, p_matrix, 1.)

    # Threshold p_matrix to get graph
    graph_bool = p_matrix <= alpha_level

    # Convert to string graph representation
    return self.convert_to_string_graph(graph_bool)
1575
+
1576
def return_parents_dict(self, graph,
                        val_matrix,
                        include_lagzero_parents=False):
    """Collect the unambiguous parents of each variable, strongest first.

    Only graph entries that are exactly ``"-->"`` are treated as parents.
    Links with any other mark (for example 'o', 'x' or a '?' middle mark)
    are therefore not returned.

    Parameters
    ----------
    graph : array of shape [N, N, tau_max+1]
        Causal graph, see description above for interpretation.
    val_matrix : array-like
        Matrix of test statistic values. Must be of shape (N, N, tau_max +
        1).
    include_lagzero_parents : bool (default: False)
        Whether parents at lag zero are included as well.

    Returns
    -------
    parents_dict : dict
        Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} mapping
        each variable j to its parents (i, -tau), ordered by decreasing
        absolute value of val_matrix[i, j, tau].
    """
    # Without lag-zero parents the lag axis is sliced from index 1, so the
    # indices found in the slice are shifted back by this offset.
    lag_offset = 0 if include_lagzero_parents else 1

    parents_dict = {}
    for j in range(self.N):
        strength = {}
        for i, idx in np.argwhere(graph[:, j, lag_offset:] == "-->"):
            lag = idx + lag_offset
            strength[(i, -lag)] = np.abs(val_matrix[i, j, lag])
        # Strongest link first
        parents_dict[j] = sorted(strength, key=strength.get, reverse=True)

    return parents_dict
1620
+
1621
+
1622
def return_significant_links(self, pq_matrix,
                             val_matrix,
                             alpha_level=0.05,
                             include_lagzero_links=False):
    """Deprecated placeholder kept for backward compatibility.

    DEPRECATED. Will be removed in future. All arguments are ignored; the
    method only prints a notice pointing to the replacement functionality
    and returns None.
    """
    notice = "".join((
        "return_significant_links() is DEPRECATED: now run_pcmci(), ",
        " run_mci()",
        " and all variants directly return the graph based on thresholding ",
        "the p_matrix at alpha_level. The graph can also be updated ",
        "based on a (potentially further adjusted) p_matrix using ",
        "get_graph_from_pmatrix(). ",
        "A dictionary of parents can be obtained ",
        "with return_parents_dict().",
    ))
    print(notice)
    return None
1639
+
1640
def print_significant_links(self,
                            p_matrix,
                            val_matrix,
                            conf_matrix=None,
                            graph=None,
                            ambiguous_triples=None,
                            alpha_level=0.05):
    """Print, per variable, the links regarded as significant.

    Used for output of PCMCI and PCMCIplus. When a graph is passed, every
    entry that is neither empty nor "<--" is listed; otherwise the links
    are those with p_matrix <= alpha_level. For PCMCIplus output,
    unoriented and conflicting links are flagged and ambiguous triples are
    listed at the end.

    Parameters
    ----------
    alpha_level : float, optional (default: 0.05)
        Significance level.
    p_matrix : array-like
        Must be of shape (N, N, tau_max + 1).
    val_matrix : array-like
        Must be of shape (N, N, tau_max + 1).
    conf_matrix : array-like, optional (default: None)
        Matrix of confidence intervals of shape (N, N, tau_max+1, 2).
    graph : array-like
        Must be of shape (N, N, tau_max + 1).
    ambiguous_triples : list
        List of ambiguous triples.
    """
    if graph is None:
        sig_links = (p_matrix <= alpha_level)
    else:
        sig_links = (graph != "")*(graph != "<--")

    print("\n## Significant links at alpha = %s:" % alpha_level)
    for j in range(self.N):
        # Map each incoming link (i, -tau) to its absolute test statistic
        strength = {}
        for i, tau in zip(*np.where(sig_links[:, j, :])):
            strength[(i, -tau)] = np.abs(val_matrix[i, j, abs(tau)])
        # Strongest link first
        ranked = sorted(strength, key=strength.get, reverse=True)

        pieces = ["\n Variable %s has %d "
                  "link(s):" % (self.var_names[j], len(strength))]
        for i, lag in ranked:
            tau = abs(lag)
            pieces.append("\n (%s % d): pval = %.5f" %
                          (self.var_names[i], lag, p_matrix[i, j, tau]))
            pieces.append(" | val = % .3f" % (
                val_matrix[i, j, tau]))
            if conf_matrix is not None:
                pieces.append(" | conf = (%.3f, %.3f)" % (
                    conf_matrix[i, j, tau][0],
                    conf_matrix[i, j, tau][1]))
            if graph is not None:
                if lag == 0 and graph[j, i, 0] == "o-o":
                    pieces.append(" | unoriented link")
                if graph[i, j, tau] == "x-x":
                    pieces.append(" | unclear orientation due to conflict")
        print("".join(pieces))

    if ambiguous_triples is not None and len(ambiguous_triples) > 0:
        print("\n## Ambiguous triples (not used for orientation):\n")
        for (i, tau), k, j in ambiguous_triples:
            print(" [(%s % d), %s, %s]" % (
                self.var_names[i], tau,
                self.var_names[k],
                self.var_names[j]))
1708
+
1709
def print_results(self,
                  return_dict,
                  alpha_level=0.05):
    """Print the significant links contained in a results dictionary.

    Convenience wrapper around ``print_significant_links`` for the output
    of the MCI, PCMCI or PCMCIplus algorithms.

    Parameters
    ----------
    return_dict : dict
        Dictionary of return values. The keys
            * 'p_matrix'
            * 'val_matrix'
        are required. The keys
            * 'conf_matrix'
            * 'graph'
            * 'ambiguous_triples'
        are optional and treated as None when missing.

    alpha_level : float, optional (default: 0.05)
        Significance level.
    """
    # Optional entries default to None when the key is absent
    self.print_significant_links(
        return_dict['p_matrix'],
        return_dict['val_matrix'],
        conf_matrix=return_dict.get('conf_matrix'),
        graph=return_dict.get('graph'),
        ambiguous_triples=return_dict.get('ambiguous_triples'),
        alpha_level=alpha_level)
1745
+
1746
def run_pcmci(self,
              selected_links=None,
              link_assumptions=None,
              tau_min=0,
              tau_max=1,
              save_iterations=False,
              pc_alpha=0.2,
              max_conds_dim=None,
              max_combinations=1,
              max_conds_py=None,
              max_conds_px=None,
              alpha_level=0.05,
              fdr_method='none'):
    r"""Runs PCMCI time-lagged causal discovery for time series.

    Wrapper that first calls ``run_pc_stable`` to select conditions and
    then ``run_mci`` to test all links given these conditions.

    Notes
    -----

    The PCMCI causal discovery method is comprehensively described in
    [1]_, where also analytical and numerical results are presented. Here
    we briefly summarize the method.

    PCMCI estimates time-lagged causal links by a two-step procedure:

    1.  Condition-selection: For each variable :math:`j`, estimate a
        *superset* of parents :math:`\tilde{\mathcal{P}}(X^j_t)` with the
        iterative PC1 algorithm, implemented as ``run_pc_stable``. The
        condition-selection step reduces the dimensionality and avoids
        conditioning on irrelevant variables.

    2.  *Momentary conditional independence* (MCI)

        .. math:: X^i_{t-\tau} \perp X^j_{t} | \tilde{\mathcal{P}}(
                  X^j_t), \tilde{\mathcal{P}}(X^i_{t-\tau})

        here implemented as ``run_mci``. This step estimates the p-values
        and test statistic values for all links accounting for common
        drivers, indirect links, and autocorrelation.

    NOTE: MCI test statistic values define a particular measure of causal
    strength depending on the test statistic used. For example, ParCorr()
    results in normalized values between -1 and 1. However, if you are
    interested in quantifying causal effects, i.e., the effect of
    hypothetical interventions, you may better look at the causal effect
    estimation functionality of Tigramite.

    PCMCI can be flexibly combined with any kind of conditional
    independence test statistic adapted to the kind of data (continuous
    or discrete) and its assumed dependency types. These are available in
    ``tigramite.independence_tests``.

    The main free parameters of PCMCI (in addition to free parameters of
    the conditional independence test statistic) are the maximum time
    delay :math:`\tau_{\max}` (``tau_max``) and the significance
    threshold in the condition-selection step :math:`\alpha`
    (``pc_alpha``). The maximum time delay depends on the application and
    should be chosen according to the maximum causal time lag expected in
    the complex system. We recommend a rather large choice that includes
    peaks in the ``get_lagged_dependencies`` function. :math:`\alpha`
    should not be seen as a significance test level in the
    condition-selection step since the iterative hypothesis tests do not
    allow for a precise assessment. :math:`\alpha` rather takes the role
    of a regularization parameter in model-selection techniques. If a
    list of values is given or ``pc_alpha=None``, :math:`\alpha` is
    optimized using model selection criteria implemented in the respective
    ``tigramite.independence_tests``.

    Further optional parameters are discussed in [1]_.

    Examples
    --------
    >>> import numpy
    >>> from tigramite.pcmci import PCMCI
    >>> from tigramite.independence_tests.parcorr import ParCorr
    >>> import tigramite.data_processing as pp
    >>> from tigramite.toymodels import structural_causal_processes as toys
    >>> numpy.random.seed(7)
    >>> # Example process to play around with
    >>> # Each key refers to a variable and the incoming links are supplied
    >>> # as a list of format [((driver, -lag), coeff), ...]
    >>> links_coeffs = {0: [((0, -1), 0.8)],
    ...                 1: [((1, -1), 0.8), ((0, -1), 0.5)],
    ...                 2: [((2, -1), 0.8), ((1, -2), -0.6)]}
    >>> data, _ = toys.var_process(links_coeffs, T=1000)
    >>> # Data must be array of shape (time, variables)
    >>> print (data.shape)
    (1000, 3)
    >>> dataframe = pp.DataFrame(data)
    >>> cond_ind_test = ParCorr()
    >>> pcmci = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
    >>> results = pcmci.run_pcmci(tau_max=2, pc_alpha=None)
    >>> pcmci.print_significant_links(p_matrix=results['p_matrix'],
    ...                               val_matrix=results['val_matrix'],
    ...                               alpha_level=0.05)
    ## Significant links at alpha = 0.05:

        Variable 0 has 1 link(s):
            (0 -1): pval = 0.00000 | val =  0.588

        Variable 1 has 2 link(s):
            (1 -1): pval = 0.00000 | val =  0.606
            (0 -1): pval = 0.00000 | val =  0.447

        Variable 2 has 2 link(s):
            (2 -1): pval = 0.00000 | val =  0.618
            (1 -2): pval = 0.00000 | val = -0.499

    Parameters
    ----------
    selected_links : dict or None
        Deprecated, replaced by link_assumptions. Passing anything other
        than None raises a ValueError.
    link_assumptions : dict
        Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
        assumptions about links. This initializes the graph with entries
        graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
        implies that a directed link from i to j at lag 0 must exist.
        Valid link types are 'o-o', '-->', '<--'. In addition, the middle
        mark can be '?' instead of '-'. Then '-?>' implies that this link
        may not exist, but if it exists, its orientation is '-->'. Link
        assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
        requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
        does not appear in the dictionary, it is assumed absent. That is,
        if link_assumptions is not None, then all links have to be
        specified or the links are assumed absent.
    tau_min : int, optional (default: 0)
        Minimum time lag to test. Note that zero-lags are undirected.
    tau_max : int, optional (default: 1)
        Maximum time lag. Must be larger or equal to tau_min.
    save_iterations : bool, optional (default: False)
        Whether to save iteration step results such as conditions used.
    pc_alpha : float, optional (default: 0.2)
        Significance level in PC1 algorithm.
    max_conds_dim : int, optional (default: None)
        Maximum number of conditions to test. If None is passed, this
        number is unrestricted.
    max_combinations : int, optional (default: 1)
        Maximum number of combinations of conditions of current
        cardinality to test in PC1 step.
    max_conds_py : int, optional (default: None)
        Maximum number of conditions of Y to use. If None is passed, this
        number is unrestricted.
    max_conds_px : int, optional (default: None)
        Maximum number of conditions of X to use. If None is passed, this
        number is unrestricted.
    alpha_level : float, optional (default: 0.05)
        Significance level at which the p_matrix is thresholded to
        get graph.
    fdr_method : str, optional (default: 'none')
        Correction method for the p-values, passed on to ``run_mci``.
        Currently implemented is the Benjamini-Hochberg False Discovery
        Rate method ('fdr_bh'); 'none' applies no correction.

    Returns
    -------
    graph : array of shape [N, N, tau_max+1]
        Causal graph, see description above for interpretation.
    val_matrix : array of shape [N, N, tau_max+1]
        Estimated matrix of test statistic values.
    p_matrix : array of shape [N, N, tau_max+1]
        Estimated matrix of p-values, optionally adjusted if fdr_method is
        not 'none'.
    conf_matrix : array of shape [N, N, tau_max+1,2]
        Estimated matrix of confidence intervals of test statistic values.
        Only computed if set in cond_ind_test, where also the percentiles
        are set.

    Raises
    ------
    ValueError
        If selected_links is not None.
    """
    if selected_links is not None:
        raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

    # Arguments common to both stages
    shared_args = dict(link_assumptions=link_assumptions,
                       tau_min=tau_min,
                       tau_max=tau_max)

    # Stage 1: condition selection, yields the parents used as conditions
    all_parents = self.run_pc_stable(save_iterations=save_iterations,
                                     pc_alpha=pc_alpha,
                                     max_conds_dim=max_conds_dim,
                                     max_combinations=max_combinations,
                                     **shared_args)

    # Stage 2: MCI tests conditioned on the parents from stage 1
    results = self.run_mci(parents=all_parents,
                           max_conds_py=max_conds_py,
                           max_conds_px=max_conds_px,
                           alpha_level=alpha_level,
                           fdr_method=fdr_method,
                           **shared_args)

    # Keep parents and results on the instance for later inspection
    self.all_parents = all_parents
    self.results = results
    return results
1948
+
1949
def run_pcmciplus(self,
                  selected_links=None,
                  link_assumptions=None,
                  tau_min=0,
                  tau_max=1,
                  pc_alpha=0.01,
                  contemp_collider_rule='majority',
                  conflict_resolution=True,
                  reset_lagged_links=False,
                  max_conds_dim=None,
                  max_combinations=1,
                  max_conds_py=None,
                  max_conds_px=None,
                  max_conds_px_lagged=None,
                  fdr_method='none',
                  ):
    r"""Runs PCMCIplus time-lagged and contemporaneous causal discovery for
    time series.

    Method described in [5]:
    http://www.auai.org/~w-auai/uai2020/proceedings/579_main_paper.pdf

    [5] J. Runge, Discovering contemporaneous and lagged causal relations
    in autocorrelated nonlinear time series datasets
    http://www.auai.org/~w-auai/uai2020/proceedings/579_main_paper.pdf

    Notes
    -----

    The PCMCIplus causal discovery method is described in [5], where
    also analytical and numerical results are presented. In contrast to
    PCMCI, PCMCIplus can identify the full, lagged and contemporaneous,
    causal graph (up to the Markov equivalence class for contemporaneous
    links) under the standard assumptions of Causal Sufficiency,
    Faithfulness and the Markov condition.

    PCMCIplus estimates time-lagged and contemporaneous causal links by a
    four-step procedure:

    1.  Condition-selection (same as for PCMCI): For each variable
        :math:`j`, estimate a *superset* of lagged parents
        :math:`\widehat{\mathcal{B}}_t^-(X^j_t)` with the iterative PC1
        algorithm, implemented as ``run_pc_stable``. The
        condition-selection step reduces the dimensionality and avoids
        conditioning on irrelevant variables.

    2.  PC skeleton phase with contemporaneous conditions and *Momentary
        conditional independence* (MCI) tests: Iterate through subsets
        :math:`\mathcal{S}` of contemporaneous adjacencies and conduct MCI
        conditional independence tests:

        .. math:: X^i_{t-\tau} ~\perp~ X^j_{t} ~|~ \mathcal{S},
                  \widehat{\mathcal{B}}_t^-(X^j_t),
                  \widehat{\mathcal{B}}_{t-\tau}^-(X^i_{t-{\tau}})

        here implemented as ``run_pcalg``. This step estimates the
        p-values and test statistic values for all lagged and
        contemporaneous adjacencies accounting for common drivers,
        indirect links, and autocorrelation.

    3.  PC collider orientation phase: Orient contemporaneous collider
        motifs based on unshielded triples. Optionally apply conservative
        or majority rule (also based on MCI tests).

    4.  PC rule orientation phase: Orient remaining contemporaneous
        links based on PC rules.

    In contrast to PCMCI, the relevant output of PCMCIplus is the
    array ``graph``. Its string entries are interpreted as follows:

    * ``graph[i,j,tau]=-->`` for :math:`\tau>0` denotes a directed, lagged
      causal link from :math:`i` to :math:`j` at lag :math:`\tau`

    * ``graph[i,j,0]=-->`` (and ``graph[j,i,0]=<--``) denotes a directed,
      contemporaneous causal link from :math:`i` to :math:`j`

    * ``graph[i,j,0]=o-o`` (and ``graph[j,i,0]=o-o``) denotes an
      unoriented, contemporaneous adjacency between :math:`i` and
      :math:`j` indicating that the collider and orientation rules could
      not be applied (Markov equivalence)

    * ``graph[i,j,0]=x-x`` (and ``graph[j,i,0]=x-x``) denotes a
      conflicting, contemporaneous adjacency between :math:`i` and
      :math:`j` indicating that the directionality is undecided due to
      conflicting orientation rules

    Importantly, ``p_matrix`` and ``val_matrix`` for PCMCIplus quantify
    the uncertainty and strength, respectively, only for the
    adjacencies, but not for the directionality of contemporaneous links.
    Note that lagged links are always oriented due to time order.

    PCMCIplus can be flexibly combined with any kind of conditional
    independence test statistic adapted to the kind of data (continuous
    or discrete) and its assumed dependency types. These are available in
    ``tigramite.independence_tests``.

    The main free parameters of PCMCIplus (in addition to free parameters
    of the conditional independence tests) are the maximum time delay
    :math:`\tau_{\max}` (``tau_max``) and the significance threshold
    :math:`\alpha` (``pc_alpha``).

    If a list or None is passed for ``pc_alpha``, the significance level
    is optimized for every graph across the given ``pc_alpha`` values
    using the score computed in
    ``cond_ind_test.get_model_selection_criterion()``. Since PCMCIplus
    outputs not a DAG, but an equivalence class of DAGs, first one member
    of this class is computed and then the score is computed as the
    average over all models fits for each variable in ``[0, ..., N]`` for
    that member. The score is the same for all members of the class.

    The maximum time delay depends on the application and should be chosen
    according to the maximum causal time lag expected in the complex
    system. We recommend a rather large choice that includes peaks in the
    ``get_lagged_dependencies`` function. Another important parameter is
    ``contemp_collider_rule``. Only if set to ``majority`` or
    ``conservative`` and together with ``conflict_resolution=True``,
    PCMCIplus is fully *order independent* meaning that the order of the N
    variables in the dataframe does not matter. Last, the default option
    ``reset_lagged_links=False`` restricts the detection of lagged causal
    links in Step 2 to the significant adjacencies found in Step 1, given
    by :math:`\widehat{\mathcal{B}}_t^-(X^j_t)`. For
    ``reset_lagged_links=True``, *all* lagged links are considered again,
    which improves detection power for lagged links, but also leads to
    larger runtimes.

    Further optional parameters are discussed in [5].

    Parameters
    ----------
    selected_links : dict or None
        Deprecated, replaced by link_assumptions. Passing anything other
        than None raises a ValueError.
    link_assumptions : dict
        Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
        assumptions about links. This initializes the graph with entries
        graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
        implies that a directed link from i to j at lag 0 must exist.
        Valid link types are 'o-o', '-->', '<--'. In addition, the middle
        mark can be '?' instead of '-'. Then '-?>' implies that this link
        may not exist, but if it exists, its orientation is '-->'. Link
        assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
        requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
        does not appear in the dictionary, it is assumed absent. That is,
        if link_assumptions is not None, then all links have to be
        specified or the links are assumed absent.
    tau_min : int, optional (default: 0)
        Minimum time lag to test.
    tau_max : int, optional (default: 1)
        Maximum time lag. Must be larger or equal to tau_min.
    pc_alpha : float or list of floats, default: 0.01
        Significance level in algorithm. If a list or None is passed, the
        pc_alpha level is optimized for every graph across the given
        pc_alpha values ([0.001, 0.005, 0.01, 0.025, 0.05] for None) using
        the score computed in
        cond_ind_test.get_model_selection_criterion().
    contemp_collider_rule : {'majority', 'conservative', 'none'}
        Rule for collider phase to use. See the paper for details. Only
        'majority' and 'conservative' lead to an order-independent
        algorithm.
    conflict_resolution : bool, optional (default: True)
        Whether to mark conflicts in orientation rules. Only for True
        this leads to an order-independent algorithm.
    reset_lagged_links : bool, optional (default: False)
        Restricts the detection of lagged causal links in Step 2 to the
        significant adjacencies found in the PC1 algorithm in Step 1. For
        True, *all* lagged links are considered again, which improves
        detection power for lagged links, but also leads to larger
        runtimes.
    max_conds_dim : int, optional (default: None)
        Maximum number of conditions to test. If None is passed, this
        number is unrestricted.
    max_combinations : int, optional (default: 1)
        Maximum number of combinations of conditions of current
        cardinality to test in PC1 step. The skeleton phase (Step 2) is
        always run without this restriction.
    max_conds_py : int, optional (default: None)
        Maximum number of lagged conditions of Y to use in MCI tests. If
        None is passed, this number is unrestricted.
    max_conds_px : int, optional (default: None)
        Maximum number of lagged conditions of X to use in MCI tests. If
        None is passed, this number is unrestricted.
    max_conds_px_lagged : int, optional (default: None)
        Maximum number of lagged conditions of X when X is lagged in MCI
        tests. If None is passed, this number is equal to max_conds_px.
    fdr_method : str, optional (default: 'none')
        Correction method for the p-values. The default 'none' applies no
        correction; the Benjamini-Hochberg False Discovery Rate method is
        available as 'fdr_bh'.

    Returns
    -------
    graph : array of shape [N, N, tau_max+1]
        Resulting causal graph, see description above for interpretation.
    val_matrix : array of shape [N, N, tau_max+1]
        Estimated matrix of test statistic values regarding adjacencies.
    p_matrix : array of shape [N, N, tau_max+1]
        Estimated matrix of p-values regarding adjacencies.
    sepsets : dictionary
        Separating sets. See paper for details.
    ambiguous_triples : list
        List of ambiguous triples, only relevant for 'majority' and
        'conservative' rules, see paper for details.

    Raises
    ------
    ValueError
        If selected_links is not None, or if a scalar pc_alpha lies
        outside [0, 1].
    """
    if selected_links is not None:
        raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

    # A list (or None) for pc_alpha means: optimize over several levels,
    # which is delegated to the optimizer wrapper around run_pcmciplus()
    if pc_alpha is None or isinstance(pc_alpha, (list, tuple, np.ndarray)):
        return self._optimize_pcmciplus_alpha(
            link_assumptions=link_assumptions,
            tau_min=tau_min,
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            contemp_collider_rule=contemp_collider_rule,
            conflict_resolution=conflict_resolution,
            reset_lagged_links=reset_lagged_links,
            max_conds_dim=max_conds_dim,
            max_combinations=max_combinations,
            max_conds_py=max_conds_py,
            max_conds_px=max_conds_px,
            max_conds_px_lagged=max_conds_px_lagged,
            fdr_method=fdr_method)

    if pc_alpha < 0. or pc_alpha > 1:
        raise ValueError("Choose 0 <= pc_alpha <= 1")

    # Validate the lag range and bring the link assumptions into their
    # internal form
    self._check_tau_limits(tau_min, tau_max)
    _int_link_assumptions = self._set_link_assumptions(link_assumptions, tau_min, tau_max)

    #
    # Step 1: superset of lagged parents from run_pc_stable
    #
    lagged_parents = self.run_pc_stable(link_assumptions=link_assumptions,
                                        tau_min=tau_min,
                                        tau_max=tau_max,
                                        pc_alpha=pc_alpha,
                                        max_conds_dim=max_conds_dim,
                                        max_combinations=max_combinations)
    # The matrices are read from the instance after run_pc_stable returned
    p_matrix = self.p_matrix
    val_matrix = self.val_matrix

    #
    # Step 2: PC algorithm with contemp. conditions and MCI tests
    #
    if self.verbosity > 0:
        print("\n##\n## Step 2: PC algorithm with contemp. conditions "
              "and MCI tests\n##"
              "\n\nParameters:")
        if link_assumptions is not None:
            print("\nlink_assumptions = %s" % str(_int_link_assumptions))
        parameter_report = "".join([
            "\nindependence test = %s" % self.cond_ind_test.measure,
            "\ntau_min = %d" % tau_min,
            "\ntau_max = %d" % tau_max,
            "\npc_alpha = %s" % pc_alpha,
            "\ncontemp_collider_rule = %s" % contemp_collider_rule,
            "\nconflict_resolution = %s" % conflict_resolution,
            "\nreset_lagged_links = %s" % reset_lagged_links,
            "\nmax_conds_dim = %s" % max_conds_dim,
            "\nmax_conds_py = %s" % max_conds_py,
            "\nmax_conds_px = %s" % max_conds_px,
            "\nmax_conds_px_lagged = %s" % max_conds_px_lagged,
            "\nfdr_method = %s" % fdr_method,
        ])
        print(parameter_report)

    skeleton = self._pcmciplus_mci_skeleton_phase(
        lagged_parents=lagged_parents,
        link_assumptions=_int_link_assumptions,
        pc_alpha=pc_alpha,
        tau_min=tau_min,
        tau_max=tau_max,
        max_conds_dim=max_conds_dim,
        max_combinations=None,  # Otherwise MCI step is not consistent
        max_conds_py=max_conds_py,
        max_conds_px=max_conds_px,
        max_conds_px_lagged=max_conds_px_lagged,
        reset_lagged_links=reset_lagged_links,
        fdr_method=fdr_method,
        p_matrix=p_matrix,
        val_matrix=val_matrix,
    )

    #
    # Step 3: Collider orientations (with MCI tests for default majority
    # collider rule)
    #
    colliders = self._pcmciplus_collider_phase(
        skeleton_graph=skeleton['graph'],
        sepsets=skeleton['sepsets'],
        lagged_parents=lagged_parents,
        pc_alpha=pc_alpha,
        tau_min=tau_min,
        tau_max=tau_max,
        max_conds_py=max_conds_py,
        max_conds_px=max_conds_px,
        max_conds_px_lagged=max_conds_px_lagged,
        conflict_resolution=conflict_resolution,
        contemp_collider_rule=contemp_collider_rule)

    #
    # Step 4: Meek rule orientations
    #
    final_graph = self._pcmciplus_rule_orientation_phase(
        collider_graph=colliders['graph'],
        ambiguous_triples=colliders['ambiguous_triples'],
        conflict_resolution=conflict_resolution)

    # Store the parents in the pcmci member
    self.all_lagged_parents = lagged_parents

    return_dict = {
        'graph': final_graph,
        'p_matrix': skeleton['p_matrix'],
        'val_matrix': skeleton['val_matrix'],
        'sepsets': colliders['sepsets'],
        'ambiguous_triples': colliders['ambiguous_triples'],
        # No confidence interval estimation here
        'conf_matrix': None,
    }

    if self.verbosity > 0:
        self.print_results(return_dict, alpha_level=pc_alpha)

    self.results = return_dict
    return return_dict
2277
+
2278
def _pcmciplus_mci_skeleton_phase(self,
                                  lagged_parents,
                                  link_assumptions,
                                  pc_alpha,
                                  tau_min,
                                  tau_max,
                                  max_conds_dim,
                                  max_combinations,
                                  max_conds_py,
                                  max_conds_px,
                                  max_conds_px_lagged,
                                  reset_lagged_links,
                                  fdr_method,
                                  p_matrix,
                                  val_matrix,
                                  ):
    """Run the MCI skeleton phase of PCMCIplus.

    Builds the set of links to be tested, runs ``_pcalg_skeleton`` in
    ``'contemp_conds'`` mode on it, and merges the symmetrized p-values
    and test statistic values into ``p_matrix`` and ``val_matrix``.

    Parameters
    ----------
    lagged_parents : dictionary
        Lagged parents per variable; used as additional conditions and,
        if ``reset_lagged_links`` is False, to restrict the lagged links
        that are tested.
    link_assumptions : dict
        Dictionary of form {j:{(i, -tau): link_type, ...}, ...}.
    pc_alpha : float
        Significance level.
    tau_min : int
        Minimum time lag to test.
    tau_max : int
        Maximum time lag.
    max_conds_dim : int or None
        Maximum number of conditions to test. None is replaced by self.N.
    max_combinations : int or None
        Maximum number of condition combinations per cardinality. None is
        replaced by np.inf.
    max_conds_py : int or None
        Maximum number of lagged conditions of Y; passed through
        ``_set_max_condition_dim``.
    max_conds_px : int or None
        Maximum number of lagged conditions of X; passed through
        ``_set_max_condition_dim``.
    max_conds_px_lagged : int or None
        Maximum number of lagged conditions of X when X is lagged.
    reset_lagged_links : bool
        If True, all links in ``link_assumptions`` are tested; otherwise
        only the lagged parents with mark '-?>' or '-->' plus all
        contemporaneous links.
    fdr_method : str
        If different from 'none', the p-values are corrected with
        ``get_corrected_pvalues``.
    p_matrix : array
        p-value matrix from the preceding step; modified in place.
    val_matrix : array
        Test statistic matrix from the preceding step; modified in place.

    Returns
    -------
    skeleton_results : dict
        Result dictionary of ``_pcalg_skeleton`` whose 'p_matrix' and
        'val_matrix' entries are replaced by the merged matrices.
    """
    # Resolve the limits on lagged conditions for Y and X
    max_conds_py = self._set_max_condition_dim(max_conds_py, tau_min, tau_max)
    max_conds_px = self._set_max_condition_dim(max_conds_px, tau_min, tau_max)

    if reset_lagged_links:
        # Test the full assumed graph again, regardless of what the
        # lagged-condition phase found
        links_for_pc = deepcopy(link_assumptions)
    else:
        # Keep only the lagged parents found before, plus every
        # contemporaneous link
        links_for_pc = {}
        for j in range(self.N):
            parents_j = lagged_parents[j]
            assumed = link_assumptions[j]
            selected = {}
            for parent in parents_j:
                mark = assumed[parent]
                if mark in ['-?>', '-->']:
                    selected[parent] = mark
            for (i, tau), link_type in assumed.items():
                if abs(tau) == 0:
                    selected[(i, 0)] = link_type
            links_for_pc[j] = selected

    if max_conds_dim is None:
        max_conds_dim = self.N
    if max_combinations is None:
        max_combinations = np.inf

    skeleton_results = self._pcalg_skeleton(
        initial_graph=self._dict_to_graph(links_for_pc, tau_max=tau_max),
        lagged_parents=lagged_parents,
        mode='contemp_conds',
        pc_alpha=pc_alpha,
        tau_min=tau_min,
        tau_max=tau_max,
        max_conds_dim=max_conds_dim,
        max_combinations=max_combinations,
        max_conds_py=max_conds_py,
        max_conds_px=max_conds_px,
        max_conds_px_lagged=max_conds_px_lagged,
    )

    # Symmetrize the matrices produced by the skeleton search
    symmetrized_results = self.symmetrize_p_and_val_matrix(
        p_matrix=skeleton_results['p_matrix'],
        val_matrix=skeleton_results['val_matrix'],
        link_assumptions=links_for_pc,
        conf_matrix=None)
    sym_p = symmetrized_results['p_matrix']
    sym_val = symmetrized_results['val_matrix']

    # Lag-zero entries are taken over completely
    p_matrix[:, :, 0] = sym_p[:, :, 0]
    val_matrix[:, :, 0] = sym_val[:, :, 0]

    # Entries of links tested in this phase overwrite the earlier values;
    # all other entries keep what was passed in
    for j in range(self.N):
        for (i, tau), mark in links_for_pc[j].items():
            if mark in ['<--', '<?-']:
                continue
            lag = abs(tau)
            p_matrix[i, j, lag] = sym_p[i, j, lag]
            val_matrix[i, j, lag] = sym_val[i, j, lag]

    # Optional multiple-testing correction
    if fdr_method != 'none':
        p_matrix = self.get_corrected_pvalues(p_matrix=p_matrix, tau_min=tau_min,
                                              tau_max=tau_max,
                                              link_assumptions=link_assumptions,
                                              fdr_method=fdr_method)

    skeleton_results['p_matrix'] = p_matrix
    skeleton_results['val_matrix'] = val_matrix

    return skeleton_results
2381
+
2382
+
2383
def _pcmciplus_collider_phase(self, skeleton_graph, sepsets, lagged_parents,
                              pc_alpha, tau_min, tau_max, max_conds_py, max_conds_px,
                              max_conds_px_lagged, conflict_resolution,
                              contemp_collider_rule):
    """Run the MCI collider orientation phase of PCMCIplus.

    Replaces the '?' middle marks in ``skeleton_graph`` (in place) by '-'
    and then calls ``_pcalg_colliders`` in ``'contemp_conds'`` mode.

    Parameters
    ----------
    skeleton_graph : array
        Graph from the skeleton phase; modified in place.
    sepsets : dictionary
        Separating sets from the skeleton phase.
    lagged_parents : dictionary
        Lagged parents per variable, used as additional conditions.
    pc_alpha : float
        Significance level.
    tau_min : int
        Minimum time lag.
    tau_max : int
        Maximum time lag.
    max_conds_py : int or None
        Maximum number of lagged conditions of Y; passed through
        ``_set_max_condition_dim``.
    max_conds_px : int or None
        Maximum number of lagged conditions of X; passed through
        ``_set_max_condition_dim``.
    max_conds_px_lagged : int or None
        Maximum number of lagged conditions of X when X is lagged.
    conflict_resolution : bool
        Whether to mark conflicts in orientation rules.
    contemp_collider_rule : {'majority', 'conservative', 'none'}
        Rule for the collider phase.

    Returns
    -------
    colliders_step_results : dict
        Whatever ``_pcalg_colliders`` returns.
    """
    # Resolve the limits on lagged conditions for Y and X
    max_conds_py = self._set_max_condition_dim(max_conds_py, tau_min, tau_max)
    max_conds_px = self._set_max_condition_dim(max_conds_px, tau_min, tau_max)

    # Links that survived the skeleton phase are no longer tentative
    for tentative, definite in (('o?o', 'o-o'), ('-?>', '-->'), ('<?-', '<--')):
        skeleton_graph[skeleton_graph == tentative] = definite

    return self._pcalg_colliders(
        graph=skeleton_graph,
        sepsets=sepsets,
        lagged_parents=lagged_parents,
        mode='contemp_conds',
        pc_alpha=pc_alpha,
        tau_max=tau_max,
        max_conds_py=max_conds_py,
        max_conds_px=max_conds_px,
        max_conds_px_lagged=max_conds_px_lagged,
        conflict_resolution=conflict_resolution,
        contemp_collider_rule=contemp_collider_rule,
    )
2414
+
2415
def _pcmciplus_rule_orientation_phase(self, collider_graph,
                                      ambiguous_triples, conflict_resolution):
    """Run the MCI rule orientation phase of PCMCIplus.

    Thin wrapper that forwards its arguments to
    ``_pcalg_rules_timeseries``.

    Parameters
    ----------
    collider_graph : array
        Graph resulting from the collider phase.
    ambiguous_triples : list
        Ambiguous triples found in the collider phase.
    conflict_resolution : bool
        Whether to mark conflicts in orientation rules.

    Returns
    -------
    final_graph
        The return value of ``_pcalg_rules_timeseries``.
    """
    return self._pcalg_rules_timeseries(
        graph=collider_graph,
        ambiguous_triples=ambiguous_triples,
        conflict_resolution=conflict_resolution,
    )
2426
+
2427
+
2428
def run_pcalg(self,
              selected_links=None,
              link_assumptions=None,
              pc_alpha=0.01,
              tau_min=0,
              tau_max=1,
              max_conds_dim=None,
              max_combinations=None,
              lagged_parents=None,
              max_conds_py=None,
              max_conds_px=None,
              max_conds_px_lagged=None,
              mode='standard',
              contemp_collider_rule='majority',
              conflict_resolution=True):
    """Run the PC algorithm for time-lagged and contemporaneous causal
    discovery on time series.

    With ``mode='contemp_conds'`` this corresponds to Steps 2-4 of the
    PCMCIplus method described in [5]; with ``mode='standard'`` it is the
    standard PC algorithm adapted to time series. The method chains the
    skeleton phase, the collider phase and the rule orientation phase and
    stores the result in ``self.pc_results``.

    [5] J. Runge, Discovering contemporaneous and lagged causal relations
    in autocorrelated nonlinear time series datasets
    http://www.auai.org/~w-auai/uai2020/proceedings/579_main_paper.pdf

    Parameters
    ----------
    selected_links : dict or None
        Deprecated, replaced by link_assumptions. Any value other than
        None raises a ValueError.
    link_assumptions : dict
        Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
        assumptions about links. This initializes the graph with entries
        graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
        implies that a directed link from i to j at lag 0 must exist.
        Valid link types are 'o-o', '-->', '<--'. In addition, the middle
        mark can be '?' instead of '-'. Then '-?>' implies that this link
        may not exist, but if it exists, its orientation is '-->'. Link
        assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
        requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
        does not appear in the dictionary, it is assumed absent. That is,
        if link_assumptions is not None, then all links have to be
        specified or the links are assumed absent.
    pc_alpha : float, optional (default: 0.01)
        Significance level. None is not supported.
    tau_min : int, optional (default: 0)
        Minimum time lag to test.
    tau_max : int, optional (default: 1)
        Maximum time lag. Must be larger or equal to tau_min.
    max_conds_dim : int, optional (default: None)
        Maximum number of conditions to test. If None is passed, the value
        is taken from ``_set_max_condition_dim`` in 'standard' mode and
        set to self.N in 'contemp_conds' mode.
    max_combinations : int, optional (default: None)
        Maximum number of combinations of conditions of current
        cardinality to test. If None is passed, np.inf is used, i.e. the
        number is unrestricted.
    lagged_parents : dictionary
        Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...}
        containing additional conditions for each CI test. As part of
        PCMCIplus these are the superset of lagged parents estimated with
        the PC1 algorithm.
    max_conds_py : int, optional (default: None)
        Maximum number of lagged conditions of Y to use in MCI tests. If
        None is passed, this number is unrestricted.
    max_conds_px : int, optional (default: None)
        Maximum number of lagged conditions of X to use in MCI tests. If
        None is passed, this number is unrestricted.
    max_conds_px_lagged : int, optional (default: None)
        Maximum number of lagged conditions of X when X is lagged in MCI
        tests. If None is passed, this number is equal to max_conds_px.
    mode : {'standard', 'contemp_conds'}
        Selects between the standard PC algorithm and the PCMCIplus
        variant, see above.
    contemp_collider_rule : {'majority', 'conservative', 'none'}
        Rule for collider phase to use. See the paper for details. Only
        'majority' and 'conservative' lead to an order-independent
        algorithm.
    conflict_resolution : bool, optional (default: True)
        Whether to mark conflicts in orientation rules. Only for True
        this leads to an order-independent algorithm.

    Returns
    -------
    pc_results : dict
        Dictionary with the keys

        graph : array of shape [N, N, tau_max+1]
            Resulting causal graph.
        p_matrix : array of shape [N, N, tau_max+1]
            Symmetrized matrix of p-values regarding adjacencies.
        val_matrix : array of shape [N, N, tau_max+1]
            Symmetrized matrix of test statistic values regarding
            adjacencies.
        sepsets : dictionary
            Separating sets. See paper for details.
        ambiguous_triples : list
            List of ambiguous triples, only relevant for 'majority' and
            'conservative' rules, see paper for details.

    Raises
    ------
    ValueError
        If selected_links is given, pc_alpha is None, or mode is invalid.
    """
    # TODO: save_iterations

    # Argument validation
    if selected_links is not None:
        raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")
    if pc_alpha is None:
        raise ValueError("pc_alpha=None not supported in PC algorithm, "
                         "choose 0 < pc_alpha < 1 (e.g., 0.01)")
    if mode not in ('contemp_conds', 'standard'):
        raise ValueError("mode must be either 'contemp_conds' or "
                         "'standard'")
    self._check_tau_limits(tau_min, tau_max)

    # Internal, completed form of the link assumptions
    _int_link_assumptions = self._set_link_assumptions(link_assumptions, tau_min, tau_max)

    # Fill in defaults for the search limits
    if max_conds_dim is None:
        if mode == 'standard':
            max_conds_dim = self._set_max_condition_dim(max_conds_dim,
                                                        tau_min, tau_max)
        else:
            max_conds_dim = self.N
    if max_combinations is None:
        max_combinations = np.inf

    # Limits on lagged conditions shared by skeleton and collider phase
    mci_limits = dict(max_conds_py=max_conds_py,
                      max_conds_px=max_conds_px,
                      max_conds_px_lagged=max_conds_px_lagged)

    # Phase 1: skeleton
    skeleton_results = self._pcalg_skeleton(
        initial_graph=self._dict_to_graph(_int_link_assumptions, tau_max=tau_max),
        lagged_parents=lagged_parents,
        mode=mode,
        pc_alpha=pc_alpha,
        tau_min=tau_min,
        tau_max=tau_max,
        max_conds_dim=max_conds_dim,
        max_combinations=max_combinations,
        **mci_limits
    )
    skeleton_graph = skeleton_results['graph']

    # Surviving links are no longer tentative: drop the '?' middle marks
    for tentative, definite in (('o?o', 'o-o'), ('-?>', '-->'), ('<?-', '<--')):
        skeleton_graph[skeleton_graph == tentative] = definite

    # Phase 2: colliders
    colliders_step_results = self._pcalg_colliders(
        graph=skeleton_graph,
        sepsets=skeleton_results['sepsets'],
        lagged_parents=lagged_parents,
        mode=mode,
        pc_alpha=pc_alpha,
        tau_max=tau_max,
        conflict_resolution=conflict_resolution,
        contemp_collider_rule=contemp_collider_rule,
        **mci_limits
    )

    # Phase 3: orientation rules
    final_graph = self._pcalg_rules_timeseries(
        graph=colliders_step_results['graph'],
        ambiguous_triples=colliders_step_results['ambiguous_triples'],
        conflict_resolution=conflict_resolution,
    )

    # Symmetrize p_matrix and val_matrix of the skeleton phase
    symmetrized_results = self.symmetrize_p_and_val_matrix(
        p_matrix=skeleton_results['p_matrix'],
        val_matrix=skeleton_results['val_matrix'],
        link_assumptions=_int_link_assumptions,
        conf_matrix=None)

    pc_results = {
        'graph': final_graph,
        'p_matrix': symmetrized_results['p_matrix'],
        'val_matrix': symmetrized_results['val_matrix'],
        'sepsets': colliders_step_results['sepsets'],
        'ambiguous_triples': colliders_step_results['ambiguous_triples'],
    }

    if self.verbosity > 1:
        print("\n-----------------------------")
        print("PCMCIplus algorithm finished.")
        print("-----------------------------")

    self.pc_results = pc_results
    return pc_results
2626
+
2627
def run_pcalg_non_timeseries_data(self, pc_alpha=0.01,
                                  max_conds_dim=None, max_combinations=None,
                                  contemp_collider_rule='majority',
                                  conflict_resolution=True):
    """Run the PC algorithm for non-time series data.

    Calls ``run_pcalg`` in 'standard' mode with tau_min = tau_max = 0 and
    afterwards strips the lag from the sepsets and ambiguous triples. The
    result is also stored in ``self.pc_results``.

    Parameters
    ----------
    pc_alpha : float, optional (default: 0.01)
        Significance level.
    max_conds_dim : int, optional (default: None)
        Maximum number of conditions to test; forwarded to ``run_pcalg``.
    max_combinations : int, optional (default: None)
        Maximum number of combinations of conditions of current
        cardinality to test; forwarded to ``run_pcalg``.
    contemp_collider_rule : {'majority', 'conservative', 'none'}
        Rule for collider phase to use. See the paper for details. Only
        'majority' and 'conservative' lead to an order-independent
        algorithm.
    conflict_resolution : bool, optional (default: True)
        Whether to mark conflicts in orientation rules. Only for True
        this leads to an order-independent algorithm.

    Returns
    -------
    results : dict
        The dictionary returned by ``run_pcalg`` where

        sepsets : dictionary
            has keys (i, j) instead of ((i, 0), j) and lists of variable
            indices instead of lists of (variable, lag) nodes.
        ambiguous_triples : list
            contains triples (i, k, j) instead of ((i, 0), k, j).
    """
    results = self.run_pcalg(pc_alpha=pc_alpha, tau_min=0, tau_max=0,
                             max_conds_dim=max_conds_dim,
                             max_combinations=max_combinations,
                             mode='standard',
                             contemp_collider_rule=contemp_collider_rule,
                             conflict_resolution=conflict_resolution)

    # Drop the (always zero) lag from sepset keys and from their conditions
    lagged_sepsets = results['sepsets'].copy()
    results['sepsets'] = {
        (key[0][0], key[1]): [cond[0] for cond in conds]
        for key, conds in lagged_sepsets.items()
    }

    # Same for the first node of every ambiguous triple
    lagged_triples = results['ambiguous_triples'].copy()
    results['ambiguous_triples'] = [
        (triple[0][0], triple[1], triple[2]) for triple in lagged_triples
    ]

    self.pc_results = results
    return results
2693
+
2694
+
2695
def _run_pcalg_test(self, graph, i, abstau, j, S, lagged_parents, max_conds_py,
                    max_conds_px, max_conds_px_lagged, tau_max, alpha_or_thres=None):
    """MCI conditional independence tests within PCMCIplus or PC algorithm.

    Builds the condition set Z from the contemporaneous conditions ``S``
    and (optionally) the lagged parents of both nodes, and tests
    (i, -abstau) against (j, 0) given Z. Links whose middle mark is '-'
    are treated as given: no test is run and they are reported as
    dependent with val=1 and pval=0.

    Parameters
    ----------
    graph : array
        Current graph; only the entry graph[i, j, abstau] is read.
    i : int
        Variable index.
    abstau : int
        Time lag (absolute value).
    j : int
        Variable index.
    S : list
        List of contemporaneous conditions.
    lagged_parents : dictionary of lists or None
        Dictionary of lagged parents for each node. If None, no lagged
        conditions are added.
    max_conds_py : int
        Max number of lagged parents for node j.
    max_conds_px : int
        Max number of lagged parents for lagged node i.
    max_conds_px_lagged : int
        Maximum number of lagged conditions of X when X is lagged in MCI
        tests. If None is passed, this number is equal to max_conds_px.
    tau_max : int
        Maximum time lag.
    alpha_or_thres : float
        Significance level (if significance='analytic' or 'shuffle_test') or
        threshold (if significance='fixed_thres'). Forwarded to run_test.

    Returns
    -------
    val, pval, Z, dependent : Tuple of floats, list, and bool or None
        The test statistic value, the p-value, the list of conditions and
        the test decision. ``dependent`` is None if run_test did not
        return a decision.
    """
    # Collect the lagged conditions for both nodes
    if lagged_parents is not None:
        conds_y = lagged_parents[j][:max_conds_py]
        # For a lagged X a separate limit may apply
        if abstau == 0 or max_conds_px_lagged is None:
            conds_x = lagged_parents[i][:max_conds_px]
        else:
            conds_x = lagged_parents[i][:max_conds_px_lagged]
    else:
        conds_y = conds_x = []

    # Shift the conditions for X by tau
    conds_x_lagged = [(k, -abstau + k_tau) for k, k_tau in conds_x]

    Z = list(S)
    # The tested node itself must not be conditioned on
    Z += [node for node in conds_y if
          node != (i, -abstau) and node not in Z]
    # Remove overlapping nodes between conds_x_lagged and conds_y
    Z += [node for node in conds_x_lagged if node not in Z]

    # If middle mark is '-', the link is assumed to exist: set pval=0
    if graph[i, j, abstau] != "" and graph[i, j, abstau][1] == '-':
        return 1., 0., Z, True

    outcome = self.cond_ind_test.run_test(X=[(i, -abstau)], Y=[(j, 0)],
                                          Z=Z, tau_max=tau_max,
                                          alpha_or_thres=alpha_or_thres,
                                          )
    # run_test is documented to include the decision only when
    # alpha_or_thres is given, so accept both result shapes instead of
    # failing on the default alpha_or_thres=None.
    if len(outcome) == 3:
        val, pval, dependent = outcome
    else:
        val, pval = outcome
        dependent = None

    return val, pval, Z, dependent
2769
+
2770
def _print_triple_info(self, triple, index, n_triples):
    """Print a header line for the triple that is tested next.

    Parameters
    ----------
    triple : tuple
        Standard ((i, tau), k, j) tuple of nodes and time delays.
    index : int
        Zero-based index of the triple; printed one-based.
    n_triples : int
        Total number of triples.
    """
    (i, tau), k, j = triple
    names = self.var_names
    # A contemporaneous first link is shown unoriented, a lagged one directed
    first_link = "o-o" if tau == 0 else "-->"

    print("\n Triple (%s % d) %s %s o-o %s (%d/%d)" % (
        names[i], tau, first_link, names[k], names[j],
        index + 1, n_triples))
2788
+
2789
+
2790
def _tests_remaining(self, i, j, abstau, graph, adjt, p):
    """Return whether the pair (i, -abstau), (j, 0) still needs testing.

    That is the case if the link is still present in ``graph`` and j has
    at least ``p`` adjacencies in ``adjt`` other than (i, -abstau).
    """
    link_present = graph[i, j, abstau] != ""
    return link_present and sum(
        1 for a in adjt[j] if a != (i, -abstau)) >= p
2795
+
2796
def _any_tests_remaining(self, graph, adjt, tau_min, tau_max, p):
    """Return True if at least one pair still needs to be tested.

    Delegates to ``_remaining_pairs`` and checks whether its result is
    non-empty.
    """
    return len(self._remaining_pairs(graph, adjt, tau_min, tau_max, p)) > 0
2806
+
2807
def _remaining_pairs(self, graph, adjt, tau_min, tau_max, p):
    """Return the list of (i, j, abstau) pairs that still need testing.

    A pair qualifies if graph[i, j, abstau] is non-empty and j has at
    least ``p`` adjacencies in ``adjt`` other than (i, -abstau). Lags run
    from tau_min to tau_max (inclusive); the number of variables is taken
    from the first dimension of ``graph``.
    """
    n_vars = graph.shape[0]
    lags = range(tau_min, tau_max + 1)
    return [
        (i, j, abstau)
        for i, j in itertools.product(range(n_vars), repeat=2)
        for abstau in lags
        if graph[i, j, abstau] != ""
        and sum(1 for a in adjt[j] if a != (i, -abstau)) >= p
    ]
2820
+
2821
def _pcalg_skeleton(self,
                    initial_graph,
                    lagged_parents,
                    mode,
                    pc_alpha,
                    tau_min,
                    tau_max,
                    max_conds_dim,
                    max_combinations,
                    max_conds_py,
                    max_conds_px,
                    max_conds_px_lagged,
                    ):
    """Implements the skeleton discovery step of the PC algorithm for
    time series.

    Starting from ``initial_graph`` (or a complete graph), links are
    tested with condition sets of increasing cardinality p taken from the
    current adjacencies of j. A link is removed as soon as one test finds
    independence; the condition set is then stored as its sepset.

    Parameters
    ----------
    initial_graph : array of shape (N, N, tau_max+1) or None
        Initial graph. If given, it is modified in place. If None, a
        complete graph with 'o?o' at lag zero and '-?>' at positive lags
        is used.
    lagged_parents : dictionary
        Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
        additional conditions for each CI test. As part of PCMCIplus
        these are the superset of lagged parents estimated with the PC1
        algorithm.
    mode : {'standard', 'contemp_conds'}
        For ``mode='contemp_conds'`` this implements Steps 2-4 of the
        PCMCIplus method. For ``mode='standard'`` this implements the
        standard PC algorithm adapted to time series.
    pc_alpha : float
        Significance level.
    tau_min : int
        Minimum time lag to test.
    tau_max : int
        Maximum time lag. Must be larger or equal to tau_min.
    max_conds_dim : int
        Maximum cardinality of the condition sets drawn from the
        adjacencies.
    max_combinations : int
        Maximum number of combinations of conditions of current cardinality
        to test per link. Pass np.inf for an unrestricted search.
    max_conds_py : int
        Maximum number of lagged conditions of Y to use in MCI tests.
    max_conds_px : int
        Maximum number of lagged conditions of X to use in MCI tests.
    max_conds_px_lagged : int or None
        Maximum number of lagged conditions of X when X is lagged in MCI
        tests. If None is passed, this number is equal to max_conds_px.

    Returns
    -------
    results : dict
        Dictionary with the keys

        graph : array of shape [N, N, tau_max+1]
            Resulting skeleton graph.
        sepsets : dictionary
            Separating sets. See paper for details.
        p_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of p-values regarding adjacencies.
        val_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of test statistic values regarding adjacencies.

    Raises
    ------
    ValueError
        If mode is neither 'contemp_conds' nor 'standard'.
    """
    N = self.N

    if mode not in ('contemp_conds', 'standard'):
        raise ValueError("mode must be either 'contemp_conds' or "
                         "'standard'")

    # Form complete graph
    if initial_graph is None:
        graph = np.ones((N, N, tau_max + 1), dtype='<U3')
        graph[:, :, 0] = "o?o"
        graph[:, :, 1:] = "-?>"
    else:
        graph = initial_graph

    # Remove lag-zero self-loops
    graph[range(N), range(N), 0] = ""

    # Define adjacencies for standard and contemp_conds mode
    if mode == 'contemp_conds':
        adjt = self._get_adj_time_series_contemp(graph)
    else:
        adjt = self._get_adj_time_series(graph)

    val_matrix = np.zeros((N, N, tau_max + 1))

    # Minimum absolute test statistic per link, used to sort adjacencies
    val_min = dict()
    for j in range(self.N):
        val_min[j] = {(entry[0], -entry[1]): np.inf
                      for entry in zip(*np.where(graph[:, j, :] != ""))}

    # Initialize p-values. Set to 1 if there's no link in the initial graph
    p_matrix = np.zeros((N, N, tau_max + 1))
    p_matrix[graph == ""] = 1.

    # Maximum p-value per link (only used for verbose output)
    pval_max = dict()
    for j in range(self.N):
        pval_max[j] = {(entry[0], -entry[1]): 0.
                       for entry in zip(*np.where(graph[:, j, :] != ""))}

    # TODO: Remove sepsets altogether?
    # Initialize sepsets that store the conditions that make i and j
    # independent
    sepsets = self._get_sepsets(tau_min, tau_max)

    if self.verbosity > 1:
        print("\n--------------------------")
        print("Skeleton discovery phase")
        print("--------------------------")

    # Start with zero cardinality conditions
    p = 0
    while (self._any_tests_remaining(graph, adjt, tau_min, tau_max,
                                     p) and p <= max_conds_dim):
        if self.verbosity > 1:
            print(
                "\nTesting contemporaneous condition sets of dimension "
                "%d: " % p)

        remaining_pairs = self._remaining_pairs(graph, adjt, tau_min,
                                                tau_max, p)
        n_remaining = len(remaining_pairs)
        for ir, (i, j, abstau) in enumerate(remaining_pairs):
            # Check if link was not already removed (contemp links)
            if graph[i, j, abstau] != "":
                if self.verbosity > 1:
                    self._print_link_info(j=j, index_parent=ir,
                                          parent=(i, -abstau),
                                          num_parents=n_remaining)

                # Generate all subsets of conditions of cardinality p
                conditions = list(itertools.combinations(
                    [(k, tauk) for (k, tauk) in adjt[j]
                     if not (k == i and tauk == -abstau)], p))

                n_conditions = len(conditions)
                if self.verbosity > 1:
                    print(
                        " Iterate through %d subset(s) of conditions: "
                        % n_conditions)
                    if lagged_parents is not None:
                        self._print_pcmciplus_conditions(lagged_parents, i,
                                                         j, abstau,
                                                         max_conds_py,
                                                         max_conds_px,
                                                         max_conds_px_lagged)
                nonsig = False
                # Iterate through condition sets
                for q, S in enumerate(conditions):
                    # q is zero-based, so stop once max_combinations sets
                    # have been tested (">" would allow one set too many)
                    if q >= max_combinations:
                        break

                    # Run MCI test
                    val, pval, Z, dependent = self._run_pcalg_test(
                        graph=graph,
                        i=i, abstau=abstau, j=j, S=S,
                        lagged_parents=lagged_parents,
                        max_conds_py=max_conds_py,
                        max_conds_px=max_conds_px,
                        max_conds_px_lagged=max_conds_px_lagged,
                        tau_max=tau_max, alpha_or_thres=pc_alpha)

                    # Store minimum absolute test statistic value for
                    # sorting adjt (only internally used)
                    val_min[j][(i, -abstau)] = min(np.abs(val),
                                                   val_min[j].get(
                                                       (i, -abstau)))
                    # Store maximum p-value (only internally used)
                    pval_max[j][(i, -abstau)] = max(pval,
                                                    pval_max[j].get(
                                                        (i, -abstau)))

                    # Store max. p-value and corresponding value to return
                    if pval >= p_matrix[i, j, abstau]:
                        p_matrix[i, j, abstau] = pval
                        val_matrix[i, j, abstau] = val

                    if self.verbosity > 1:
                        self._print_cond_info(Z=S, comb_index=q, pval=pval,
                                              val=val)

                    # If conditional independence is found, remove link
                    # from graph and store sepsets
                    if not dependent:  # pval > pc_alpha:
                        nonsig = True
                        if abstau == 0:
                            graph[i, j, 0] = graph[j, i, 0] = ""
                            sepsets[((i, 0), j)] = sepsets[
                                ((j, 0), i)] = list(S)
                            # Also store p-value in other contemp. entry
                            p_matrix[j, i, 0] = p_matrix[i, j, 0]
                        else:
                            graph[i, j, abstau] = ""
                            sepsets[((i, -abstau), j)] = list(S)
                        break

                # Print the results if needed
                if self.verbosity > 1:
                    self._print_a_pc_result(nonsig,
                                            conds_dim=p,
                                            max_combinations=
                                            max_combinations)
            else:
                self._print_link_info(j=j, index_parent=ir,
                                      parent=(i, -abstau),
                                      num_parents=n_remaining,
                                      already_removed=True)

        # Increase condition cardinality
        p += 1

        # Re-compute adj and sort by minimum absolute test statistic value
        if mode == 'contemp_conds':
            adjt = self._get_adj_time_series_contemp(graph, sort_by=val_min)
        else:
            adjt = self._get_adj_time_series(graph, sort_by=val_min)

        if self.verbosity > 1:
            print("\nUpdated contemp. adjacencies:")
            self._print_parents(all_parents=adjt, val_min=val_min,
                                pval_max=pval_max)

    if self.verbosity > 1:
        # The loop ends either because nothing is left to test (converged)
        # or because p exceeded max_conds_dim while tests remain. Checking
        # the negated loop condition here would always report convergence.
        if self._any_tests_remaining(graph, adjt, tau_min, tau_max, p):
            print(
                "\nAlgorithm not yet converged, but max_conds_dim = %d"
                " reached." % max_conds_dim)
        else:
            print("\nAlgorithm converged at p = %d." % (p - 1))

    return {'graph': graph,
            'sepsets': sepsets,
            'p_matrix': p_matrix,
            'val_matrix': val_matrix,
            }
3051
+
3052
def _get_sepsets(self, tau_min, tau_max):
    """Create the initial (empty) separating sets.

    Parameters
    ----------
    tau_min : int
        Minimum time lag to test.
    tau_max : int
        Maximum time lag. Must be larger or equal to tau_min.

    Returns
    -------
    sepsets : dict
        Maps every key ((i, -tau), j) with tau_min <= tau <= tau_max and
        i, j in range(self.N) to its own empty list.
    """
    n_vars = self.N
    return {
        ((i, -tau), j): []
        for tau in range(tau_min, tau_max + 1)
        for i in range(n_vars)
        for j in range(n_vars)
    }
3073
+
3074
def _find_unshielded_triples(self, graph):
    """Find unshielded triples i_tau o-(>) k_t o-o j_t with i_tau -/- j_t.

    Adjacencies are obtained from ``_get_adj_time_series`` with
    ``include_conflicts=False``, i.e. conflicting links are excluded.

    Parameters
    ----------
    graph : array of shape [N, N, tau_max+1]
        Causal graph.

    Returns
    -------
    triples : list
        List of triples of the form ((i, taui), k, j).
    """
    n_vars = graph.shape[0]
    adjacencies = self._get_adj_time_series(graph, include_conflicts=False)

    found = []
    for j in range(n_vars):
        for k, tauk in adjacencies[j]:
            # The middle node must be an unoriented contemporaneous
            # neighbor of j
            if tauk != 0 or graph[k, j, 0] != "o-o":
                continue
            for i, taui in adjacencies[k]:
                # i_tau must differ from j_t ...
                if (i, taui) == (j, 0):
                    continue
                lag = abs(taui)
                # ... must not be adjacent to j_t ...
                if graph[i, j, lag] != "":
                    continue
                # ... and must be linked to k_t by o-o or -->
                if graph[i, k, lag] == "o-o" or graph[i, k, lag] == "-->":
                    found.append(((i, taui), k, j))

    return found
3113
+
3114
+ def _pcalg_colliders(self,
3115
+ graph,
3116
+ sepsets,
3117
+ lagged_parents,
3118
+ mode,
3119
+ pc_alpha,
3120
+ tau_max,
3121
+ max_conds_py,
3122
+ max_conds_px,
3123
+ max_conds_px_lagged,
3124
+ contemp_collider_rule,
3125
+ conflict_resolution,
3126
+ ):
3127
+ """Implements the collider orientation step of the PC algorithm for
3128
+ time series.
3129
+
3130
+ Parameters
3131
+ ----------
3132
+ graph : array of shape (N, N, tau_max+1)
3133
+ Current graph.
3134
+ sepsets : dictionary
3135
+ Separating sets. See paper for details.
3136
+ lagged_parents : dictionary
3137
+ Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
3138
+ additional conditions for each CI test. As part of PCMCIplus
3139
+ these are the superset of lagged parents estimated with the PC1
3140
+ algorithm.
3141
+ mode : {'standard', 'contemp_conds'}
3142
+ For ``mode='contemp_conds'`` this implements Steps 2-4 of the
3143
+ PCMCIplus method. For ``mode='standard'`` this implements the
3144
+ standard PC algorithm adapted to time series.
3145
+ pc_alpha : float, optional (default: 0.01)
3146
+ Significance level.
3147
+ tau_max : int, optional (default: 1)
3148
+ Maximum time lag. Must be larger or equal to tau_min.
3149
+ max_conds_py : int, optional (default: None)
3150
+ Maximum number of lagged conditions of Y to use in MCI tests. If
3151
+ None is passed, this number is unrestricted.
3152
+ max_conds_px : int, optional (default: None)
3153
+ Maximum number of lagged conditions of X to use in MCI tests. If
3154
+ None is passed, this number is unrestricted.
3155
+ max_conds_px_lagged : int, optional (default: None)
3156
+ Maximum number of lagged conditions of X when X is lagged in MCI
3157
+ tests. If None is passed, this number is equal to max_conds_px.
3158
+ contemp_collider_rule : {'majority', 'conservative', 'none'}
3159
+ Rule for collider phase to use. See the paper for details. Only
3160
+ 'majority' and 'conservative' lead to an order-independent
3161
+ algorithm.
3162
+ conflict_resolution : bool, optional (default: True)
3163
+ Whether to mark conflicts in orientation rules. Only for True
3164
+ this leads to an order-independent algorithm.
3165
+
3166
+ Returns
3167
+ -------
3168
+ graph : array of shape [N, N, tau_max+1]
3169
+ Resulting causal graph, see description above for interpretation.
3170
+ sepsets : dictionary
3171
+ Separating sets. See paper for details.
3172
+ ambiguous_triples : list
3173
+ List of ambiguous triples, only relevant for 'majority' and
3174
+ 'conservative' rules, see paper for details.
3175
+ """
3176
+
3177
+ if self.verbosity > 1:
3178
+ print("\n----------------------------")
3179
+ print("Collider orientation phase")
3180
+ print("----------------------------")
3181
+ print("\ncontemp_collider_rule = %s" % contemp_collider_rule)
3182
+ print("conflict_resolution = %s\n" % conflict_resolution)
3183
+
3184
+ # Check that no middle mark '?' exists
3185
+ for (i, j, tau) in zip(*np.where(graph!='')):
3186
+ if graph[i,j,tau][1] != '-':
3187
+ raise ValueError("Middle mark '?' exists!")
3188
+
3189
+ # Find unshielded triples
3190
+ triples = self._find_unshielded_triples(graph)
3191
+
3192
+ v_structures = []
3193
+ ambiguous_triples = []
3194
+
3195
+ if contemp_collider_rule is None or contemp_collider_rule == 'none':
3196
+ # Standard collider orientation rule of PC algorithm
3197
+ # If k_t not in sepsets(i_tau, j_t), then orient
3198
+ # as i_tau --> k_t <-- j_t
3199
+ for itaukj in triples:
3200
+ (i, tau), k, j = itaukj
3201
+ if (k, 0) not in sepsets[((i, tau), j)]:
3202
+ v_structures.append(itaukj)
3203
+ else:
3204
+ # Apply 'majority' or 'conservative' rule to orient colliders
3205
+ # Compute all (contemp) subsets of potential parents of i and all
3206
+ # subsets of potential parents of j that make i and j independent
3207
+ def subsets(s):
3208
+ if len(s) == 0: return []
3209
+ subsets = []
3210
+ for cardinality in range(len(s) + 1):
3211
+ subsets += list(itertools.combinations(s, cardinality))
3212
+ subsets = [list(sub) for sub in list(set(subsets))]
3213
+ return subsets
3214
+
3215
+ # We only consider contemporaneous adjacencies because only these
3216
+ # can include the (contemp) k. Furthermore, next to adjacencies of j,
3217
+ # we only need to check adjacencies of i for tau=0
3218
+ if mode == 'contemp_conds':
3219
+ adjt = self._get_adj_time_series_contemp(graph)
3220
+ elif mode == 'standard':
3221
+ adjt = self._get_adj_time_series(graph)
3222
+
3223
+ n_triples = len(triples)
3224
+ for ir, itaukj in enumerate(triples):
3225
+ (i, tau), k, j = itaukj
3226
+
3227
+ if self.verbosity > 1:
3228
+ self._print_triple_info(itaukj, ir, n_triples)
3229
+
3230
+ neighbor_subsets_tmp = subsets(
3231
+ [(l, taul) for (l, taul) in adjt[j]
3232
+ if not (l == i and tau == taul)])
3233
+ if tau == 0:
3234
+ # Furthermore, we only need to check contemp. adjacencies
3235
+ # of i for tau=0
3236
+ neighbor_subsets_tmp += subsets(
3237
+ [(l, taul) for (l, taul) in adjt[i]
3238
+ if not (l == j and taul == 0)])
3239
+
3240
+ # Make unique
3241
+ neighbor_subsets = []
3242
+ for subset in neighbor_subsets_tmp:
3243
+ if subset not in neighbor_subsets:
3244
+ neighbor_subsets.append(subset)
3245
+
3246
+ n_neighbors = len(neighbor_subsets)
3247
+
3248
+ if self.verbosity > 1:
3249
+ print(
3250
+ " Iterate through %d condition subset(s) of "
3251
+ "neighbors: " % n_neighbors)
3252
+ if lagged_parents is not None:
3253
+ self._print_pcmciplus_conditions(lagged_parents, i, j,
3254
+ abs(tau), max_conds_py, max_conds_px,
3255
+ max_conds_px_lagged)
3256
+
3257
+ # Test which neighbor subsets separate i and j
3258
+ neighbor_sepsets = []
3259
+ for iss, S in enumerate(neighbor_subsets):
3260
+ val, pval, Z, dependent = self._run_pcalg_test(graph=graph,
3261
+ i=i, abstau=abs(tau), j=j, S=S, lagged_parents=lagged_parents,
3262
+ max_conds_py=max_conds_py,
3263
+ max_conds_px=max_conds_px, max_conds_px_lagged=max_conds_px_lagged,
3264
+ tau_max=tau_max, alpha_or_thres=pc_alpha)
3265
+
3266
+ if self.verbosity > 1:
3267
+ self._print_cond_info(Z=S, comb_index=iss, pval=pval,
3268
+ val=val)
3269
+
3270
+ if not dependent: #pval > pc_alpha:
3271
+ neighbor_sepsets += [S]
3272
+
3273
+ if len(neighbor_sepsets) > 0:
3274
+ fraction = np.sum(
3275
+ [(k, 0) in S for S in neighbor_sepsets]) / float(
3276
+ len(neighbor_sepsets))
3277
+
3278
+ if contemp_collider_rule == 'conservative':
3279
+ # Triple is labeled as unambiguous if at least one
3280
+ # separating set is found and either k is in ALL
3281
+ # (fraction == 1) or NONE (fraction == 0) of them
3282
+ if len(neighbor_sepsets) == 0:
3283
+ if self.verbosity > 1:
3284
+ print(
3285
+ " No separating subsets --> ambiguous "
3286
+ "triple found")
3287
+ ambiguous_triples.append(itaukj)
3288
+ else:
3289
+ if fraction == 0:
3290
+ # If (k, 0) is in none of the neighbor_sepsets,
3291
+ # orient as collider
3292
+ v_structures.append(itaukj)
3293
+ if self.verbosity > 1:
3294
+ print(
3295
+ " Fraction of separating subsets "
3296
+ "containing (%s 0) is = 0 --> collider "
3297
+ "found" % self.var_names[k])
3298
+ # Also delete (k, 0) from sepsets (if present)
3299
+ if (k, 0) in sepsets[((i, tau), j)]:
3300
+ sepsets[((i, tau), j)].remove((k, 0))
3301
+ if tau == 0:
3302
+ if (k, 0) in sepsets[((j, tau), i)]:
3303
+ sepsets[((j, tau), i)].remove((k, 0))
3304
+ elif fraction == 1:
3305
+ # If (k, 0) is in all of the neighbor_sepsets,
3306
+ # leave unoriented
3307
+ if self.verbosity > 1:
3308
+ print(
3309
+ " Fraction of separating subsets "
3310
+ "containing (%s 0) is = 1 --> "
3311
+ "non-collider found" % self.var_names[k])
3312
+ # Also add (k, 0) to sepsets (if not present)
3313
+ if (k, 0) not in sepsets[((i, tau), j)]:
3314
+ sepsets[((i, tau), j)].append((k, 0))
3315
+ if tau == 0:
3316
+ if (k, 0) not in sepsets[((j, tau), i)]:
3317
+ sepsets[((j, tau), i)].append((k, 0))
3318
+ else:
3319
+ if self.verbosity > 1:
3320
+ print(
3321
+ " Fraction of separating subsets "
3322
+ "containing (%s 0) is = between 0 and 1 "
3323
+ "--> ambiguous triple found" %
3324
+ self.var_names[k])
3325
+ ambiguous_triples.append(itaukj)
3326
+
3327
+ elif contemp_collider_rule == 'majority':
3328
+
3329
+ if len(neighbor_sepsets) == 0:
3330
+ if self.verbosity > 1:
3331
+ print(
3332
+ " No separating subsets --> ambiguous "
3333
+ "triple found")
3334
+ ambiguous_triples.append(itaukj)
3335
+ else:
3336
+ if fraction == 0.5:
3337
+ if self.verbosity > 1:
3338
+ print(
3339
+ " Fraction of separating subsets "
3340
+ "containing (%s 0) is = 0.5 --> ambiguous "
3341
+ "triple found" % self.var_names[k])
3342
+ ambiguous_triples.append(itaukj)
3343
+ elif fraction < 0.5:
3344
+ v_structures.append(itaukj)
3345
+ if self.verbosity > 1:
3346
+ print(
3347
+ " Fraction of separating subsets "
3348
+ "containing (%s 0) is < 0.5 "
3349
+ "--> collider found" % self.var_names[k])
3350
+ # Also delete (k, 0) from sepsets (if present)
3351
+ if (k, 0) in sepsets[((i, tau), j)]:
3352
+ sepsets[((i, tau), j)].remove((k, 0))
3353
+ if tau == 0:
3354
+ if (k, 0) in sepsets[((j, tau), i)]:
3355
+ sepsets[((j, tau), i)].remove((k, 0))
3356
+ elif fraction > 0.5:
3357
+ if self.verbosity > 1:
3358
+ print(
3359
+ " Fraction of separating subsets "
3360
+ "containing (%s 0) is > 0.5 "
3361
+ "--> non-collider found" %
3362
+ self.var_names[k])
3363
+ # Also add (k, 0) to sepsets (if not present)
3364
+ if (k, 0) not in sepsets[((i, tau), j)]:
3365
+ sepsets[((i, tau), j)].append((k, 0))
3366
+ if tau == 0:
3367
+ if (k, 0) not in sepsets[((j, tau), i)]:
3368
+ sepsets[((j, tau), i)].append((k, 0))
3369
+
3370
+ if self.verbosity > 1 and len(v_structures) > 0:
3371
+ print("\nOrienting links among colliders:")
3372
+
3373
+ link_marker = {True:"o-o", False:"-->"}
3374
+
3375
+ # Now go through list of v-structures and (optionally) detect conflicts
3376
+ oriented_links = []
3377
+ for itaukj in v_structures:
3378
+ (i, tau), k, j = itaukj
3379
+
3380
+ if self.verbosity > 1:
3381
+ print("\n Collider (%s % d) %s %s o-o %s:" % (
3382
+ self.var_names[i], tau, link_marker[
3383
+ tau==0], self.var_names[k],
3384
+ self.var_names[j]))
3385
+
3386
+ if (k, j) not in oriented_links and (j, k) not in oriented_links:
3387
+ if self.verbosity > 1:
3388
+ print(" Orient %s o-o %s as %s --> %s " % (
3389
+ self.var_names[j], self.var_names[k], self.var_names[j],
3390
+ self.var_names[k]))
3391
+ # graph[k, j, 0] = 0
3392
+ graph[k, j, 0] = "<--" #0
3393
+ graph[j, k, 0] = "-->"
3394
+
3395
+ oriented_links.append((j, k))
3396
+ else:
3397
+ if conflict_resolution is False and self.verbosity > 1:
3398
+ print(" Already oriented")
3399
+
3400
+ if conflict_resolution:
3401
+ if (k, j) in oriented_links:
3402
+ if self.verbosity > 1:
3403
+ print(
3404
+ " Conflict since %s <-- %s already "
3405
+ "oriented: Mark link as `2` in graph" % (
3406
+ self.var_names[j], self.var_names[k]))
3407
+ graph[j, k, 0] = graph[k, j, 0] = "x-x" #2
3408
+
3409
+ if tau == 0:
3410
+ if (i, k) not in oriented_links and (
3411
+ k, i) not in oriented_links:
3412
+ if self.verbosity > 1:
3413
+ print(" Orient %s o-o %s as %s --> %s " % (
3414
+ self.var_names[i], self.var_names[k],
3415
+ self.var_names[i], self.var_names[k]))
3416
+ graph[k, i, 0] = "<--" #0
3417
+ graph[i, k, 0] = "-->"
3418
+
3419
+ oriented_links.append((i, k))
3420
+ else:
3421
+ if conflict_resolution is False and self.verbosity > 1:
3422
+ print(" Already oriented")
3423
+
3424
+ if conflict_resolution:
3425
+ if (k, i) in oriented_links:
3426
+ if self.verbosity > 1:
3427
+ print(
3428
+ " Conflict since %s <-- %s already "
3429
+ "oriented: Mark link as `2` in graph" % (
3430
+ self.var_names[i], self.var_names[k]))
3431
+ graph[i, k, 0] = graph[k, i, 0] = "x-x" #2
3432
+
3433
+ if self.verbosity > 1:
3434
+ adjt = self._get_adj_time_series(graph)
3435
+ print("\nUpdated adjacencies:")
3436
+ self._print_parents(all_parents=adjt, val_min=None, pval_max=None)
3437
+
3438
+ return {'graph': graph,
3439
+ 'sepsets': sepsets,
3440
+ 'ambiguous_triples': ambiguous_triples,
3441
+ }
3442
+
3443
def _find_triples_rule1(self, graph):
    """Collect triples i_tau --> k_t o-o j_t in which i_tau and j_t are
    not adjacent.

    Adjacencies are obtained with ``include_conflicts=False``, i.e.
    conflicting links are left out.

    Parameters
    ----------
    graph : array of shape [N, N, tau_max+1]
        Causal graph, see description above for interpretation.

    Returns
    -------
    triples : list
        List of triples, each of the form ``((i, taui), k, j)``.
    """
    neighbors = self._get_adj_time_series(graph, include_conflicts=False)
    n_vars = graph.shape[0]

    found = []
    for j in range(n_vars):
        for (k, lag_k) in neighbors[j]:
            # Only an unoriented contemporaneous link between k and j
            # can be the 'o-o' leg of the triple.
            if lag_k != 0 or graph[j, k, 0] != 'o-o':
                continue
            for (i, lag_i) in neighbors[k]:
                # j itself cannot play the role of i.
                if (i, lag_i) == (j, 0):
                    continue
                lag = abs(lag_i)
                # i must be non-adjacent to j and point into k.
                if graph[i, j, lag] == "" and graph[i, k, lag] == "-->":
                    found.append(((i, lag_i), k, j))
    return found
3471
+
3472
def _find_triples_rule2(self, graph):
    """Collect triples i_t --> k_t --> j_t for which i_t o-o j_t.

    Contemporaneous adjacencies are obtained with
    ``include_conflicts=False``, i.e. conflicting links are left out.

    Parameters
    ----------
    graph : array of shape [N, N, tau_max+1]
        Causal graph, see description above for interpretation.

    Returns
    -------
    triples : list
        List of triples, each of the form ``((i, 0), k, j)``.
    """
    contemp_adj = self._get_adj_time_series_contemp(
        graph, include_conflicts=False)

    # Walk j, then its contemporaneous neighbors k with k --> j, then the
    # neighbors i of k with i --> k; keep the triple when the link between
    # i and j is still unoriented in both directions.
    return [
        ((i, 0), k, j)
        for j in range(graph.shape[0])
        for (k, _lag_k) in contemp_adj[j]
        if graph[k, j, 0] == '-->'
        for (i, lag_i) in contemp_adj[k]
        if (graph[i, k, 0] == '-->'
            and (i, lag_i) != (j, 0)
            and graph[i, j, 0] == 'o-o'
            and graph[j, i, 0] == 'o-o')
    ]
3501
+
3502
def _find_chains_rule3(self, graph):
    """Collect pairs of chains i_t o-o k_t --> j_t and i_t o-o l_t --> j_t
    with i_t o-o j_t and k_t -/- l_t.

    Contemporaneous adjacencies are obtained with
    ``include_conflicts=False``, i.e. conflicting links are left out.

    Parameters
    ----------
    graph : array of shape [N, N, tau_max+1]
        Causal graph, see description above for interpretation.

    Returns
    -------
    chains : list
        List of chain pairs, each of the form
        ``(((i, 0), k, j), ((i, 0), l, j))``.
    """
    n_vars = graph.shape[0]
    contemp_adj = self._get_adj_time_series_contemp(
        graph, include_conflicts=False)

    found = []
    for j in range(n_vars):
        neighbors_j = contemp_adj[j]
        for (i, _) in neighbors_j:
            # The link between i and j has to be unoriented.
            if graph[j, i, 0] != 'o-o':
                continue
            for (k, _) in neighbors_j:
                for (l, _) in neighbors_j:
                    # i, k and l must be three distinct variables.
                    if k == l or k == i or l == i:
                        continue
                    both_point_into_j = (graph[k, j, 0] == "-->"
                                         and graph[l, j, 0] == "-->")
                    if not both_point_into_j:
                        continue
                    both_unoriented_to_i = (graph[k, i, 0] == "o-o"
                                            and graph[l, i, 0] == "o-o")
                    # k and l themselves must not be adjacent.
                    if both_unoriented_to_i and graph[k, l, 0] == "":
                        found.append((((i, 0), k, j),
                                      ((i, 0), l, j)))

    return found
3541
+
3542
def _pcalg_rules_timeseries(self,
                            graph,
                            ambiguous_triples,
                            conflict_resolution,
                            ):
    """Implements the rule orientation step of the PC algorithm for
    time series.

    Rules 1-3 are applied one after another to a copy of ``graph`` and
    the whole sequence is repeated for as long as at least one rule still
    reports applicable (non-ambiguous) triples or chains. The input array
    itself is not modified.

    Parameters
    ----------
    graph : array of shape (N, N, tau_max+1)
        Current graph.
    ambiguous_triples : list
        List of ambiguous triples, only relevant for 'majority' and
        'conservative' rules, see paper for details.
    conflict_resolution : bool
        Whether to mark conflicts in orientation rules. Only for True
        this leads to an order-independent algorithm.

    Returns
    -------
    graph : array of shape [N, N, tau_max+1]
        Resulting causal graph, see description above for interpretation.
    """
    # NOTE(review): N is not referenced again in this method.
    N = graph.shape[0]

    def rule1(graph, oriented_links):
        """Find (unambiguous) triples i_tau --> k_t o-o j_t with
        i_tau -/- j_t and orient as i_tau --> k_t --> j_t.

        Mutates ``graph`` and ``oriented_links`` in place and returns
        ``(triples_left, graph, oriented_links)``, where ``triples_left``
        tells whether any non-ambiguous triple was found.
        """
        # The triples are collected once, before any orientation is made,
        # so later triples of this batch may ask for an orientation that
        # contradicts an earlier one.
        triples = self._find_triples_rule1(graph)
        triples_left = False

        for itaukj in triples:
            if itaukj not in ambiguous_triples:
                triples_left = True
                # Orient as i_tau --> k_t --> j_t
                (i, tau), k, j = itaukj
                # Orient only if the pair was not oriented before, in
                # either direction.
                if (j, k) not in oriented_links and (
                        k, j) not in oriented_links:
                    if self.verbosity > 1:
                        print(
                            " R1: Found (%s % d) --> %s o-o %s, "
                            "orient as %s --> %s" % (
                                self.var_names[i], tau, self.var_names[k],
                                self.var_names[j],
                                self.var_names[k], self.var_names[j]))
                    # Both array entries of the link are updated
                    # (previously: graph[j, k, 0] = 0)
                    graph[k, j, 0] = '-->'
                    graph[j, k, 0] = '<--' # 0

                    oriented_links.append((k, j))

                if conflict_resolution:
                    # The opposite direction j --> k was recorded
                    # earlier: mark the link as conflicting.
                    if (j, k) in oriented_links:
                        if self.verbosity > 1:
                            # The message still names the legacy numeric
                            # marker `2`; the value written is 'x-x'.
                            print(
                                " Conflict since %s <-- %s already"
                                " oriented: Mark link as `2` in graph" % (
                                    self.var_names[k], self.var_names[j]))
                        # graph[j, k, 0] = graph[k, j, 0] = 2
                        graph[j, k, 0] = graph[k, j, 0] = 'x-x'

        return triples_left, graph, oriented_links

    def rule2(graph, oriented_links):
        """Find (unambiguous) triples i_t --> k_t --> j_t with i_t o-o j_t
        and orient as i_t --> j_t.

        Mutates ``graph`` and ``oriented_links`` in place and returns
        ``(triples_left, graph, oriented_links)``.
        """

        triples = self._find_triples_rule2(graph)
        triples_left = False

        for itaukj in triples:
            if itaukj not in ambiguous_triples:
                # TODO: Check whether this is actually needed
                # since ambiguous triples are always unshielded and here
                # we look for triples where i and j are connected
                triples_left = True
                # Orient as i_t --> j_t
                (i, tau), k, j = itaukj
                if (j, i) not in oriented_links and (
                        i, j) not in oriented_links:
                    if self.verbosity > 1:
                        print(
                            " R2: Found %s --> %s --> %s with %s "
                            "o-o %s, orient as %s --> %s" % (
                                self.var_names[i], self.var_names[k],
                                self.var_names[j],
                                self.var_names[i], self.var_names[j],
                                self.var_names[i], self.var_names[j]))
                    graph[i, j, 0] = '-->'
                    graph[j, i, 0] = '<--' # 0

                    oriented_links.append((i, j))
                if conflict_resolution:
                    # The opposite direction j --> i was recorded
                    # earlier: mark the link as conflicting.
                    if (j, i) in oriented_links:
                        if self.verbosity > 1:
                            # Legacy wording: the value written is 'x-x'.
                            print(
                                " Conflict since %s <-- %s already "
                                "oriented: Mark link as `2` in graph" % (
                                    self.var_names[i], self.var_names[j]))
                        # graph[j, i, 0] = graph[i, j, 0] = 2
                        graph[j, i, 0] = graph[i, j, 0] = 'x-x'

        return triples_left, graph, oriented_links

    def rule3(graph, oriented_links):
        """Find (unambiguous) chains i_t o-o k_t --> j_t
        and i_t o-o l_t --> j_t with i_t o-o j_t
        and k_t -/- l_t: Orient as i_t --> j_t.

        Mutates ``graph`` and ``oriented_links`` in place and returns
        ``(chains_left, graph, oriented_links)``.
        """
        # First find all chains i_t -- k_t --> j_t with i_t -- j_t
        # and k_t -/- l_t
        chains = self._find_chains_rule3(graph)

        chains_left = False

        for (itaukj, itaulj) in chains:
            # Both triples of the chain pair have to be unambiguous.
            if (itaukj not in ambiguous_triples and
                    itaulj not in ambiguous_triples):
                # TODO: Check whether this is actually needed
                # since ambiguous triples are always unshielded and here
                # we look for triples where i and j are connected
                chains_left = True
                # Orient as i_t --> j_t
                (i, tau), k, j = itaukj
                _ , l, _ = itaulj

                if (j, i) not in oriented_links and (
                        i, j) not in oriented_links:
                    if self.verbosity > 1:
                        print(
                            " R3: Found %s o-o %s --> %s and %s o-o "
                            "%s --> %s with %s o-o %s and %s -/- %s, "
                            "orient as %s --> %s" % (
                                self.var_names[i], self.var_names[k],
                                self.var_names[j], self.var_names[i],
                                self.var_names[l], self.var_names[j],
                                self.var_names[i], self.var_names[j],
                                self.var_names[k], self.var_names[l],
                                self.var_names[i], self.var_names[j]))
                    graph[i, j, 0] = '-->'
                    graph[j, i, 0] = '<--' # 0

                    oriented_links.append((i, j))
                if conflict_resolution:
                    # The opposite direction j --> i was recorded
                    # earlier: mark the link as conflicting.
                    if (j, i) in oriented_links:
                        if self.verbosity > 1:
                            # Legacy wording: the value written is 'x-x'.
                            print(
                                " Conflict since %s <-- %s already "
                                "oriented: Mark link as `2` in graph" % (
                                    self.var_names[i], self.var_names[j]))
                        graph[j, i, 0] = graph[i, j, 0] = 'x-x'

        return chains_left, graph, oriented_links

    if self.verbosity > 1:
        print("\n")
        print("----------------------------")
        print("Rule orientation phase")
        print("----------------------------")

    # (source, target) pairs oriented so far; shared by all three rules
    # so that conflicts across rules can be detected.
    oriented_links = []
    # Work on a copy so that the caller's array stays untouched.
    graph_new = np.copy(graph)
    any1 = any2 = any3 = True
    # Repeat until no rule reports an applicable triple/chain any more.
    while (any1 or any2 or any3):
        if self.verbosity > 1:
            # The leading 0 shifts the indices so that the reported
            # positions are the rule numbers 1-3 that were still active.
            print("\nTry rule(s) %s" % (
                np.where(np.array([0, any1, any2, any3]))))
        any1, graph_new, oriented_links = rule1(graph_new, oriented_links)
        any2, graph_new, oriented_links = rule2(graph_new, oriented_links)
        any3, graph_new, oriented_links = rule3(graph_new, oriented_links)

    if self.verbosity > 1:
        adjt = self._get_adj_time_series(graph_new)
        print("\nUpdated adjacencies:")
        self._print_parents(all_parents=adjt, val_min=None, pval_max=None)

    return graph_new
3722
+
3723
def _optimize_pcmciplus_alpha(self,
                              link_assumptions,
                              tau_min,
                              tau_max,
                              pc_alpha,
                              contemp_collider_rule,
                              conflict_resolution,
                              reset_lagged_links,
                              max_conds_dim,
                              max_combinations,
                              max_conds_py,
                              max_conds_px,
                              max_conds_px_lagged,
                              fdr_method,
                              ):
    """Optimizes pc_alpha in PCMCIplus.

    If a list or None is passed for ``pc_alpha``, the significance level is
    optimized for every graph across the given ``pc_alpha`` values using the
    score computed in ``cond_ind_test.get_model_selection_criterion()``.
    For every candidate value PCMCIplus is run, one DAG is extracted from
    the resulting graph, and the criterion is averaged over all N
    variables given their parents in that DAG. The candidate with the
    lowest average score is selected.

    Parameters
    ----------
    See those for run_pcmciplus(). ``pc_alpha`` is either None, in which
    case the candidates [0.001, 0.005, 0.01, 0.025, 0.05] are used, or a
    non-empty sequence of candidate values.

    Returns
    -------
    Results for run_pcmciplus() for the optimal pc_alpha, with the
    additional entry ``'optimal_alpha'``.

    Raises
    ------
    ValueError
        If the sequence of candidate values is empty.
    """

    if pc_alpha is None:
        pc_alpha_list = [0.001, 0.005, 0.01, 0.025, 0.05]
    else:
        pc_alpha_list = pc_alpha

    n_alphas = len(pc_alpha_list)
    if n_alphas == 0:
        # Fail early with a clear message instead of running into an
        # opaque argmin error on an empty score array further below.
        raise ValueError("pc_alpha must contain at least one value to "
                         "optimize over.")

    if self.verbosity > 0:
        print("\n##\n## Optimizing pc_alpha over " +
              "pc_alpha_list = %s" % str(pc_alpha_list) +
              "\n##")

    results = {}
    # Explicit float dtype: np.zeros_like(pc_alpha_list) would inherit the
    # dtype of the candidates, so integer-valued candidates would silently
    # truncate the accumulated scores.
    score = np.zeros(n_alphas, dtype=float)
    for iscore, pc_alpha_here in enumerate(pc_alpha_list):
        # Print statement about the pc_alpha being tested
        if self.verbosity > 0:
            print("\n## pc_alpha = %s (%d/%d):" % (pc_alpha_here,
                                                   iscore + 1,
                                                   n_alphas))
        # Get the results for this alpha value
        results[pc_alpha_here] = \
            self.run_pcmciplus(link_assumptions=link_assumptions,
                               tau_min=tau_min,
                               tau_max=tau_max,
                               pc_alpha=pc_alpha_here,
                               contemp_collider_rule=contemp_collider_rule,
                               conflict_resolution=conflict_resolution,
                               reset_lagged_links=reset_lagged_links,
                               max_conds_dim=max_conds_dim,
                               max_combinations=max_combinations,
                               max_conds_py=max_conds_py,
                               max_conds_px=max_conds_px,
                               max_conds_px_lagged=max_conds_px_lagged,
                               fdr_method=fdr_method)

        # Get one member of the Markov equivalence class of the result
        # of PCMCIplus, which is a CPDAG

        # First create order that is based on some feature of the variables
        # to avoid order-dependence of DAG, i.e., it should not matter
        # in which order the variables appear in dataframe
        # Here we use the sum of absolute val_matrix values incident at j
        val_matrix = results[pc_alpha_here]['val_matrix']
        variable_order = np.argsort(
            np.abs(val_matrix).sum(axis=(0, 2)))[::-1]

        dag = self._get_dag_from_cpdag(
            cpdag_graph=results[pc_alpha_here]['graph'],
            variable_order=variable_order)

        # Compute the best average score when the model selection
        # is applied to all N variables
        for j in range(self.N):
            # Parents of j are the entries marked '-->' in column j of the
            # DAG; lags are passed on with negative sign.
            parents = [(i, -tau)
                       for i, tau in zip(*np.where(dag[:, j, :] == "-->"))]
            score_j = self.cond_ind_test.get_model_selection_criterion(
                j, parents, tau_max)
            score[iscore] += score_j
        score[iscore] /= float(self.N)

    # Record the optimal alpha value
    optimal_alpha = pc_alpha_list[score.argmin()]

    if self.verbosity > 0:
        print("\n##" +
              "\n\n## Scores for individual pc_alpha values:\n")
        # A separate loop name keeps the ``pc_alpha`` argument intact.
        for iscore, alpha_value in enumerate(pc_alpha_list):
            print(" pc_alpha = %7s yields score = %.5f" % (alpha_value,
                                                           score[iscore]))
        print("\n##\n## Results for optimal " +
              "pc_alpha = %s\n##" % optimal_alpha)
        self.print_results(results[optimal_alpha], alpha_level=optimal_alpha)

    optimal_results = results[optimal_alpha]
    optimal_results['optimal_alpha'] = optimal_alpha
    return optimal_results
3830
+
3831
+
3832
if __name__ == '__main__':
    # Developer scratch area, executed only when this module is run as a
    # script: builds a PCMCI instance on random multi-dataset input.
    # NOTE(review): ParCorr, toys, tp and plt are imported but not used by
    # the live statements below.
    from tigramite.independence_tests.parcorr import ParCorr
    from tigramite.independence_tests.regressionCI import RegressionCI
    # from tigramite.independence_tests.cmiknn import CMIknn

    import tigramite.data_processing as pp
    from tigramite.toymodels import structural_causal_processes as toys
    import tigramite.plotting as tp
    from matplotlib import pyplot as plt

    # Unseeded random input of shape (10, 100, 5)
    # presumably (datasets, time steps, variables) - TODO confirm
    multidata = np.random.randn(10, 100, 5)
    # Boolean array of the same shape, True for the first three entries of
    # the last axis
    # presumably flags those variables as discrete - TODO confirm
    data_type = np.zeros((10, 100, 5), dtype='bool')
    data_type[:,:,:3] = True

    dataframe = pp.DataFrame(multidata,
        data_type=data_type,
        analysis_mode='multiple',
        missing_flag = 999.,
        # time_offsets = {0:50, 1:0}
        # reference_points=list(range(500, 1000))
        )

    pcmci = PCMCI(dataframe=dataframe,
        cond_ind_test=RegressionCI(verbosity=0), verbosity=0)

    # The actual run is left disabled:
    # results = pcmci.run_pcmciplus(tau_max=1)