tigramite_fast-5.2.10.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. tigramite/__init__.py +0 -0
  2. tigramite/causal_effects.py +1525 -0
  3. tigramite/causal_mediation.py +1592 -0
  4. tigramite/data_processing.py +1574 -0
  5. tigramite/graphs.py +1509 -0
  6. tigramite/independence_tests/LBFGS.py +1114 -0
  7. tigramite/independence_tests/__init__.py +0 -0
  8. tigramite/independence_tests/cmiknn.py +661 -0
  9. tigramite/independence_tests/cmiknn_mixed.py +1397 -0
  10. tigramite/independence_tests/cmisymb.py +286 -0
  11. tigramite/independence_tests/gpdc.py +664 -0
  12. tigramite/independence_tests/gpdc_torch.py +820 -0
  13. tigramite/independence_tests/gsquared.py +190 -0
  14. tigramite/independence_tests/independence_tests_base.py +1310 -0
  15. tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
  16. tigramite/independence_tests/pairwise_CI.py +383 -0
  17. tigramite/independence_tests/parcorr.py +369 -0
  18. tigramite/independence_tests/parcorr_mult.py +485 -0
  19. tigramite/independence_tests/parcorr_wls.py +451 -0
  20. tigramite/independence_tests/regressionCI.py +403 -0
  21. tigramite/independence_tests/robust_parcorr.py +403 -0
  22. tigramite/jpcmciplus.py +966 -0
  23. tigramite/lpcmci.py +3649 -0
  24. tigramite/models.py +2257 -0
  25. tigramite/pcmci.py +3935 -0
  26. tigramite/pcmci_base.py +1218 -0
  27. tigramite/plotting.py +4735 -0
  28. tigramite/rpcmci.py +467 -0
  29. tigramite/toymodels/__init__.py +0 -0
  30. tigramite/toymodels/context_model.py +261 -0
  31. tigramite/toymodels/non_additive.py +1231 -0
  32. tigramite/toymodels/structural_causal_processes.py +1201 -0
  33. tigramite/toymodels/surrogate_generator.py +319 -0
  34. tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
  35. tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
  36. tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
  37. tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
  38. tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
tigramite/lpcmci.py ADDED
@@ -0,0 +1,3649 @@
import numpy as np
from itertools import product, combinations
from copy import deepcopy

from .pcmci_base import PCMCIbase


class LPCMCI(PCMCIbase):
    r"""LPCMCI is an algorithm for causal discovery in large-scale time series that allows for latent confounders and
    learns lag-specific causal relationships. The algorithm is introduced and explained in:

    [1] Gerhardus, A. & Runge, J. High-recall causal discovery for autocorrelated time series with latent confounders.
    Advances in Neural Information Processing Systems, 2020, 33.
    https://proceedings.neurips.cc/paper/2020/hash/94e70705efae423efda1088614128d0b-Abstract.html

    NOTE: This method is still EXPERIMENTAL since the default settings of hyperparameters are still being fine-tuned.
    We invite feedback on which settings work best in applications and numerical experiments.
    The main function, which applies the algorithm, is 'run_lpcmci'.

    Parameters passed to the constructor:

    - dataframe: Tigramite dataframe object that contains the time series dataset \bold{X}

    - cond_ind_test: A conditional independence test object that specifies which conditional independence test CI is to be used

    - verbosity: Controls the verbose output of self.run_lpcmci() and the functions it calls.

    Parameters passed to self.run_lpcmci():
    Note: The default values are still being tuned and some parameters might be removed in the future.

    - link_assumptions: dict or None
      Two-level nested dictionary such that link_assumptions[j][(i, lag_i)], where 0 <= j, i <= N-1 (with N the number of component
      time series) and -tau_max <= lag_i <= -tau_min, is a string that specifies background knowledge about the link from X^i_{t+lag_i}
      to X^j_t. These are the possibilities for this string and the corresponding claims (see also the commented example below this
      docstring):

      '-?>' : X^i_{t+lag_i} is an ancestor of X^j_t.
      '-->' : X^i_{t+lag_i} is an ancestor of X^j_t, and there is a link between X^i_{t+lag_i} and X^j_t.
      '<?-' : Only allowed for lag_i = 0. X^j_t is an ancestor of X^i_t.
      '<--' : Only allowed for lag_i = 0. X^j_t is an ancestor of X^i_t, and there is a link between X^i_t and X^j_t.
      '<?>' : Neither X^i_{t+lag_i} is an ancestor of X^j_t nor the other way around.
      '<->' : Neither X^i_{t+lag_i} is an ancestor of X^j_t nor the other way around, and there is a link between X^i_{t+lag_i} and X^j_t.
      'o?>' : X^j_t is not an ancestor of X^i_{t+lag_i}. (For lag_i < 0 this background knowledge is imposed automatically for the
              default settings of self.run_lpcmci().)
      'o->' : X^j_t is not an ancestor of X^i_{t+lag_i}, and there is a link between X^i_{t+lag_i} and X^j_t.
      '<?o' : Only allowed for lag_i = 0. X^i_t is not an ancestor of X^j_t.
      '<-o' : Only allowed for lag_i = 0. X^i_t is not an ancestor of X^j_t, and there is a link between X^i_t and X^j_t.
      'o-o' : Only allowed for lag_i = 0. There is a link between X^i_t and X^j_t.
      'o?o' : Only allowed for lag_i = 0. No claim is made.
      ''    : There is no link between X^i_{t+lag_i} and X^j_t.

      An absent link can also be specified implicitly: if the link between (i, lag_i) and (j, 0) is not specified by the dictionary,
      that is, if either link_assumptions[j] does not exist or link_assumptions[j] does exist but link_assumptions[j][(i, lag_i)] does
      not exist, then the link between (i, lag_i) and (j, 0) is assumed to be absent.

    - tau_min: The assumed minimum time lag, i.e., links with a lag smaller than tau_min are assumed to be absent.

    - tau_max: The maximum considered time lag, i.e., the algorithm learns a DPAG on a time window [t-\tau_max, t] with \tau_max + 1
      time steps. It is *not* assumed that in the underlying time series DAG there are no links with a lag larger than \tau_max.

    - pc_alpha: The significance level of conditional independence tests.

    - n_preliminary_iterations: Determines the number of iterations in the preliminary phase of LPCMCI, corresponding to the 'k' in
      LPCMCI(k) in [1].

    - max_cond_px: Consider a pair of variables (X^i_{t-\tau}, X^j_t) with \tau > 0. In Algorithm S2 in [1] (here this is
      self._run_ancestral_removal_phase()), the algorithm does not test for conditional independence given subsets of
      apds_t(X^i_{t-\tau}, X^j_t, C(G)) of cardinality higher than max_cond_px. In Algorithm S3 in [1] (here this is
      self._run_non_ancestral_removal_phase()), the algorithm does not test for conditional independence given subsets of
      napds_t(X^i_{t-\tau}, X^j_t, C(G)) of cardinality higher than max_cond_px.

    - max_p_global: Restricts all conditional independence tests to conditioning sets with cardinality smaller than or equal to
      max_p_global.

    - max_p_non_ancestral: Restricts all conditional independence tests in the second removal phase (here this is
      self._run_non_ancestral_removal_phase()) to conditioning sets with cardinality smaller than or equal to max_p_non_ancestral.

    - max_q_global: For each ordered pair (X^i_{t-\tau}, X^j_t) of adjacent variables and for each cardinality of the conditioning
      sets, test at most max_q_global many conditioning sets (when summing over all tested cardinalities, more than max_q_global tests
      may be made).

    - max_pds_set: In Algorithm S3 (here this is self._run_non_ancestral_removal_phase()), the algorithm tests for conditional
      independence given subsets of the relevant napds_t sets. If for a given link the set napds_t(X^j_t, X^i_{t-\tau}, C(G)) has more
      than max_pds_set many elements (or, if the link is also tested in the opposite direction, if napds_t(X^i_{t-\tau}, X^j_t, C(G))
      has more than max_pds_set elements), this link is not tested.

    - prelim_with_collider_rules:
      If True: As in pseudocode.
      If False: Line 22 of Algorithm S2 in [1] is replaced by line 18 of Algorithm S2 when Algorithm S2 is called from the preliminary
      phase (not in the last application of Algorithm S2 directly before Algorithm S3 is applied).

    - parents_of_lagged:
      If True: As in pseudocode.
      If False: The default conditioning set is pa(X^j_t, C(G)) rather than pa({X^j_t, X^i_{t-\tau}}, C(G)) for \tau > 0.

    - prelim_only: If True, stop after the preliminary phase. Can be used for detailed performance analysis.

    - break_once_separated:
      If True: As in pseudocode.
      If False: The break commands are removed from Algorithms S2 and S3 in [1].

    - no_non_ancestral_phase: If True, do not execute Algorithm S3. Can be used for detailed performance analysis.

    - use_a_pds_t_for_majority:
      If True: As in pseudocode.
      If False: The search for separating sets instructed by the majority rule is made given subsets of adj(X^j_t, C(G)) rather than
      subsets of apds_t(X^j_t, X^i_{t-\tau}, C(G)).

    - orient_contemp:
      If orient_contemp == 1: As in pseudocode of Algorithm S2 in [1].
      If orient_contemp == 2: Also orient contemporaneous links in line 18 of Algorithm S2.
      If orient_contemp == 0: Do not orient contemporaneous links in line 22 of Algorithm S2 either.

    - update_middle_marks:
      If True: As in pseudocode of Algorithms S2 and S3 in [1].
      If False: The MMR rule is not applied.

    - prelim_rules:
      If prelim_rules == 1: As in pseudocode of Algorithm S2 in [1].
      If prelim_rules == 0: Exclude rules R9^\prime and R10^\prime from line 18 of Algorithm S2.

    - fix_all_edges_before_final_orientation: When one of max_p_global, max_p_non_ancestral, max_q_global or max_pds_set is not np.inf,
      the algorithm may terminate although not all middle marks are empty. All orientation rules are nevertheless sound, since the
      rules always check for the appropriate middle marks. If fix_all_edges_before_final_orientation is True, all middle marks are set
      to the empty middle mark by force, followed by another application of the rules.

    - auto_first:
      If True: As in pseudocode of Algorithms S2 and S3 in [1].
      If False: Autodependency links are not prioritized even before contemporaneous links.

    - remember_only_parents:
      If True: As in pseudocode of Algorithm 1.
      If False: If X^i_{t-\tau} has been marked as an ancestor of X^j_t at any point of a preliminary iteration but the link between
      X^i_{t-\tau} and X^j_t was removed later, the link is nevertheless initialized with a tail at X^i_{t-\tau} in the
      re-initialization.

    - no_apr:
      If no_apr == 0: As in pseudocode of Algorithms S2 and S3 in [1].
      If no_apr == 1: The APR is not applied by Algorithm S2, except in line 22 of its last call directly before the call of
      Algorithm S3.
      If no_apr == 2: The APR is never applied.

    Return value of self.run_lpcmci():
    graph : array of shape (N, N, tau_max+1)
        Resulting DPAG, representing the learned causal relationships.
    val_matrix : array of shape (N, N, tau_max+1)
        Estimated matrix of test statistic values regarding adjacencies.
    p_matrix : array of shape (N, N, tau_max+1)
        Estimated matrix of p-values regarding adjacencies.

    A note on middle marks: For convenience (to have strings of the same lengths) we here internally denote the empty middle mark
    by '-'. For post-processing purposes all middle marks are set to the empty middle mark (here '-').

    A note on wildcards: The middle mark wildcard and the edge mark wildcard \ast are here represented as '*', the edge mark wildcard
    \star as '+'.
    """

    def __init__(self, dataframe, cond_ind_test, verbosity=0):
        """Class constructor. Store:
        i) data
        ii) conditional independence test object
        iii) some instance attributes"""

        # Init base class
        PCMCIbase.__init__(self, dataframe=dataframe,
                           cond_ind_test=cond_ind_test,
                           verbosity=verbosity)

    def run_lpcmci(self,
                   link_assumptions=None,
                   tau_min=0,
                   tau_max=1,
                   pc_alpha=0.05,
                   n_preliminary_iterations=1,
                   max_cond_px=0,
                   max_p_global=np.inf,
                   max_p_non_ancestral=np.inf,
                   max_q_global=np.inf,
                   max_pds_set=np.inf,
                   prelim_with_collider_rules=True,
                   parents_of_lagged=True,
                   prelim_only=False,
                   break_once_separated=True,
                   no_non_ancestral_phase=False,
                   use_a_pds_t_for_majority=True,
                   orient_contemp=1,
                   update_middle_marks=True,
                   prelim_rules=1,
                   fix_all_edges_before_final_orientation=True,
                   auto_first=True,
                   remember_only_parents=True,
                   no_apr=0):
        """Run LPCMCI on the dataset and with the conditional independence test passed to the class constructor, using the
        options passed to this function."""

        #######################################################################################################################
        # Step 0: Initializations
        self._initialize(link_assumptions, tau_min, tau_max, pc_alpha, n_preliminary_iterations, max_cond_px, max_p_global,
                         max_p_non_ancestral, max_q_global, max_pds_set, prelim_with_collider_rules, parents_of_lagged, prelim_only,
                         break_once_separated, no_non_ancestral_phase, use_a_pds_t_for_majority, orient_contemp, update_middle_marks,
                         prelim_rules, fix_all_edges_before_final_orientation, auto_first, remember_only_parents, no_apr)

        #######################################################################################################################
        # Step 1: Preliminary phases
        for i in range(self.n_preliminary_iterations):

            # Verbose output
            if self.verbosity >= 1:
                print("\n=======================================================")
                print("=======================================================")
                print("Starting preliminary phase {:2}".format(i + 1))

            # In the preliminary phases, auto-lag links are tested with first priority. Among the auto-lag links, different lags are
            # not distinguished. All other links have lower priority; among those, links with shorter lags have higher priority
            self._run_ancestral_removal_phase(prelim=True)

            # Verbose output
            if self.verbosity >= 1:
                print("\nPreliminary phase {:2} complete".format(i + 1))
                print("\nGraph:\n--------------------------------")
                self._print_graph_dict()
                print("--------------------------------")

            # When the option self.prelim_only is chosen, do not re-initialize in the last iteration
            if i == self.n_preliminary_iterations - 1 and self.prelim_only:
                break

            # Remember ancestorships, re-initialize and re-apply the remembered ancestorships
            def_ancs = self.def_ancs

            if self.remember_only_parents:
                smaller_def_ancs = dict()
                for j in range(self.N):
                    smaller_def_ancs[j] = {(i, lag_i) for (i, lag_i) in def_ancs[j] if self._get_link((i, lag_i), (j, 0)) != ""}
                def_ancs = smaller_def_ancs

            self._initialize_run_memory()
            self._apply_new_ancestral_information(None, def_ancs)

        #######################################################################################################################
        # Step 2: Full ancestral phase
        if not self.prelim_only:

            # Verbose output
            if self.verbosity >= 1:
                print("\n=======================================================")
                print("=======================================================")
                print("Starting final ancestral phase")

            # In the standard ancestral phase, links are prioritized in the same way as in the preliminary phases
            self._run_ancestral_removal_phase()

            # Verbose output
            if self.verbosity >= 1:
                print("\nFinal ancestral phase complete")
                print("\nGraph:\n--------------------------------")
                self._print_graph_dict()
                print("--------------------------------")

        #######################################################################################################################
        # Step 3: Non-ancestral phase
        if (not self.prelim_only) and (not self.no_non_ancestral_phase):

            # Verbose output
            if self.verbosity >= 1:
                print("\n=======================================================")
                print("=======================================================")
                print("Starting non-ancestral phase")

            # In the non-ancestral phase, large lags are prioritized
            self._run_non_ancestral_removal_phase()

            # Verbose output
            if self.verbosity >= 1:
                print("\nNon-ancestral phase complete")
                print("\nGraph:\n--------------------------------")
                self._print_graph_dict()
                print("--------------------------------")

        if self.fix_all_edges_before_final_orientation:

            # Verbose output
            if self.verbosity >= 1:
                print("\n=======================================================")
                print("=======================================================")
                print("Final rule application phase")
                print("\nSetting all middle marks to '-'")

            self._fix_all_edges()
            self._run_orientation_phase(rule_list=self._rules_all, only_lagged=False)

        #######################################################################################################################

        # Verbose output
        if self.verbosity >= 1:
            print("\n=======================================================")
            print("=======================================================")
            print("\nLPCMCI has converged")
            print("\nFinal graph:\n--------------------------------")
            print("--------------------------------")
            self._print_graph_dict()
            print("--------------------------------")
            print("--------------------------------\n")

            print("Max search set: {}".format(self.max_na_search_set_found))
            print("Max na-pds set: {}\n".format(self.max_na_pds_set_found))

        # Post processing
        self._fix_all_edges()
        self.graph = self._dict2graph()
        self.pval_max_matrix = self._dict_to_matrix(self.pval_max, self.tau_max, self.N, default=0)
        self.val_min_matrix = self._dict_to_matrix(self.pval_max_val, self.tau_max, self.N, default=0)
        self.cardinality_matrix = self._dict_to_matrix(self.pval_max_card, self.tau_max, self.N, default=0)

        # Build and return the return dictionary
        return_dict = {"graph": self.graph,
                       "p_matrix": self.pval_max_matrix,
                       "val_matrix": self.val_min_matrix}
        return return_dict
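
    # Editor's usage sketch (not part of the original source): a minimal way to run LPCMCI,
    # assuming the package's DataFrame (tigramite.data_processing) and ParCorr
    # (tigramite.independence_tests.parcorr) classes and a numpy array 'data' of shape (T, N).
    #
    #     import numpy as np
    #     from tigramite.data_processing import DataFrame
    #     from tigramite.independence_tests.parcorr import ParCorr
    #
    #     data = np.random.randn(500, 3)
    #     lpcmci = LPCMCI(dataframe=DataFrame(data), cond_ind_test=ParCorr(), verbosity=0)
    #     results = lpcmci.run_lpcmci(tau_max=2, pc_alpha=0.01)
    #     results["graph"]  # DPAG as an array of shape (N, N, tau_max + 1)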

    def _initialize(self, link_assumptions, tau_min, tau_max, pc_alpha, n_preliminary_iterations, max_cond_px, max_p_global,
                    max_p_non_ancestral, max_q_global, max_pds_set, prelim_with_collider_rules, parents_of_lagged, prelim_only,
                    break_once_separated, no_non_ancestral_phase, use_a_pds_t_for_majority, orient_contemp, update_middle_marks,
                    prelim_rules, fix_all_edges_before_final_orientation, auto_first, remember_only_parents, no_apr):
        """Function for
        i) saving the arguments passed to self.run_lpcmci() as instance attributes
        ii) initializing various memory variables for storing the current graph, sepsets etc.
        """

        # Save the arguments passed to self.run_lpcmci()
        self.link_assumptions = link_assumptions
        self.tau_min = tau_min
        self.tau_max = tau_max
        self.pc_alpha = pc_alpha
        self.n_preliminary_iterations = n_preliminary_iterations
        self.max_cond_px = max_cond_px
        self.max_p_global = max_p_global
        self.max_p_non_ancestral = max_p_non_ancestral
        self.max_q_global = max_q_global
        self.max_pds_set = max_pds_set
        self.prelim_with_collider_rules = prelim_with_collider_rules
        self.parents_of_lagged = parents_of_lagged
        self.prelim_only = prelim_only
        self.break_once_separated = break_once_separated
        self.no_non_ancestral_phase = no_non_ancestral_phase
        self.use_a_pds_t_for_majority = use_a_pds_t_for_majority
        self.orient_contemp = orient_contemp
        self.update_middle_marks = update_middle_marks
        self.prelim_rules = prelim_rules
        self.fix_all_edges_before_final_orientation = fix_all_edges_before_final_orientation
        self.auto_first = auto_first
        self.remember_only_parents = remember_only_parents
        self.no_apr = no_apr

        if isinstance(pc_alpha, (list, tuple, np.ndarray)):
            raise ValueError("pc_alpha must be a single float in LPCMCI.")
        if pc_alpha < 0. or pc_alpha > 1.:
            raise ValueError("Choose 0 <= pc_alpha <= 1")

        # Check the validity of tau_min and tau_max
        self._check_tau_min_tau_max()

        # Check the validity of 'link_assumptions'
        if self.link_assumptions is not None:
            self._check_link_assumptions()

        # Rules to be executed at the end of a preliminary phase
        self._rules_prelim_final = [["APR"], ["ER-08"], ["ER-02"], ["ER-01"], ["ER-09"], ["ER-10"]]

        # Rules to be executed within the while loop of a preliminary phase
        self._rules_prelim = [["APR"], ["ER-08"], ["ER-02"], ["ER-01"]] if self.prelim_rules == 0 else self._rules_prelim_final

        # Full list of all rules
        self._rules_all = [["APR"], ["ER-08"], ["ER-02"], ["ER-01"], ["ER-00-d"], ["ER-00-c"], ["ER-03"], ["R-04"], ["ER-09"], ["ER-10"], ["ER-00-b"], ["ER-00-a"]]

        # Initialize various memory variables for storing the current graph, sepsets etc.
        self._initialize_run_memory()

        # Return
        return True

    def _check_tau_min_tau_max(self):
        """Check whether the choice of tau_min and tau_max is valid."""

        if not 0 <= self.tau_min <= self.tau_max:
            raise ValueError("tau_min = {}, ".format(self.tau_min) +
                             "tau_max = {}, ".format(self.tau_max) +
                             "but 0 <= tau_min <= tau_max required.")

    def _check_link_assumptions(self):
        """Check the validity of the user-specified 'link_assumptions'.

        The checks assert:
        - Valid dictionary keys
        - Valid edge types
        - That no causal cycle is specified
        - That no almost directed causal cycle is specified

        The checks do not assert that maximality is not violated."""

        # Ancestorship matrices
        ancs_mat_contemp = np.zeros((self.N, self.N), dtype="int32")
        ancs_mat = np.zeros((self.N*(self.tau_max + 1),
                             self.N*(self.tau_max + 1)), dtype="int32")

        # Run through the outer dictionary
        for j, links_j in self.link_assumptions.items():

            # Check validity of keys of outer dictionary
            if not 0 <= j <= self.N - 1:
                raise ValueError("The argument 'link_assumptions' must be a "
                                 "dictionary whose keys are in {0, 1, ..., N-1}, where N "
                                 "is the number of component time series. Here, "
                                 f"N = {self.N}.")

            # Run through the inner dictionary
            for (i, lag_i), link_ij in links_j.items():

                # Check validity of keys of inner dictionary
                if i == j and lag_i == 0:
                    raise ValueError(f"The dictionary 'link_assumptions'[{j}] "
                                     f"must not have the key ({j}, 0), because this refers "
                                     "to a self-link.")

                if (not (0 <= i <= self.N - 1)
                        or not (-self.tau_max <= lag_i <= -self.tau_min)):
                    raise ValueError("All values of 'link_assumptions' must "
                                     "be dictionaries whose keys are of the form (i, "
                                     "lag_i), where i is in {0, 1, ..., N-1} with N the "
                                     "number of component time series and lag_i is in "
                                     "{-tau_max, ..., -tau_min} with tau_max the maximum "
                                     "considered time lag and tau_min the minimum assumed "
                                     f"time lag. Here, N = {self.N}, tau_max = "
                                     f"{self.tau_max} and tau_min = {self.tau_min}.")

                # Check the validity of the entries. At the same time mark the
                # ancestorships in ancs_mat_contemp and ancs_mat

                if link_ij == "":

                    # Check for symmetry of lag zero links
                    if lag_i == 0:

                        if (self.link_assumptions.get(i) is None
                                or self.link_assumptions[i].get((j, 0)) is None
                                or self.link_assumptions[i][(j, 0)] != ""):
                            raise ValueError("The lag zero links specified by "
                                             "'link_assumptions' must be symmetric: Because "
                                             f"'link_assumptions'[{j}][({i}, {0})] = '', "
                                             "there must also be "
                                             f"'link_assumptions'[{i}][({j}, {0})] = ''.")
                    continue

                if len(link_ij) != 3:
                    if lag_i < 0:
                        raise ValueError("Invalid link: "
                                         f"'link_assumptions'[{j}][({i}, {lag_i})] = "
                                         f"{link_ij}. Allowed are: '-?>', '-->', '<?>', "
                                         "'<->', 'o?>', 'o->'.")
                    else:
                        raise ValueError("Invalid link: "
                                         f"'link_assumptions'[{j}][({i}, {lag_i})] = "
                                         f"{link_ij}. Allowed are: '-?>', '-->', '<?>', "
                                         "'<->', 'o?>', 'o->', '<?-', '<--', '<?o', '<-o', "
                                         "'o-o', 'o?o'.")

                if link_ij[0] == "-":

                    if link_ij[2] != ">":
                        raise ValueError("Invalid link: "
                                         f"'link_assumptions'[{j}][({i}, {lag_i})] = "
                                         f"{link_ij}. The first character is '-', which says "
                                         f"that ({i}, {lag_i}) is an ancestor (cause) of "
                                         f"({j}, 0). Hence, ({j}, 0) is a non-ancestor "
                                         f"(non-cause) of ({i}, {lag_i}) and the third "
                                         "character must be '>'.")

                    # Mark the ancestorship
                    if lag_i == 0:
                        ancs_mat_contemp[i, j] = 1
                    for Delta_t in range(0, self.tau_max + 1 - abs(lag_i)):
                        ancs_mat[self.N*(abs(lag_i) + Delta_t) + i,
                                 self.N*Delta_t + j] = 1

                elif link_ij[0] in ["<", "o"]:

                    if lag_i < 0:

                        if link_ij[2] != ">":
                            raise ValueError("Invalid link: "
                                             f"'link_assumptions'[{j}][({i}, {lag_i})] = "
                                             f"{link_ij}. Since {lag_i} < 0, ({j}, 0) "
                                             "cannot be an ancestor (cause) of "
                                             f"({i}, {lag_i}). Hence, the third character "
                                             "must be '>'.")

                    else:

                        if link_ij[2] not in ["-", ">", "o"]:
                            raise ValueError("Invalid link: "
                                             f"'link_assumptions'[{j}][({i}, {0})] = "
                                             f"{link_ij}. The third character must be one "
                                             "of the following: 1) '-', which says that "
                                             f"({j}, 0) is an ancestor (cause) of "
                                             f"({i}, {0}). 2) '>', which says that "
                                             f"({j}, 0) is a non-ancestor (non-cause) of "
                                             f"({i}, {0}). 3) 'o', which says that it is "
                                             f"unknown whether or not ({j}, {0}) is an "
                                             f"ancestor (cause) of ({i}, {0}).")

                        if link_ij[2] == "-":

                            if link_ij[0] != "<":
                                raise ValueError("Invalid link: "
                                                 f"'link_assumptions'[{j}][({i}, {0})] = "
                                                 f"{link_ij}. The third character is '-', "
                                                 f"which says that ({j}, {0}) is an "
                                                 f"ancestor (cause) of ({i}, 0). Hence, "
                                                 f"({i}, 0) is a non-ancestor (non-cause) "
                                                 f"of ({j}, {0}) and the first character "
                                                 "must be '<'.")

                            # Mark the ancestorship
                            ancs_mat_contemp[j, i] = 1
                            for Delta_t in range(0, self.tau_max + 1):
                                ancs_mat[self.N*Delta_t + j,
                                         self.N*Delta_t + i] = 1

                else:
                    raise ValueError("Invalid link: "
                                     f"'link_assumptions'[{j}][({i}, {lag_i})] = "
                                     f"{link_ij}. The first character must be one of the "
                                     f"following: 1) '-', which says that ({i}, {lag_i}) "
                                     f"is an ancestor (cause) of ({j}, 0). 2) '<', which "
                                     f"says that ({i}, {lag_i}) is a non-ancestor "
                                     f"(non-cause) of ({j}, 0). 3) 'o', which says that it "
                                     f"is unknown whether or not ({i}, {lag_i}) is an "
                                     f"ancestor (cause) of ({j}, {0}).")

                if link_ij[1] not in ["-", "?"]:
                    raise ValueError("Invalid link: "
                                     f"'link_assumptions'[{j}][({i}, {lag_i})] = "
                                     f"{link_ij}. The second character must be one of the "
                                     "following: 1) '-', which says that the link "
                                     f"({i}, {lag_i}) {link_ij} ({j}, 0) is definitely "
                                     "part of the graph. 2) '?', which says that the link "
                                     "might be, but does not need to be, part of the graph.")

                # Check for symmetry of lag zero links
                if lag_i == 0:

                    if (self.link_assumptions.get(i) is None
                            or self.link_assumptions[i].get((j, 0)) is None
                            or self.link_assumptions[i][(j, 0)] != self._reverse_link(link_ij)):
                        raise ValueError("The lag zero links specified by "
                                         "'link_assumptions' must be symmetric: Because "
                                         f"'link_assumptions'[{j}][({i}, {0})] = "
                                         f"'{link_ij}' there must also be "
                                         f"'link_assumptions'[{i}][({j}, {0})] = "
                                         f"'{self._reverse_link(link_ij)}'.")

        # Check for contemporaneous cycles
        ancs_mat_contemp_to_N = np.linalg.matrix_power(ancs_mat_contemp, self.N)
        if np.sum(ancs_mat_contemp_to_N) != 0:
            raise ValueError("According to 'link_assumptions', there is a "
                             "contemporaneous causal cycle. Causal cycles are not allowed.")

        # Check for almost directed cycles
        ancs_mat_summed = np.linalg.inv(np.eye(ancs_mat.shape[0], dtype="int32") - ancs_mat)
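        # Editor's note (not part of the original source): since contemporaneous cycles were
        # excluded above and lagged links point strictly forward in time, ancs_mat is the
        # adjacency matrix of a DAG and hence nilpotent. The inverse therefore equals the
        # finite Neumann series I + A + A^2 + ..., whose entry (a, b) counts the directed
        # walks from variable a to variable b; a nonzero entry thus certifies a directed
        # (causal) path. Analogously, the N-th matrix power of ancs_mat_contemp above is
        # nonzero exactly if the contemporaneous links contain a cycle, because only a cycle
        # among N nodes can yield walks of length N.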

        for j, links_j in self.link_assumptions.items():
            for (i, lag_i), link_ij in links_j.items():
                if (link_ij != ""
                        and link_ij[0] == "<"
                        and ancs_mat_summed[self.N*abs(lag_i) + i, j] != 0):
                    raise ValueError("Inconsistency in 'link_assumptions': "
                                     f"Since 'link_assumptions'[{j}][({i}, {lag_i})] "
                                     f"= {link_ij}, variable ({i}, {lag_i}) is a "
                                     f"non-ancestor (non-cause) of ({j}, 0). At the same "
                                     "time, however, 'link_assumptions' specifies a "
                                     f"directed path (causal path) from ({i}, {lag_i}) to "
                                     f"({j}, 0).")

        # Replace absent entries by ''
        for j in range(self.N):
            if self.link_assumptions.get(j) is None:
                self.link_assumptions[j] = {(i, -tau_i): ""
                                            for (i, tau_i) in product(range(self.N), range(self.tau_min, self.tau_max + 1))
                                            if (tau_i > 0 or i != j)}
            else:
                for (i, tau_i) in product(range(self.N), range(self.tau_min, self.tau_max + 1)):
                    if (tau_i > 0 or i != j):
                        if self.link_assumptions[j].get((i, -tau_i)) is None:
                            self.link_assumptions[j][(i, -tau_i)] = ""

    def _initialize_run_memory(self):
        """Function for initializing various memory variables for storing the current graph, sepsets etc."""

        # Initialize the nested dictionary for storing the current graph.
        # Syntax: self.graph_dict[j][(i, -tau)] gives the string representing the link from X^i_{t-tau} to X^j_t
        self.graph_dict = {}
        for j in range(self.N):

            self.graph_dict[j] = {(i, 0): "o?o" for i in range(self.N) if j != i}

            if self.max_cond_px == 0 and self.update_middle_marks:
                self.graph_dict[j].update({(i, -tau): "oL>" for i in range(self.N) for tau in range(1, self.tau_max + 1)})
            else:
                self.graph_dict[j].update({(i, -tau): "o?>" for i in range(self.N) for tau in range(1, self.tau_max + 1)})
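
        # Editor's illustration (not part of the original source): for N = 2 and tau_max = 1
        # with the default settings (max_cond_px == 0, update_middle_marks == True), this
        # initialization yields
        #     self.graph_dict[0] == {(1, 0): "o?o", (0, -1): "oL>", (1, -1): "oL>"}
        #     self.graph_dict[1] == {(0, 0): "o?o", (0, -1): "oL>", (1, -1): "oL>"}
        # i.e., all contemporaneous links start fully unoriented and all lagged links start
        # with an arrowhead at the later variable.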

        # Initialize the nested dictionary for storing separating sets
        # Syntax: self.sepsets[j][(i, -tau)] stores separating sets of X^i_{t-tau} and X^j_t. For tau = 0, i < j.
        self.sepsets = {j: {(i, -tau): set() for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)}

        # Initialize dictionaries for storing known ancestorships, non-ancestorships, and ambiguous ancestorships
        # Syntax: self.def_ancs[j] contains the set of all known ancestors of X^j_t. Equivalently for the others
        self.def_ancs = {j: set() for j in range(self.N)}
        self.def_non_ancs = {j: set() for j in range(self.N)}
        self.ambiguous_ancestorships = {j: set() for j in range(self.N)}

        # Initialize nested dictionaries for saving the maximal p-value among all conditional independence tests of a given
        # pair of variables as well as the corresponding test statistic values and conditioning set cardinalities
        # Syntax: As for self.sepsets
        self.pval_max = {j: {(i, -tau): -np.inf for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)}
        self.pval_max_val = {j: {(i, -tau): np.inf for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)}
        self.pval_max_card = {j: {(i, -tau): -np.inf for i in range(self.N) for tau in range(self.tau_max + 1) if (tau > 0 or i < j)} for j in range(self.N)}

        # Initialize a nested dictionary for caching na-pds-sets
        # Syntax: self._na_pds_t[(i, t_i)][(j, t_j)] stores na_pds_t((i, t_i), (j, t_j))
        self._na_pds_t = {(j, -tau_j): {} for j in range(self.N) for tau_j in range(self.tau_max + 1)}

        # Initialize variables for remembering the maximal cardinality among all calculated na-pds-sets, as well as the
        # maximal cardinality of any search set in the non-ancestral phase
        self.max_na_search_set_found = -1
        self.max_na_pds_set_found = -1

        # Apply the restriction imposed by tau_min
        self._apply_tau_min_restriction()

        # Apply the background knowledge given by link_assumptions
        if self.link_assumptions is not None:
            self._apply_link_assumptions()

        # Return
        return True

    def _apply_tau_min_restriction(self):
        """Apply the restrictions imposed by a non-zero tau_min:
        - Remove all links of lag smaller than tau_min from self.graph_dict
        - Set the corresponding entries of self.pval_max, self.pval_max_val, and self.pval_max_card to np.inf, -np.inf, and np.inf
        """

        for (i, j, tau) in product(range(self.N), range(self.N), range(0, self.tau_min)):
            if tau > 0 or j != i:
                self.graph_dict[j][(i, -tau)] = ""

            if tau > 0 or i < j:
                self.pval_max[j][(i, -tau)] = np.inf
                self.pval_max_val[j][(i, -tau)] = -np.inf
                self.pval_max_card[j][(i, -tau)] = np.inf

    def _apply_link_assumptions(self):
        """Apply the background knowledge specified by 'link_assumptions':
        - Write the specified edge types to self.graph_dict
        - For absent links, set the corresponding entries of self.pval_max to np.inf, of self.pval_max_val to -np.inf, and of
          self.pval_max_card to np.inf
        """

        for j, links_j in self.link_assumptions.items():
            for (i, lag_i), link in self.link_assumptions[j].items():

                # Apply background knowledge
                if link != "" and link[1] == "?" and lag_i < 0 and self.max_cond_px == 0 and self.update_middle_marks:
                    self.graph_dict[j][(i, lag_i)] = link[0] + "L" + link[2]
                else:
                    self.graph_dict[j][(i, lag_i)] = link

                # If the background knowledge amounts to absence of the link, set the corresponding entries of
                # self.pval_max to np.inf, of self.pval_max_val to -np.inf, and of self.pval_max_card to np.inf
                if link == "" and (lag_i < 0 or i < j):
                    self.pval_max[j][(i, lag_i)] = np.inf
                    self.pval_max_val[j][(i, lag_i)] = -np.inf
                    self.pval_max_card[j][(i, lag_i)] = np.inf
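
        # Editor's illustration (not part of the original source): with the default settings
        # (max_cond_px == 0, update_middle_marks == True), a lagged assumption such as
        # link_assumptions[j][(i, -1)] = '-?>' is written to the working graph as '-L>',
        # i.e., the '?' middle mark is replaced by the 'L' middle mark; all other assumptions
        # are copied verbatim.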

    def _run_ancestral_removal_phase(self, prelim=False):
        """Run an ancestral edge removal phase, this is Algorithm S2"""

        # Iterate until convergence
        # p_pc is the cardinality of the non-default part of the conditioning sets. The full conditioning sets may have
        # higher cardinality due to default conditioning on known parents
        p_pc = 0
        while_broken = False
        while True:

            ##########################################################################################################
            ### Run the next removal iteration #######################################################################

            # Force-quit the while loop when p_pc exceeds the limit imposed by self.max_p_global
            if p_pc > self.max_p_global:
                while_broken = True
                break

            # Verbose output
            if self.verbosity >= 1:
                if p_pc == 0:
                    print("\nStarting test phase\n")
                print("p = {}".format(p_pc))

            # Variables to memorize the occurrence and absence of certain events in the below edge removal phase
            has_converged = True
            any_removal = False

            # Generate the prioritized link list
            if self.auto_first:

                link_list = [product(range(self.N), range(-self.tau_max, 0))]
                link_list = link_list + [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)]

            else:

                link_list = [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)]
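
            # Editor's illustration (not part of the original source): for N = 2 and
            # tau_max = 1 with auto_first == True, link_list consists of
            #     [(0, -1), (1, -1)]                      # auto-lag links, tested first
            #     [(0, 0, 0), (0, 1, 0), (1, 0, 0), ...]  # all ordered pairs at lag 0
            #     [(0, 0, -1), (0, 1, -1), ...]           # all ordered pairs at lag 1
            # 2-tuples (i, -tau) decode to X = (i, -tau), Y = (i, 0); 3-tuples (i, j, -lag)
            # decode to X = (i, -lag), Y = (j, 0).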

            # Run through all elements of link_list. Each element of link_list specifies ordered pairs of variables whose
            # connecting edges are then subjected to conditional independence tests
            for links in link_list:

                # Memory variables for storing edges that are marked for removal
                to_remove = {j: {} for j in range(self.N)}

                # Iterate through all edges specified by links. Note that since the variable pairs are ordered, (A, B) and (B, A)
                # are seen as different pairs.
                for pair in links:

                    # Decode the elements of links into pairs of variables (X, Y)
                    if len(pair) == 2:
                        X = (pair[0], pair[1])
                        Y = (pair[0], 0)
                    else:
                        X = (pair[0], pair[2])
                        Y = (pair[1], 0)

                    # Do not test auto-links twice
                    if self.auto_first and X[0] == Y[0]:
                        continue

                    ######################################################################################################
                    ### Exclusion of links ###############################################################################

                    # Exclude the current link if ...
                    # ... X = Y
                    if X[1] == 0 and X[0] == Y[0]:
                        continue
                    # ... X > Y
                    if self._is_smaller(Y, X):
                        continue

                    # Get the current link
                    link = self._get_link(X, Y)

                    # Moreover, exclude the current link if ...
                    # ... X and Y are not adjacent anymore
                    if link == "":
                        continue
                    # ... the link is definitely part of G
                    if link[1] == "-":
                        continue

                    ######################################################################################################
                    ### Determine which tests the link will be subjected to #############################################

                    # Depending on the middle mark on the link between X and Y as well as on some global options, we may not need
                    # to search for separating sets among the potential parents of Y and/or X.
                    test_Y = True if link[1] not in ["R", "!"] else False
                    test_X = True if (link[1] not in ["L", "!"] and (X[1] == 0 or (self.max_cond_px > 0 and self.max_cond_px >= p_pc))) else False
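
                    # Editor's note (not part of the original source), summarizing how the
                    # middle marks are read in this function: 'R' records that the search on
                    # the side of Y has been exhausted, 'L' the same for the side of X, '!'
                    # both sides, and '-' that the link is fixed; '?' makes no claim. Hence
                    # links marked 'R' or '!' skip the Y-side search, and links marked 'L'
                    # or '!' skip the X-side search.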

                    ######################################################################################################
                    ### Preparation of the PC search sets and default conditioning sets ##################################

                    if test_Y:
                        S_default_YX, S_search_YX = self._get_default_and_search_sets(Y, X, "ancestral")

                    if test_X:
                        S_default_XY, S_search_XY = self._get_default_and_search_sets(X, Y, "ancestral")

                    ######################################################################################################
                    ### Middle mark updates ##############################################################################

                    any_middle_mark_update = False

                    # Note: Updating the middle marks here, within the for-loop, does not spoil order independence. In fact, this
                    # update does not influence the flow of the for-loop at all
                    if test_Y:
                        if len(S_search_YX) < p_pc:
                            # Note that X is smaller than Y. If S_search_YX exists and has fewer than p_pc elements, X and Y are not
                            # d-separated by any S \subset Par(Y). Therefore, the middle mark on the edge between X and Y can be
                            # updated with 'R'
                            self._apply_middle_mark(X, Y, "R")
                        else:
                            # Since S_search_YX exists and has at least p_pc elements, the link between X and Y will be subjected to
                            # conditional independence tests. Therefore, the algorithm has not converged yet.
                            has_converged = False

                    if test_X:
                        if len(S_search_XY) < p_pc:
                            # Note that X is smaller than Y. If S_search_XY exists and has fewer than p_pc elements, X and Y are not
                            # d-separated by any S \subset Par(X). Therefore, the middle mark on the edge between X and Y can be
                            # updated with 'L'
                            self._apply_middle_mark(X, Y, "L")
                        else:
                            # Since S_search_XY exists and has at least p_pc elements, the link between X and Y will be subjected to
                            # conditional independence tests. Therefore, the algorithm has not converged yet.
                            has_converged = False

                    ######################################################################################################
                    ### Tests for conditional independence ###############################################################

                    # If the option self.break_once_separated is True, the below for-loops are broken immediately once a separating
                    # set has been found. In conjunction with the modified majority rule employed for orienting links, order
                    # independence (with respect to the index 'i' on X^i_t) then requires that the tested conditioning sets are
                    # ordered in an order independent way. Here, the minimal effect size of previous conditional independence tests
                    # serves as an order independent order criterion.
                    if self.break_once_separated or not np.isinf(self.max_q_global):
                        if test_Y:
                            S_search_YX = self._sort_search_set(S_search_YX, Y)
                        if test_X:
                            S_search_XY = self._sort_search_set(S_search_XY, X)

                    # Run through all cardinality p_pc subsets of S_search_YX
                    if test_Y:

                        q_count = 0
                        for S_pc in combinations(S_search_YX, p_pc):

                            q_count = q_count + 1
                            if q_count > self.max_q_global:
                                break

                            # Build the full conditioning set
                            Z = set(S_pc)
                            Z = Z.union(S_default_YX)

                            # Test conditional independence of X and Y given Z
                            val, pval, dependent = self.cond_ind_test.run_test(X=[X], Y=[Y], Z=list(Z),
                                                                               tau_max=self.tau_max, alpha_or_thres=self.pc_alpha)

                            if self.verbosity >= 2:
                                print("ANC(Y): %s _|_ %s | S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" %
                                      (X, Y, ' '.join([str(z) for z in S_default_YX]), ' '.join([str(z) for z in S_pc]), val, pval))

                            # Update the dictionaries that keep track of the maximal p-value, the corresponding test statistic
                            # values, and the conditioning set cardinalities
                            self._update_pval_val_card_dicts(X, Y, pval, val, len(Z))

                            # Check whether the test result was significant
                            if not dependent:

                                # Mark the edge from X to Y for removal and save the sepset
                                to_remove[Y[0]][X] = True
                                self._save_sepset(X, Y, (frozenset(Z), "wm"))

                                # Verbose output
                                if self.verbosity >= 1:
                                    print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_YX))

                                if self.break_once_separated:
                                    break

                    # Run through all cardinality p_pc subsets of S_search_XY
                    if test_X:

                        q_count = 0
                        for S_pc in combinations(S_search_XY, p_pc):

                            q_count = q_count + 1
                            if q_count > self.max_q_global:
                                break

                            # Build the full conditioning set
                            Z = set(S_pc)
                            Z = Z.union(S_default_XY)

                            # Test conditional independence of X and Y given Z
                            val, pval, dependent = self.cond_ind_test.run_test(X=[X], Y=[Y], Z=list(Z),
                                                                               tau_max=self.tau_max, alpha_or_thres=self.pc_alpha)

                            if self.verbosity >= 2:
                                print("ANC(X): %s _|_ %s | S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" %
                                      (X, Y, ' '.join([str(z) for z in S_default_XY]), ' '.join([str(z) for z in S_pc]), val, pval))

                            # Update the dictionaries that keep track of the maximal p-value, the corresponding test statistic
                            # values, and the conditioning set cardinalities
                            self._update_pval_val_card_dicts(X, Y, pval, val, len(Z))

                            # Check whether the test result was significant
                            if not dependent:

                                # Mark the edge from X to Y for removal and save the sepset
                                to_remove[Y[0]][X] = True
                                self._save_sepset(X, Y, (frozenset(Z), "wm"))

                                # Verbose output
                                if self.verbosity >= 1:
                                    print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_XY))

                                if self.break_once_separated:
                                    break

                # end for pair in links

                ##########################################################################################################
                ### Remove edges marked for removal in to_remove #########################################################

                # Run through all of the nested dictionary
                for j in range(self.N):
                    for (i, lag_i) in to_remove[j].keys():

                        # Remember that at least one edge has been removed, remove the edge
                        any_removal = True
                        self._write_link((i, lag_i), (j, 0), "", verbosity=self.verbosity)

            # end for links in link_list

            # Verbose output
            if self.verbosity >= 1:
                print("\nTest phase complete")

            ##############################################################################################################
            ### Orientations and next step ###############################################################################

            if any_removal:
                # At least one edge was removed or at least one middle mark has been updated. Therefore: i) apply the restricted
                # set of orientation rules, ii) restart the while loop at p_pc = 0, unless all edges have converged; then break
                # the while loop

                only_lagged = False if self.orient_contemp == 2 else True
                any_update = self._run_orientation_phase(rule_list=self._rules_prelim, only_lagged=only_lagged)

                # If the orientation phase made a non-trivial update, then restart the while loop. Else increase p_pc by one
                if any_update:
                    if self.max_cond_px == 0 and self.update_middle_marks:
                        self._update_middle_marks()
                    p_pc = 0

                else:
                    p_pc = p_pc + 1

            else:
                # The graph has not changed at all in this iteration of the while loop. Therefore, if all edges have converged,
                # break the while loop. If at least one edge has not yet converged, increase p_pc by one.

                if has_converged:
                    break
                else:
                    p_pc = p_pc + 1

        # end while True

        ##################################################################################################################
        ### Consistency test and middle mark update ######################################################################

        # Run through the entire graph
        for j in range(self.N):
            for (i, lag_i) in self.graph_dict[j].keys():

                X = (i, lag_i)
                Y = (j, 0)

                if self._is_smaller(Y, X):
                    continue

                # Consider only those links that are still part of G
                link = self._get_link((i, lag_i), (j, 0))
                if len(link) > 0:

                    # Consistency check
                    if not while_broken:
                        assert link[1] != "?"
                        assert link[1] != "L"
                        assert ((link[1] != "R") or (lag_i < 0 and (self.max_cond_px > 0 or not self.update_middle_marks))
                                or (self.no_apr != 0))

                    # Update all middle marks to '!'
                    if link[1] not in ["-", "!"]:
                        self._write_link((i, lag_i), (j, 0), link[0] + "!" + link[2])

        ##################################################################################################################
        ### Final rule applications ######################################################################################

        if not prelim or self.prelim_with_collider_rules:

            if not prelim:
                self.no_apr = self.no_apr - 1

            any_update = self._run_orientation_phase(rule_list=self._rules_all, only_lagged=False)

            if self.max_cond_px == 0 and self.update_middle_marks and any_update:
                self._update_middle_marks()

        else:

            only_lagged = False if self.orient_contemp >= 1 else True
            any_update = self._run_orientation_phase(rule_list=self._rules_prelim_final, only_lagged=only_lagged)

            if self.max_cond_px == 0 and self.update_middle_marks and any_update:
                self._update_middle_marks()

        # Return
        return True
1032
+
1033
+
1034
+ def _run_non_ancestral_removal_phase(self):
1035
+ """Run the non-ancestral edge removal phase, this is Algorithm S3"""
1036
+
1037
+ # Update of middle marks
1038
+ self._update_middle_marks()
1039
+
1040
+ # This function initializeds self._graph_full_dict, a nested dictionary representing the graph including links that are
1041
+ # forward in time. This will make the calculcation of na-pds-t sets easier.
1042
+ self._initialize_full_graph()
1043
+
1044
+ # Iterate until convergence. Here, p_pc is the cardinality of the non-default part of the conditioning sets. The full
1045
+ # conditioning sets may have higher cardinality due to default conditioning on known parents
1046
+ p_pc = 0
1047
+ while True:
1048
+
1049
+ ##########################################################################################################
1050
+ ### Run the next removal iteration #######################################################################
1051
+
1052
+ # Force-quit while loop when p_pc exceeds the limit put by self.max_p_global or self.max_p_non_ancestral
1053
+ if p_pc > self.max_p_global or p_pc > self.max_p_non_ancestral:
1054
+ break
1055
+
1056
+ # Verbose output
1057
+ if self.verbosity >= 1:
1058
+ if p_pc == 0:
1059
+ print("\nStarting test phase\n")
1060
+ print("p = {}".format(p_pc))
1061
+
1062
+ # Variables to memorize the occurence and absence of certain events in the below edge removal phase
1063
+ has_converged = True
1064
+ any_removal = False
1065
+
1066
+ # Generate the prioritized link list
1067
+ if self.auto_first:
1068
+
1069
+ link_list = [product(range(self.N), range(-self.tau_max, 0))]
1070
+ link_list = link_list + [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)]
1071
+
1072
+ else:
1073
+
1074
+ link_list = [product(range(self.N), range(self.N), range(-lag, -lag + 1)) for lag in range(0, self.tau_max + 1)]
1075
+
1076
+
1077
+ # Run through all elements of link_list. Each element of link_list specifies ordered pairs of variables whose connecting
1078
+ # edges are then subjected to conditional independence tests
1079
+ for links in link_list:
1080
+
1081
+ # Memory variables for storing edges that are marked for removal
1082
+ to_remove = {j: {} for j in range(self.N)}
1083
+
1084
+ # Iterate through all edges specified by links. Note that since the variables paris are ordered, (A, B) and (B, A) are
1085
+ # seen as different pairs.
1086
+ for pair in links:
1087
+
1088
+ if len(pair) == 2:
1089
+ X = (pair[0], pair[1])
1090
+ Y = (pair[0], 0)
1091
+ else:
1092
+ X = (pair[0], pair[2])
1093
+ Y = (pair[1], 0)
1094
+
1095
+ # Do not test auto-links twice
1096
+ if self.auto_first and X[0] == Y[0]:
1097
+ continue
1098
+
1099
+ ######################################################################################################
1100
+ ### Exclusion of links ###############################################################################
1101
+
1102
+ # Exclude the current link if ...
1103
+ # ... X = Y
1104
+ if X[1] == 0 and X[0] == Y[0]:
1105
+ continue
1106
+ # ... X > Y
1107
+ if self._is_smaller(Y, X):
1108
+ continue
1109
+
1110
+ # Get the current link
1111
+ link = self._get_link(X, Y)
1112
+
1113
+ # Exclude the current link if ...
1114
+ if link == "":
1115
+ continue
1116
+ # ... the link is definitely part of G
1117
+ if link[1] == "-":
1118
+ continue
1119
+
1120
+ ######################################################################################################
1121
+ ### Determine which tests the link will be subjected to #############################################
1122
+
1123
+ # The algorithm always searches for separating sets in na-pds-t(Y, X). Depending on whether the X and Y are
1124
+ # contemporaneous on some global options, the algorithm may also search for separating sets in na-pds-t(X, Y)
1125
+ test_X = True if (X[1] == 0 or (self.max_cond_px > 0 and self.max_cond_px >= p_pc)) else False
1126
+
1127
+ ######################################################################################################
1128
+ ### Preparation of default conditioning sets and PC search sets ######################################
1129
+
1130
+ # Verbose output
1131
+ if self.verbosity >= 2:
1132
+ print("_get_na_pds_t ")
1133
+
1134
+ S_default_YX, S_search_YX = self._get_default_and_search_sets(Y, X, "non-ancestral")
1135
+
1136
+ self.max_na_search_set_found = max(self.max_na_search_set_found, len(S_search_YX))
1137
+
1138
+ if test_X:
1139
+ S_default_XY, S_search_XY = self._get_default_and_search_sets(X, Y, "non-ancestral")
1140
+
1141
+ self.max_na_search_set_found = max(self.max_na_search_set_found, len(S_search_XY))
1142
+
1143
+ # If the search set exceeds the specified bounds, do not test this link
1144
+ if len(S_search_YX) > self.max_pds_set or (test_X and len(S_search_XY) > self.max_pds_set):
1145
+ continue
1146
+
1147
+ ######################################################################################################
1148
+
1149
+ ######################################################################################################
1150
+ ### Middle mark updates ##############################################################################
1151
+
1152
+ # Note: Updating the middle marks here, within the for-loop, does not spoil order independence. In fact, this
1153
+ # update does not influence the flow of the for-loop at all
1154
+ if len(S_search_YX) < p_pc or (test_X and len(S_search_XY) < p_pc):
1155
+ # Mark the link from X to Y as converged, remember the fixation, then continue
1156
+ self._write_link(X, Y, link[0] + "-" + link[2], verbosity = self.verbosity)
1157
+ continue
1158
+
1159
+ else:
1160
+ has_converged = False
1161
+
1162
+
1163
+ ######################################################################################################
1164
+ ### Tests for conditional independence ###############################################################
1165
+
1166
+ # If option self.break_once_separated is True, the below for-loops will be broken immediately once a separating set
1167
+ # has been found. In conjunction with the modified majority rule employed for orienting links, order independence
1168
+ # (with respect to the index 'i' on X^i_t) then requires that the tested conditioning sets are ordered in an order
1169
+ # independent way. Here, the minimal effect size of previous conditional independence tests serve as an order
1170
+ # independent order criterion.
1171
+ if self.break_once_separated or not np.isinf(self.max_q_global):
1172
+ S_search_YX = self._sort_search_set(S_search_YX, Y)
1173
+ if test_X:
1174
+ S_search_XY = self._sort_search_set(S_search_XY, X)
1175
+
1176
+ # Verbose output
1177
+ if self.verbosity >= 2:
1178
+ print("for S_pc in combinations(S_search_YX, p_pc)")
1179
+
1180
+ # Run through all cardinality p_pc subsets of S_search_YX
1181
+ q_count = 0
1182
+ for S_pc in combinations(S_search_YX, p_pc):
1183
+
1184
+ q_count = q_count + 1
1185
+ if q_count > self.max_q_global:
1186
+ break
1187
+
1188
+ # Build the full conditioning set
1189
+ Z = set(S_pc)
1190
+ Z = Z.union(S_default_YX)
1191
+
1192
+ # Test conditional independence of X and Y given Z
1193
+ # val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max)
1194
+ val, pval, dependent = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z),
1195
+ tau_max = self.tau_max, alpha_or_thres=self.pc_alpha)
1196
+
1197
+ if self.verbosity >= 2:
1198
+ print("Non-ANC(Y): %s _|_ %s | S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" %
1199
+ (X, Y, ' '.join([str(z) for z in S_default_YX]), ' '.join([str(z) for z in S_pc]), val, pval))
1200
+
1201
+ # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic
1202
+ # values and conditioning set cardinalities
1203
+ self._update_pval_val_card_dicts(X, Y, pval, val, len(Z))
1204
+
1205
+ # Check whether test result was significant
1206
+ if not dependent: # pval > self.pc_alpha:
1207
+
1208
+ # Mark the edge from X to Y for removal and save sepset
1209
+ to_remove[Y[0]][X] = True
1210
+ self._save_sepset(X, Y, (frozenset(Z), "wm"))
1211
+
1212
+ # Verbose output
1213
+ if self.verbosity >= 1:
1214
+ print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_YX))
1215
+
1216
+ if self.break_once_separated:
1217
+ break
1218
+
1219
+ if test_X:
1220
+
1221
+ # Verbose output
1222
+ if self.verbosity >= 2:
1223
+ print("for S_pc in combinations(S_search_XY, p_pc)")
1224
+
1225
+ # Run through all cardinality p_pc subsets of S_search_XY
1226
+ q_count = 0
1227
+ for S_pc in combinations(S_search_XY, p_pc):
1228
+
1229
+ q_count = q_count + 1
1230
+ if q_count > self.max_q_global:
1231
+ break
1232
+
1233
+ # Build the full conditioning set
1234
+ Z = set(S_pc)
1235
+ Z = Z.union(S_default_XY)
1236
+
1237
+ # Test conditional independence of X and Y given Z
1238
+ # val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max)
1239
+ val, pval, dependent = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z),
1240
+ tau_max = self.tau_max, alpha_or_thres=self.pc_alpha)
1241
+
1242
+ if self.verbosity >= 2:
1243
+ print("Non-ANC(X): %s _|_ %s | S_def = %s, S_pc = %s: val = %.2f / pval = % .4f" %
1244
+ (X, Y, ' '.join([str(z) for z in S_default_XY]), ' '.join([str(z) for z in S_pc]), val, pval))
1245
+
1246
+ # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic
1247
+ # values and conditioning set cardinalities
1248
+ self._update_pval_val_card_dicts(X, Y, pval, val, len(Z))
1249
+
1250
+ # Check whether test result was significant
1251
+ if not dependent: # pval > self.pc_alpha:
1252
+
1253
+ # Mark the edge from X to Y for removal and save sepset
1254
+ to_remove[Y[0]][X] = True
1255
+ self._save_sepset(X, Y, (frozenset(Z), "wm"))
1256
+
1257
+ # Verbose output
1258
+ if self.verbosity >= 1:
1259
+ print("({},{:2}) {:11} {} given {} union {}".format(X[0], X[1], "independent", Y, S_pc, S_default_YX))
1260
+
1261
+ if self.break_once_separated:
1262
+ break
1263
+
1264
+ # end for links in link_list
1265
+
1266
+ ##########################################################################################################
1267
+ ### Remove edges marked for removal in to_remove #########################################################
1268
+
1269
+ # Check whether there is any removal at all
1270
+ any_removal_this = False
1271
+
1272
+ # Run through all of the nested dictionary
1273
+ for j in range(self.N):
1274
+ for (i, lag_i) in to_remove[j].keys():
1275
+
1276
+ # Remember that at least one edge has been removed, remove the edge
1277
+ any_removal = True
1278
+ any_removal_this = True
1279
+ self._write_link((i, lag_i), (j, 0), "", verbosity = self.verbosity)
1280
+
1281
+ # If any_removal_this = True, we need to recalculate full graph dict
1282
+ if any_removal_this:
1283
+ self._initialize_full_graph()
1284
+ self._na_pds_t = {(j, -tau_j): {} for j in range(self.N) for tau_j in range(self.tau_max + 1)}
1285
+
1286
+
1287
+ # end if any_removal_this
1288
+
1289
+ # Verbose output
1290
+ if self.verbosity >= 1:
1291
+ print("\nTest phase complete")
1292
+
1293
+ ##############################################################################################################
1294
+ ### Orientations and next step ###############################################################################
1295
+
1296
+ if any_removal:
1297
+ # At least one edge was removed or at least one middle mark has been updated. Therefore: i) apply the full set of
1298
+ # orientation rules, ii) if this finds new information restart the while loop at p_pc = 0, else increase p_pc by one
1299
+
1300
+ any_update = self._run_orientation_phase(rule_list = self._rules_all, only_lagged = False)
1301
+
1302
+ if any_update:
1303
+ self._initialize_full_graph()
1304
+ self._na_pds_t = {(j, -tau_j): {} for j in range(self.N) for tau_j in range(self.tau_max + 1)}
1305
+ p_pc = 0
1306
+
1307
+ else:
1308
+ p_pc = p_pc + 1
1309
+
1310
+ else:
1311
+ # The graph has not changed at all in this iteration of the while loop. Therefore, if all edges have converged, break
1312
+ # the while loop. If at least one edge has not yet converged, increase p_pc by one.
1313
+
1314
+ if has_converged:
1315
+ break
1316
+ else:
1317
+ p_pc = p_pc + 1
1318
+
1319
+ # end while True
1320
+
1321
+ ##################################################################################################################
1322
+ ### Final rule applications ######################################################################################
1323
+
1324
+ self._run_orientation_phase(rule_list = self._rules_all, only_lagged = False)
1325
+
1326
+ # Return
1327
+ return True
1328
+
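+ # Illustrative sketch (not part of the class): under the simplifying assumption of a single abstract
+ # routine test_all_links_at(p) (hypothetical name), the removal phase above follows the control flow
+ #
+ #     p = 0
+ #     while True:
+ #         removed = test_all_links_at(p)       # CI tests with conditioning sets of cardinality p
+ #         if removed:
+ #             new_info = apply_orientation_rules()
+ #             p = 0 if new_info else p + 1     # restart at cardinality zero on new information
+ #         elif all_links_converged():
+ #             break                            # nothing changed and all links have converged
+ #         else:
+ #             p = p + 1                        # move on to larger conditioning sets
+ #     apply_orientation_rules()                # final rule applications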
1329
+
1330
+ def _run_orientation_phase(self, rule_list, only_lagged = False):
1331
+ """Exhaustively apply the rules specified by rule_list, this is Algorithm S4"""
1332
+
1333
+ # Verbose output
1334
+ if self.verbosity >= 1:
1335
+ print("\nStarting orientation phase")
1336
+ print("with rule list: ", rule_list)
1337
+
1338
+ # Remember whether this call to _run_orientation_phase restarted at least once, i.e., found useful new information
1339
+ restarted_once = False
1340
+
1341
+ # Run through all priority levels of rule_list
1342
+ idx = 0
1343
+ while idx <= len(rule_list) - 1:
1344
+
1345
+ # Some rules require self.graph_full_dict. Therefore, it is initialized whenever the while loop (re)starts at the first
1347
+ # priority level
1347
+ if idx == 0:
1348
+ self._initialize_full_graph()
1349
+
1350
+ # Remember whether G will be updated with new useful information ('x' marks are considered not useful)
1351
+ restart = False
1352
+
1353
+ ###########################################################################################################
1354
+ ### Rule application ######################################################################################
1355
+
1356
+ # Get the current rules
1357
+ current_rules = rule_list[idx]
1358
+
1359
+ # Prepare a list to remember marked orientations
1360
+ to_orient = []
1361
+
1362
+ # Run through all current rules
1363
+ for rule in current_rules:
1364
+
1365
+ # Verbose output
1366
+ if self.verbosity >= 1:
1367
+ print("\n{}:".format(rule))
1368
+
1369
+ # Exhaustively apply the rule to the graph...
1370
+ orientations = self._apply_rule(rule, only_lagged)
1371
+
1372
+ # Verbose output
1373
+ if self.verbosity >= 1:
1374
+ for ((i, j, lag_i), new_link) in set(orientations):
1375
+ print("{:10} ({},{:2}) {:3} ({},{:2}) ==> ({},{:2}) {:3} ({},{:2}) ".format("Marked:", i, lag_i, self._get_link((i, lag_i), (j, 0)), j, 0,i, lag_i, new_link, j, 0))
1376
+ if len(orientations) == 0:
1377
+ print("Found nothing")
1378
+
1379
+ # ... and stage the results for orientation and removal
1380
+ to_orient.extend(orientations)
1381
+
1382
+ ###########################################################################################################
1383
+ ### Aggregation of marked orientations ####################################################################
1384
+
1385
+ links_to_remove = set()
1386
+ links_to_fix = set()
1387
+ new_ancs = {j: set() for j in range(self.N)}
1388
+ new_non_ancs = {j: set() for j in range(self.N)}
1389
+
1390
+ # Run through all of the nested dictionary
1391
+ for ((i, j, lag_i), new_link) in to_orient:
1392
+
1393
+ # The old link
1394
+ old_link = self._get_link((i, lag_i), (j, 0))
1395
+
1396
+ # Is the link marked for removal?
1397
+ if new_link == "" and len(old_link) > 0:
1398
+ links_to_remove.add((i, j, lag_i))
1399
+ continue
1400
+
1401
+ # Assert that no preceding variable is marked as an ancestor of a later variable
1402
+ assert not (lag_i > 0 and new_link[2] == "-")
1403
+
1404
+ # Is the link marked for fixation?
1405
+ if new_link[1] == "-" and old_link[1] != "-":
1406
+ links_to_fix.add((i, j, lag_i))
1407
+
1408
+ # New ancestral relation of (i, lag_i) to (j, 0)
1409
+ if new_link[0] == "-" and old_link[0] != "-":
1410
+ new_ancs[j].add((i, lag_i))
1411
+ elif new_link[0] == "<" and old_link[0] != "<":
1412
+ new_non_ancs[j].add((i, lag_i))
1413
+
1414
+ # New ancestral relation of (j, 0) to (i, lag_i == 0)
1415
+ if lag_i == 0:
1416
+ if new_link[2] == "-" and old_link[2] != "-":
1417
+ new_ancs[i].add((j, 0))
1418
+ elif new_link[2] == ">" and old_link[2] != ">":
1419
+ new_non_ancs[i].add((j, 0))
1420
+
1421
+ # Resolve conflicts about removal and fixation
1422
+ ambiguous_links = links_to_fix.intersection(links_to_remove)
1423
+ links_to_fix = links_to_fix.difference(ambiguous_links)
1424
+ links_to_remove = links_to_remove.difference(ambiguous_links)
1425
+
1426
+ ###########################################################################################################
1427
+ ### Removals, update middle marks, update ancestral information ###########################################
1428
+
1429
+ # Remove links
1430
+ for (i, j, lag_i) in links_to_remove:
1431
+ self._write_link((i, lag_i), (j, 0), "", verbosity = self.verbosity)
1432
+ restart = True
1433
+
1434
+ # Fix links
1435
+ for (i, j, lag_i) in links_to_fix:
1436
+ old_link = self._get_link((i, lag_i), (j, 0))
1437
+ new_link = old_link[0] + "-" + old_link[2]
1438
+ self._write_link((i, lag_i), (j, 0), new_link, verbosity = self.verbosity)
1439
+ restart = True
1440
+
1441
+ # Mark links as ambiguous
1442
+ for (i, j, lag_i) in ambiguous_links:
1443
+ old_link = self._get_link((i, lag_i), (j, 0))
1444
+ new_link = old_link[0] + "x" + old_link[2]
1445
+ self._write_link((i, lag_i), (j, 0), new_link, verbosity = self.verbosity)
1446
+
1447
+ # Update ancestral information. The function called includes conflict resolution
1448
+ restart = restart or self._apply_new_ancestral_information(new_non_ancs, new_ancs)
1449
+
1450
+ ###########################################################################################################
1451
+ ### Make separating sets of removed links weakly minimal ##################################################
1452
+
1453
+ if len(links_to_remove) > 0:
1454
+
1455
+ # Verbose output
1456
+ if self.verbosity >= 1:
1457
+ print("\nLinks were removed by rules\n")
1458
+
1459
+ new_ancs = {j: set() for j in range(self.N)}
1460
+ new_non_ancs = {j: set() for j in range(self.N)}
1461
+
1462
+ # Run through all links that have been removed
1463
+ for (i, j, lag_i) in links_to_remove:
1464
+
1465
+ X = (i, lag_i)
1466
+ Y = (j, 0)
1467
+
1468
+ # Get ancestors of X and Y
1469
+ ancs_XY = self._get_ancs([X, Y]).difference({X, Y})
1470
+
1471
+ # Read out all separating sets that were found in the rule phase, then consider only those of minimal
1472
+ # cardinality
1473
+ old_sepsets_all = {Z for (Z, _) in self._get_sepsets(X, Y)}
1474
+ min_size = min({len(Z) for Z in old_sepsets_all})
1475
+ old_sepsets_smallest = {Z for Z in old_sepsets_all if len(Z) == min_size}
1476
+
1477
+ # For all separating sets of minimal cardinality, find weakly minimal separating subsets
1478
+ self._delete_sepsets(X, Y)
1479
+ self._make_sepset_weakly_minimal(X, Y, old_sepsets_smallest, ancs_XY)
1480
+ new_sepsets = self._get_sepsets(X, Y)
1481
+
1482
+ # end for (i, j, lag_i) in links_to_remove
1483
+ # end if len(links_to_remove) > 0
1484
+
1485
+ # If any useful new information was found, go back to idx = 0, else increase idx by 1
1486
+ if restart:
1487
+ idx = 0
1488
+ restarted_once = True
1489
+ else:
1490
+ idx = idx + 1
1491
+
1492
+ # end while idx <= len(rule_list) - 1
1493
+
1494
+ # Verbose output
1495
+ if self.verbosity >= 1:
1496
+ print("\nOrientation phase complete")
1497
+
1498
+ # Return whether this call restarted the rule application at least once, i.e., found useful new information
1499
+ return restarted_once
1500
+
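+ # Minimal sketch of the priority-level loop above (apply_rules_at_level is a hypothetical stand-in
+ # for the rule application plus aggregation steps):
+ #
+ #     idx = 0
+ #     while idx < len(rule_list):
+ #         found_new_info = apply_rules_at_level(rule_list[idx])
+ #         idx = 0 if found_new_info else idx + 1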
1501
+ ########################################################################################################################
1502
+ ########################################################################################################################
1503
+ ########################################################################################################################
1504
+
1505
+ def _get_default_and_search_sets(self, A, B, phase):
1506
+ """Return the default conditioning set and PC search set"""
1507
+
1508
+ if phase == "ancestral":
1509
+
1510
+ # This is a-pds-t(A, B)
1511
+ S_raw = self._get_a_pds_t(A, B)
1512
+
1513
+ # Determine the default conditioning set
1514
+ S_default = self._get_parents(A, B).difference({A, B})
1515
+
1516
+ # Determine the PC search set
1517
+ S_search = S_raw.difference(S_default)
1518
+
1519
+
1520
+ elif phase == "non-ancestral":
1521
+
1522
+ # This is na-pds-t(A, B)
1523
+ S_raw = self._get_na_pds_t(A, B)
1524
+
1525
+ self.max_na_pds_set_found = max(self.max_na_pds_set_found, len(S_raw))
1526
+
1527
+ # Determine the default conditioning set
1528
+ S_default = S_raw.intersection(self._get_ancs([A, B]))
1529
+ S_default = S_default.union(self._get_parents(A, B))
1530
+ S_default = S_default.difference({A, B})
1531
+
1532
+ # Determine the PC search set
1533
+ S_search = S_raw.difference(S_default)
1534
+
1535
+ # Return
1536
+ return S_default, S_search
1537
+
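+ # Schematically, in the "non-ancestral" phase the conditioning set used in a test is
+ # Z = S_default | S_pc with S_pc a subset of S_search, where (in Python set notation)
+ #
+ #     S_default = ((S_raw & ancestors_of_A_and_B) | parents_of_A_and_B) - {A, B}
+ #     S_search = S_raw - S_default
+ #
+ # with ancestors_of_A_and_B and parents_of_A_and_B standing in for self._get_ancs([A, B]) and
+ # self._get_parents(A, B).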
1538
+
1539
+ def _apply_new_ancestral_information(self, new_non_ancs, new_ancs):
1540
+ """Apply the new ancestorships and non-ancestorships specified by new_non_ancs and new_ancs to the current graph. Conflicts
1541
+ are resolved by marking. Returns True if any circle mark was turned into a head or tail, else False."""
1542
+
1543
+ #######################################################################################################
1544
+ ### Preprocessing #####################################################################################
1545
+
1546
+ # Memory variables
1547
+ add_to_def_non_ancs = {j: set() for j in range(self.N)}
1548
+ add_to_def_ancs = {j: set() for j in range(self.N)}
1549
+ add_to_ambiguous_ancestorships = {j: set() for j in range(self.N)}
1550
+ put_head_or_tail = False
1551
+
1552
+ # Default values
1553
+ if new_non_ancs is None:
1554
+ new_non_ancs = {j: set() for j in range(self.N)}
1555
+
1556
+ if new_ancs is None:
1557
+ new_ancs = {j: set() for j in range(self.N)}
1558
+
1559
+ # Marking A as ancestor of B implies that B is marked as a non-ancestor of A. This is non-trivial only if A and B are contemporaneous
1560
+ for j in range(self.N):
1561
+ for (i, lag_i) in new_ancs[j]:
1562
+ if lag_i == 0:
1563
+ new_non_ancs[i].add((j, 0))
1564
+
1565
+ #######################################################################################################
1566
+ ### Conflict resolution ###############################################################################
1567
+
1568
+ # Iterate through new_non_ancs
1569
+ for j in range(self.N):
1570
+ for (i, lag_i) in new_non_ancs[j]:
1571
+ # X = (i, lag_i), Y = (j, 0)
1572
+ # X is marked as non-ancestor for Y
1573
+
1574
+ # Conflict resolution
1575
+ if (i, lag_i) in self.ambiguous_ancestorships[j]:
1576
+ # There is a conflict, since it is already marked as ambiguous whether X is an ancestor of Y
1577
+ if self.verbosity >= 1:
1578
+ print("{:10} ({}, {:2}) marked as non-anc of {} but saved as ambiguous".format("Conflict:", i, lag_i, (j, 0)))
1579
+
1580
+ elif (i, lag_i) in self.def_ancs[j]:
1581
+ # There is a conflict, since X is already marked as ancestor of Y
1582
+ add_to_ambiguous_ancestorships[j].add((i, lag_i))
1583
+
1584
+ if self.verbosity >= 1:
1585
+ print("{:10} ({}, {:2}) marked as non-anc of {} but saved as anc".format("Conflict:", i, lag_i, (j, 0)))
1586
+
1587
+ elif (i, lag_i) in new_ancs[j]:
1588
+ # There is a conflict, since X is also marked as a new ancestor of Y
1589
+ add_to_ambiguous_ancestorships[j].add((i, lag_i))
1590
+
1591
+ if self.verbosity >= 1:
1592
+ print("{:10} ({}, {:2}) marked as both anc- and non-anc of {}".format("Conflict:", i, lag_i, (j, 0)))
1593
+
1594
+ else:
1595
+ # There is no conflict
1596
+ add_to_def_non_ancs[j].add((i, lag_i))
1597
+
1598
+ # Iterate through new_ancs
1599
+ for j in range(self.N):
1600
+ for (i, lag_i) in new_ancs[j]:
1601
+ # X = (i, lag_i), Y = (j, 0)
1602
+ # X is marked as ancestor for Y
1603
+
1604
+ # Conflict resolution
1605
+ if (i, lag_i) in self.ambiguous_ancestorships[j]:
1606
+ # There is a conflict, since it is already marked as ambiguous whether X is an ancestor of Y
1607
+ if self.verbosity >= 1:
1608
+ print("{:10} ({}, {:2}) marked as anc of {} but saved as ambiguous".format("Conflict:", i, lag_i, (j, 0)))
1609
+
1610
+ elif lag_i == 0 and (j, 0) in self.ambiguous_ancestorships[i]:
1611
+ # There is a conflict, since X and Y are contemporaneous and it is already marked as ambiguous whether Y is an
1612
+ # ancestor of X
1613
+ # Note: This is required here, because X being an ancestor of Y implies that Y is not an ancestor of X. This
1614
+ # ambiguity cannot exist when X is before Y
1615
+ if self.verbosity >= 1:
1616
+ print("{:10} ({}, {:2}) marked as anc of {} but saved as ambiguous".format("Conflict:", i, lag_i, (j, 0)))
1617
+
1618
+ elif (i, lag_i) in self.def_non_ancs[j]:
1619
+ # There is a conflict, since X is already marked as non-ancestor of Y
1620
+ add_to_ambiguous_ancestorships[j].add((i, lag_i))
1621
+
1622
+ if self.verbosity >= 1:
1623
+ print("{:10} ({}, {:2}) marked as anc of {} but saved as non-anc".format("Conflict:", i, lag_i, (j, 0)))
1624
+
1625
+ elif (i, lag_i) in new_non_ancs[j]:
1626
+ # There is a conflict, since X is also marked as a new non-ancestor of Y
1627
+ add_to_ambiguous_ancestorships[j].add((i, lag_i))
1628
+
1629
+ if self.verbosity >= 1:
1630
+ print("{:10} ({}, {:2}) marked as both anc- and non-anc of {}".format("Conflict:", i, lag_i, (j, 0)))
1631
+
1632
+ else:
1633
+ # There is no conflict
1634
+ add_to_def_ancs[j].add((i, lag_i))
1635
+
1636
+ #######################################################################################################
1637
+
1638
+ #######################################################################################################
1639
+ ### Apply the ambiguous information ###################################################################
1640
+
1641
+ for j in range(self.N):
1642
+
1643
+ for (i, lag_i) in add_to_ambiguous_ancestorships[j]:
1644
+
1645
+ old_link = self._get_link((i, lag_i), (j, 0))
1646
+ if len(old_link) > 0 and old_link[0] != "x":
1647
+
1648
+ new_link = "x" + old_link[1] + old_link[2]
1649
+ self._write_link((i, lag_i), (j, 0), new_link, verbosity = self.verbosity)
1650
+
1651
+ if self.verbosity >= 1:
1652
+ if (i, lag_i) in self.def_ancs[j]:
1653
+ print("{:10} Removing ({}, {:2}) as anc of {}".format("Update:", i, lag_i, (j, 0)))
1654
+ if (i, lag_i) in self.def_non_ancs[j]:
1655
+ print("{:10} Removing ({}, {:2}) as non-anc of {}".format("Update:", i, lag_i, (j, 0)))
1656
+
1657
+ self.def_ancs[j].discard((i, lag_i))
1658
+ self.def_non_ancs[j].discard((i, lag_i))
1659
+
1660
+ if lag_i == 0:
1661
+
1662
+ if self.verbosity >= 1 and (j, 0) in self.def_ancs[i]:
1663
+ print("{:10} Removing {} as anc of {}".format("Update:", i, lag_i, (j, 0)))
1664
+
1665
+ self.def_ancs[i].discard((j, 0))
1666
+ # Do we also need the following?
1667
+ # self.def_non_ancs[i].discard((j, 0))
1668
+
1669
+ if self.verbosity >= 1 and (i, lag_i) not in self.ambiguous_ancestorships[j]:
1670
+ print("{:10} Marking ancestorship of ({}, {:2}) to {} as ambiguous".format("Update:", i, lag_i, (j, 0)))
1671
+
1672
+ self.ambiguous_ancestorships[j].add((i, lag_i))
1673
+
1674
+ #######################################################################################################
1675
+ ### Apply the unambiguous information #################################################################
1676
+
1677
+ for j in range(self.N):
1678
+
1679
+ for (i, lag_i) in add_to_def_non_ancs[j]:
1680
+
1681
+ old_link = self._get_link((i, lag_i), (j, 0))
1682
+ if len(old_link) > 0 and old_link[0] != "<":
1683
+ new_link = "<" + old_link[1] + old_link[2]
1684
+ self._write_link((i, lag_i), (j, 0), new_link, verbosity = self.verbosity)
1685
+ put_head_or_tail = True
1686
+
1687
+ if self.verbosity >= 1 and (i, lag_i) not in self.def_non_ancs[j]:
1688
+ print("{:10} Marking ({}, {:2}) as non-anc of {}".format("Update:", i, lag_i, (j, 0)))
1689
+
1690
+ self.def_non_ancs[j].add((i, lag_i))
1691
+
1692
+
1693
+ for (i, lag_i) in add_to_def_ancs[j]:
1694
+
1695
+ old_link = self._get_link((i, lag_i), (j, 0))
1696
+ if len(old_link) > 0 and (old_link[0] != "-" or old_link[2] != ">"):
1697
+ new_link = "-" + old_link[1] + ">"
1698
+ self._write_link((i, lag_i), (j, 0), new_link, verbosity = self.verbosity)
1699
+ put_head_or_tail = True
1700
+
1701
+ if self.verbosity >= 1 and (i, lag_i) not in self.def_ancs[j]:
1702
+ print("{:10} Marking ({}, {:2}) as anc of {}".format("Update:", i, lag_i, (j, 0)))
1703
+
1704
+ self.def_ancs[j].add((i, lag_i))
1705
+
1706
+ if lag_i == 0:
1707
+
1708
+ if self.verbosity >= 1 and (j, 0) not in self.def_non_ancs[i]:
1709
+ print("{:10} Marking {} as non-anc of {}".format("Update:",(j, 0), (i, 0)))
1710
+
1711
+ self.def_non_ancs[i].add((j, 0))
1712
+
1713
+ #######################################################################################################
1714
+
1715
+ return put_head_or_tail
1716
+
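+ # Summary of the conflict resolution above for a new claim about X = (i, lag_i) and Y = (j, 0):
+ #
+ #     new claim            existing record                 outcome
+ #     X non-anc of Y       marked ambiguous                keep ambiguous, report conflict
+ #     X non-anc of Y       X in def_ancs[j]                mark ambiguous
+ #     X non-anc of Y       X also in new_ancs[j]           mark ambiguous
+ #     X non-anc of Y       (no conflicting record)         add to def_non_ancs[j]
+ #
+ # and symmetrically for the claim that X is an ancestor of Y.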
1717
+ def _apply_rule(self, rule, only_lagged):
1718
+ """Call the orientation-removal-rule specified by the string argument rule."""
1719
+
1720
+ if rule == "APR":
1721
+ return self._apply_APR(only_lagged)
1722
+ elif rule == "ER-00-a":
1723
+ return self._apply_ER00a(only_lagged)
1724
+ elif rule == "ER-00-b":
1725
+ return self._apply_ER00b(only_lagged)
1726
+ elif rule == "ER-00-c":
1727
+ return self._apply_ER00c(only_lagged)
1728
+ elif rule == "ER-00-d":
1729
+ return self._apply_ER00d(only_lagged)
1730
+ elif rule == "ER-01":
1731
+ return self._apply_ER01(only_lagged)
1732
+ elif rule == "ER-02":
1733
+ return self._apply_ER02(only_lagged)
1734
+ elif rule == "ER-03":
1735
+ return self._apply_ER03(only_lagged)
1736
+ elif rule == "R-04":
1737
+ return self._apply_R04(only_lagged)
1738
+ elif rule == "ER-08":
1739
+ return self._apply_ER08(only_lagged)
1740
+ elif rule == "ER-09":
1741
+ return self._apply_ER09(only_lagged)
1742
+ elif rule == "ER-10":
1743
+ return self._apply_ER10(only_lagged)
1744
+
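+ # A rule list, as passed to _run_orientation_phase, is a list of priority levels, each itself a
+ # list of rule names understood by the dispatcher above; a hypothetical example (the actual
+ # default lists are defined elsewhere in this class) would be
+ #
+ #     rule_list = [["APR"], ["ER-00-a", "ER-00-b"], ["ER-01", "ER-02", "ER-03"]]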
1745
+
1746
+ def _get_na_pds_t(self, A, B):
1747
+ """Return the set na_pds_t(A, B), with at least one of them at lag 0"""
1748
+
1749
+ # Unpack A and B, then assert that at least one of them is at lag 0
1750
+ var_A, lag_A = A
1751
+ var_B, lag_B = B
1752
+ assert lag_A == 0 or lag_B == 0
1753
+
1754
+ # If na_pds_t(A, B) is in memory, return immediately
1755
+ memo = self._na_pds_t[A].get(B)
1756
+ if memo is not None:
1757
+ return memo
1758
+
1759
+ # Else, re-compute na_pds_t(A, B) according to the current graph and cache it.
1760
+
1761
+ # Re-compute na_pds_t_1(A, B) according to the current graph
1762
+ na_pds_t_1 = {(var, lag + lag_A)
1763
+ # W = (var, lag + lag_A) is in na_pds_t_1(A, B) if ...
1764
+ for ((var, lag), link) in self.graph_dict[var_A].items()
1765
+ # ... it is a non-future adjacency of A
1766
+ if len(link) > 0
1767
+ # ... and is not B
1768
+ and (var, lag + lag_A) != B
1769
+ # ... and is not before t - tau_max
1770
+ and (lag + lag_A) >= -self.tau_max
1771
+ # ... and is not after both A and B
1772
+ # ... (i.e. is not after time t)
1773
+ and (lag + lag_A) <= 0
1774
+ # ... and is not a definite non-ancestor of A,
1775
+ # which implies that it is not a definite descendant of A,
1776
+ and link[0] != "<"
1777
+ # ... and is not a definite descendant of B
1778
+ # (i.e., B is not a definite ancestor of W)
1779
+ and (var_B, lag_B - (lag + lag_A)) not in self.def_ancs[var]
1780
+ }
1781
+
1782
+ # Compute na_pds_t_2(A, B)
1783
+
1784
+ # Find all potential C_1 nodes
1785
+ C1_list = set()
1786
+ for ((var, lag), link) in self.graph_full_dict[var_A].items():
1787
+
1788
+ node = (var, lag + lag_A)
1789
+
1790
+ # node is added to C1_list if, in addition to being adjacent to A, ...
1791
+ # ... it is not B
1792
+ if (var, lag + lag_A) == B:
1793
+ continue
1794
+
1795
+ # ... it is not before t - tau_max
1796
+ if (lag + lag_A) < -self.tau_max:
1797
+ continue
1798
+
1799
+ # ... it is not after B
1800
+ if (lag + lag_A) > lag_B:
1801
+ continue
1802
+
1803
+ # ... it is not a definite ancestor of A
1804
+ if link[0] == "-":
1805
+ continue
1806
+
1807
+ # ... it is not a definite descendant of A
1808
+ if link[2] == "-":
1809
+ continue
1810
+
1811
+ # ... it is not a definite non-ancestor of B,
1812
+ # which implies that it is not a definite descendant of B
1813
+ if (var, (lag + lag_A) - lag_B) in self.def_non_ancs[var_B]:
1814
+ continue
1815
+
1816
+ # If all tests are passed, node is added to C1_list
1817
+ C1_list.add(node)
1818
+
1819
+ # end for ((var, lag), link) in self.graph_full_dict[var_A].items()
1820
+
1821
+ # Breadth-first search to find (a superset of) na_pds_t_2(A, B)
1822
+
1823
+ visited = set()
1824
+ start_from = {(C1, A) for C1 in C1_list}
1825
+
1826
+ while start_from:
1827
+
1828
+ new_start_from = set()
1829
+ new_do_not_visit = set()
1830
+
1831
+ for (current_node, previous_node) in start_from:
1832
+
1833
+ visited.add((current_node, previous_node))
1834
+
1835
+ for (var, lag) in self.graph_full_dict[current_node[0]]:
1836
+
1837
+ next_node = (var, lag + current_node[1])
1838
+
1839
+ if next_node[1] < -self.tau_max:
1840
+ continue
1841
+ if next_node[1] > 0:
1842
+ continue
1843
+ if (next_node, current_node) in visited:
1844
+ continue
1845
+ if next_node == previous_node:
1846
+ continue
1847
+ if next_node == B:
1848
+ continue
1849
+ if next_node == A:
1850
+ continue
1851
+
1852
+ link_l = self._get_link(next_node, current_node)
1853
+ link_r = self._get_link(previous_node, current_node)
1854
+
1855
+ if link_l[2] == "-" or link_r[2] == "-":
1856
+ continue
1857
+ if self._get_link(next_node, previous_node) == "" and (link_l[2] == "o" or link_r[2] == "o"):
1858
+ continue
1859
+ if (var_A, lag_A - next_node[1]) in self.def_ancs[next_node[0]] or (var_B, lag_B - next_node[1]) in self.def_ancs[next_node[0]]:
1860
+ continue
1861
+ if ((next_node[1] - lag_A > 0) or (next_node[0], next_node[1] - lag_A) in self.def_non_ancs[var_A]) and ((next_node[1] - lag_B > 0) or (next_node[0], next_node[1] - lag_B) in self.def_non_ancs[var_B]):
1862
+ continue
1863
+
1864
+ new_start_from.add((next_node, current_node))
1865
+
1866
+ start_from = new_start_from
1867
+
1868
+ # end while start_from
1869
+
1870
+ na_pds_t_2 = {node for (node, _) in visited}
1871
+
1872
+ self._na_pds_t[A][B] = na_pds_t_1.union(na_pds_t_2).difference({A, B})
1873
+ return self._na_pds_t[A][B]
1874
+
1875
+
1876
+ def _make_sepset_weakly_minimal(self, X, Y, Z_list, ancs):
1877
+ """
1878
+ X and Y are conditionally independent given each Z in Z_list. However, it is not yet clear whether any of these Z is a minimal
1879
+ separating set.
1880
+
1881
+ This function finds weakly minimal separating subsets in an order independent way and writes them to the self.sepsets
1882
+ dictionary. Only certainly weakly minimal separating subsets are retained.
1883
+ """
1884
+
1885
+ # Assert that all Z in Z_list have the same cardinality
1886
+ assert len({len(Z) for Z in Z_list}) == 1
1887
+
1888
+ # Base Case 1:
1889
+ # Z in Z_list is weakly minimal if len(Z) <= 1 or Z is a subset of ancs
1890
+ any_weakly_minimal = False
1891
+
1892
+ for Z in Z_list:
1893
+
1894
+ if len(Z) <= 1 or Z.issubset(ancs):
1895
+ self._save_sepset(X, Y, (frozenset(Z), "wm"))
1896
+ any_weakly_minimal = True
1897
+
1898
+ if any_weakly_minimal:
1899
+ return None
1900
+
1901
+ # If not Base Case 1, we need to search for separating subsets. We do this for all Z in Z_list, and build a set sepsets_next_call
1902
+ # that contains all separating sets for the next recursive call
1903
+ sepsets_next_call = set()
1904
+
1905
+ for Z in Z_list:
1906
+
1907
+ # Find all nodes A in Z that are not in ancs
1908
+ removable = Z.difference(ancs)
1909
+
1910
+ # Test for removal of all nodes in removable
1911
+ new_sepsets = []
1912
+ val_values = []
1913
+
1914
+ for A in removable:
1915
+
1916
+ Z_A = [node for node in Z if node != A]
1917
+
1918
+ # Run the conditional independence test
1919
+ # val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = Z_A, tau_max = self.tau_max)
1920
+ val, pval, dependent = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = Z_A,
1921
+ tau_max = self.tau_max, alpha_or_thres=self.pc_alpha)
1922
+
1923
+ if self.verbosity >= 2:
1924
+ print("MakeMin: %s _|_ %s | Z_A = %s: val = %.2f / pval = % .4f" %
1925
+ (X, Y, ' '.join([str(z) for z in list(Z_A)]), val, pval))
1926
+
1927
+ # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic
1928
+ # values and conditioning set cardinalities
1929
+ self._update_pval_val_card_dicts(X, Y, pval, val, len(Z_A))
1930
+
1931
+ # Check whether the test result was significant
1932
+ if not dependent: # pval > self.pc_alpha:
1933
+ new_sepsets.append(frozenset(Z_A))
1934
+ val_values.append(val)
1935
+
1936
+ # If new_sepsets is empty, then Z is already weakly minimal
1937
+ if len(new_sepsets) == 0:
1938
+ self._save_sepset(X, Y, (frozenset(Z), "wm"))
1939
+ any_weakly_minimal = True
1940
+
1941
+ # If we did not yet find a weakly minimal separating set
1942
+ if not any_weakly_minimal:
1943
+
1944
+ # Sort all separating sets in new_sepsets by their test statistic, then append those separating sets with maximal statistic
1945
+ # to sepsets_next_call. This i) guarantees order independence while ii) continues to test as few separating sets as possible
1946
+ new_sepsets = [node for _, node in sorted(zip(val_values, new_sepsets), key = lambda pair: pair[0], reverse = True)]
1947
+ val_values = sorted(val_values, reverse = True)
1948
+ i = -1
1949
+ while i <= len(val_values) - 2 and val_values[i + 1] == val_values[0]:
1950
+ sepsets_next_call.add(new_sepsets[i + 1])
1951
+ i = i + 1
1952
+
1953
+ assert i >= 0
1954
+
1955
+ # If we did not yet find a weakly minimal separating set, make a recursive call
1956
+ if not any_weakly_minimal:
1957
+ self._make_sepset_weakly_minimal(X, Y, sepsets_next_call, ancs)
1958
+ else:
1959
+ return None
1960
+
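+ # Worked toy example (illustrative): suppose Z = {W1, W2} separates X and Y, neither W1 nor W2 is
+ # in ancs, and of the two candidate subsets only Z - {W1} = {W2} still separates. Then the
+ # recursion is entered with Z_list = {frozenset({W2})} and terminates via Base Case 1, because
+ # len({W2}) <= 1 makes {W2} weakly minimal by definition.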
1961
+
1962
+ def _B_not_in_SepSet_AC(self, A, B, C):
1963
+ """Is B in less than half of the sets in SepSets(A, C)?"""
1964
+
1965
+ # Treat A - B - C as the same triple as C - B - A
1966
+ # Convention: A is before C or, if they are contemporaneous, the index of A is smaller than that of C
1967
+ if C[1] < A[1] or (C[1] == A[1] and C[0] < A[0]):
1968
+ return self._B_not_in_SepSet_AC(C, B, A)
1969
+
1970
+ # Remember all separating sets that we will find
1971
+ all_sepsets = set()
1972
+
1973
+ # Get the non-future adjacencies of A and C
1974
+ if not self.use_a_pds_t_for_majority:
1975
+ adj_A = self._get_non_future_adj([A]).difference({A, C})
1976
+ adj_C = self._get_non_future_adj([C]).difference({A, C})
1977
+ else:
1978
+ adj_A = self._get_a_pds_t(A, C).difference({A, C})
1979
+ adj_C = self._get_a_pds_t(C, A).difference({A, C})
1980
+
1981
+ Z_add = self._get_parents(A, C).difference({A, C})
1982
+
1983
+ search_A = adj_A.difference(Z_add)
1984
+ search_C = adj_C.difference(Z_add)
1985
+
1986
+ if not np.isinf(self.max_q_global):
1987
+ search_A = self._sort_search_set(search_A, A)
1988
+ search_C = self._sort_search_set(search_C, C)
1989
+
1990
+ # Test for independence given all subsets of non-future adjacencies of A
1991
+ if A[1] < C[1]:
1992
+ max_p_A = min([len(search_A), self.max_cond_px, self.max_p_global]) + 1
1993
+ else:
1994
+ max_p_A = min([len(search_A), self.max_p_global]) + 1
1995
+
1996
+ # Shift lags
1997
+ search_A = [(var, lag - C[1]) for (var, lag) in search_A]
1998
+ search_C = [(var, lag - C[1]) for (var, lag) in search_C]
1999
+ Z_add = {(var, lag - C[1]) for (var, lag) in Z_add}
2000
+ X = (A[0], A[1] - C[1])
2001
+ Y = (C[0], 0)
2002
+
2003
+ for p in range(max_p_A):
2004
+
2005
+ q_count = 0
2006
+ for Z_raw in combinations(search_A, p):
2007
+
2008
+ q_count = q_count + 1
2009
+ if q_count > self.max_q_global:
2010
+ break
2011
+
2012
+ # Prepare the conditioning set
2013
+ Z = {node for node in Z_raw if node != X and node != Y}
2014
+ Z = Z.union(Z_add)
2015
+
2016
+ # Test conditional independence of X and Y given Z
2017
+ # val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max)
2018
+ val, pval, dependent = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z),
2019
+ tau_max = self.tau_max, alpha_or_thres=self.pc_alpha)
2020
+
2021
+ if self.verbosity >= 2:
2022
+ print("BnotinSepSetAC(A): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" %
2023
+ (X, Y, ' '.join([str(z) for z in Z_add]), ' '.join([str(z) for z in {node for node in Z_raw if node != X and node != Y}]), val, pval))
2024
+
2025
+ # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic
2026
+ # values and conditioning set cardinalities
2027
+ self._update_pval_val_card_dicts(X, Y, pval, val, len(Z))
2028
+
2029
+ # Check whether test result was significant
2030
+ if not dependent: # pval > self.pc_alpha:
2031
+ all_sepsets.add(frozenset(Z))
2032
+
2033
+ # Test for independence given all subsets of non-future adjacencies of C
2034
+ for p in range(min(len(search_C), self.max_p_global) + 1):
2035
+
2036
+ q_count = 0
2037
+ for Z_raw in combinations(search_C, p):
2038
+
2039
+ q_count = q_count + 1
2040
+ if q_count > self.max_q_global:
2041
+ break
2042
+
2043
+ # Prepare the conditioning set
2044
+ Z = {node for node in Z_raw if node != X and node != Y}
2045
+ Z = Z.union(Z_add)
2046
+
2047
+ # Test conditional independence of X and Y given Z
2048
+ # val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max)
2049
+ val, pval, dependent = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z),
2050
+ tau_max = self.tau_max, alpha_or_thres=self.pc_alpha)
2051
+
2052
+ if self.verbosity >= 2:
2053
+ # print("BnotinSepSetAC(C): %s _|_ %s | Z = %s: val = %.2f / pval = % .4f" %
2054
+ # (X, Y, ' '.join([str(z) for z in list(Z)]), val, pval))
2055
+ print("BnotinSepSetAC(C): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" %
2056
+ (X, Y, ' '.join([str(z) for z in Z_add]), ' '.join([str(z) for z in {node for node in Z_raw if node != X and node != Y}]), val, pval))
2057
+
2058
+ # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic
2059
+ # values and conditioning set cardinalities
2060
+ self._update_pval_val_card_dicts(X, Y, pval, val, len(Z))
2061
+
2062
+ # Check whether test result was significant
2063
+ if not dependent: # pval > self.pc_alpha:
2064
+ all_sepsets.add(frozenset(Z))
2065
+
2066
+ # Append the already known sepset
2067
+ all_sepsets = all_sepsets.union({Z for (Z, _) in self._get_sepsets(X, Y)})
2068
+
2069
+ # Count number of sepsets and number of sepsets that contain B
2070
+ n_sepsets = len(all_sepsets)
2071
+ n_sepsets_with_B = len([1 for Z in all_sepsets if (B[0], B[1] - C[1]) in Z])
2072
+
2073
+ return True if 2*n_sepsets_with_B < n_sepsets else False
2074
+
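+ # For example, if three separating sets were found and the (lag-shifted) B appears in exactly one
+ # of them, then 2*1 < 3 holds and the method returns True: B is in less than half of the
+ # separating sets.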
2075
+
2076
+ def _B_in_SepSet_AC(self, A, B, C):
2077
+ """Is B in more than half of the sets in SepSets(A, C)?"""
2078
+
2079
+ # Treat A - B - C as the same triple as C - B - A
2080
+ # Convention: A is before C or, if they are contemporaneous, the index of A is smaller than that of C
2081
+ if C[1] < A[1] or (C[1] == A[1] and C[0] < A[0]):
2082
+ return self._B_in_SepSet_AC(C, B, A)
2083
+
2084
+ link_AB = self._get_link(A, B)
2085
+ link_CB = self._get_link(C, B)
2086
+
2087
+ if link_AB == "" or link_CB == "" or link_AB[1] != "-" or link_CB[1] != "-":
2088
+
2089
+ # The vote is based on those separating sets that were already found
2090
+ all_sepsets = {Z for (Z, _) in self._get_sepsets(A, C)}
2091
+
2092
+ # Count number of sepsets and number of sepsets that contain B
2093
+ n_sepsets = len(all_sepsets)
2094
+ n_sepsets_with_B = len([1 for Z in all_sepsets if B in Z])
2095
+
2096
+ return True if 2*n_sepsets_with_B > n_sepsets else False
2097
+
2098
+ else:
2099
+
2100
+ # Remember all separating sets that we will find
2101
+ all_sepsets = set()
2102
+
2103
+ # Get the non-future adjacencies of A and C
2104
+ if not self.use_a_pds_t_for_majority:
2105
+ adj_A = self._get_non_future_adj([A]).difference({A, C})
2106
+ adj_C = self._get_non_future_adj([C]).difference({A, C})
2107
+ else:
2108
+ adj_A = self._get_a_pds_t(A, C).difference({A, C})
2109
+ adj_C = self._get_a_pds_t(C, A).difference({A, C})
2110
+
2111
+ Z_add = self._get_parents(A, C).difference({A, C})
2112
+
2113
+ search_A = adj_A.difference(Z_add)
2114
+ search_C = adj_C.difference(Z_add)
2115
+
2116
+ if not np.isinf(self.max_q_global):
2117
+ search_A = self._sort_search_set(search_A, A)
2118
+ search_C = self._sort_search_set(search_C, C)
2119
+
2120
+ # Test for independence given all subsets of non-future adjacencies of A
2121
+ if A[1] < C[1]:
2122
+ max_p_A = min([len(search_A), self.max_cond_px, self.max_p_global]) + 1
2123
+ else:
2124
+ max_p_A = min([len(search_A), self.max_p_global]) + 1
2125
+
2126
+ # Shift lags
2127
+ search_A = [(var, lag - C[1]) for (var, lag) in search_A]
2128
+ search_C = [(var, lag - C[1]) for (var, lag) in search_C]
2129
+ Z_add = {(var, lag - C[1]) for (var, lag) in Z_add}
2130
+ X = (A[0], A[1] - C[1])
2131
+ Y = (C[0], 0)
2132
+
2133
+ for p in range(max_p_A):
2134
+
2135
+ q_count = 0
2136
+ for Z_raw in combinations(search_A, p):
2137
+
2138
+ q_count = q_count + 1
2139
+ if q_count > self.max_q_global:
2140
+ break
2141
+
2142
+ # Prepare the conditioning set
2143
+ Z = {node for node in Z_raw if node != X and node != Y}
2144
+ Z = Z.union(Z_add)
2145
+
2146
+ # Test conditional independence of X and Y given Z
2147
+ # val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max)
2148
+ val, pval, dependent = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z),
2149
+ tau_max = self.tau_max, alpha_or_thres=self.pc_alpha)
2150
+
2151
+ if self.verbosity >= 2:
2152
+ # print("BinSepSetAC(A): %s _|_ %s | Z = %s: val = %.2f / pval = % .4f" %
2153
+ # (X, Y, ' '.join([str(z) for z in list(Z)]), val, pval))
2154
+ print("BinSepSetAC(A): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" %
2155
+ (X, Y, ' '.join([str(z) for z in Z_add]), ' '.join([str(z) for z in {node for node in Z_raw if node != X and node != Y}]), val, pval))
2156
+
2157
+ # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic
2158
+ # values and conditioning set cardinalities
2159
+ self._update_pval_val_card_dicts(X, Y, pval, val, len(Z))
2160
+
2161
+ # Check whether test result was significant
2162
+ if not dependent: # pval > self.pc_alpha:
2163
+ all_sepsets.add(frozenset(Z))
2164
+
2165
+ # Test for independence given all subsets of non-future adjacencies of C
2166
+ for p in range(min(len(search_C), self.max_p_global) + 1):
2167
+
2168
+ q_count = 0
2169
+ for Z_raw in combinations(search_C, p):
2170
+
2171
+ q_count = q_count + 1
2172
+ if q_count > self.max_q_global:
2173
+ break
2174
+
2175
+ # Prepare the conditioning set
2176
+ Z = {node for node in Z_raw if node != X and node != Y}
2177
+ Z = Z.union(Z_add)
2178
+
2179
+ # Test conditional independence of X and Y given Z
2180
+ # val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z), tau_max = self.tau_max)
2181
+ val, pval, dependent = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z),
2182
+ tau_max = self.tau_max, alpha_or_thres=self.pc_alpha)
2183
+
2184
+ if self.verbosity >= 2:
2185
+ # print("BinSepSetAC(C): %s _|_ %s | Z = %s: val = %.2f / pval = % .4f" %
2186
+ # (X, Y, ' '.join([str(z) for z in list(Z)]), val, pval))
2187
+ print("BinSepSetAC(C): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" %
2188
+ (X, Y, ' '.join([str(z) for z in Z_add]), ' '.join([str(z) for z in {node for node in Z_raw if node != X and node != Y}]), val, pval))
2189
+
2190
+ # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic
2191
+ # values and conditioning set cardinalities
2192
+ self._update_pval_val_card_dicts(X, Y, pval, val, len(Z))
2193
+
2194
+ # Check whether test result was significant
2195
+ if not dependent: # pval > self.pc_alpha:
2196
+ all_sepsets.add(frozenset(Z))
2197
+
2198
+ # Append the already known sepset
2199
+ all_sepsets = all_sepsets.union({Z for (Z, _) in self._get_sepsets(X, Y)})
2200
+
2201
+ # Count number of sepsets and number of sepsets that contain B
2202
+ n_sepsets = len(all_sepsets)
2203
+ n_sepsets_with_B = len([1 for Z in all_sepsets if (B[0], B[1] - C[1]) in Z])
2204
+
2205
+ return True if 2*n_sepsets_with_B > n_sepsets else False
2206
+
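+ # Note that the two majority votes are not complements: with four separating sets of which exactly
+ # two contain B, neither 2*2 > 4 nor 2*2 < 4 holds, so both _B_in_SepSet_AC and
+ # _B_not_in_SepSet_AC return False and the tie counts as undecided.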
2207
+
2208
+ def _get_parents(self, A, B):
2209
+ """Return all known parents of all nodes in node_list"""
2210
+
2211
+ if self.parents_of_lagged or A[1] == B[1]:
2212
+
2213
+ out = {(var, lag + A[1]) for ((var, lag), link) in self.graph_dict[A[0]].items() if len(link) > 0 and link[0] == "-" and lag + A[1] >= -self.tau_max}
2214
+ return out.union({(var, lag + B[1]) for ((var, lag), link) in self.graph_dict[B[0]].items() if len(link) > 0 and link[0] == "-" and lag + B[1] >= -self.tau_max})
2215
+
2216
+ else:
2217
+ if A[1] < B[1]:
2218
+ return {(var, lag + B[1]) for ((var, lag), link) in self.graph_dict[B[0]].items() if len(link) > 0 and link[0] == "-" and lag + B[1] >= -self.tau_max}
2219
+ else:
2220
+ return {(var, lag + A[1]) for ((var, lag), link) in self.graph_dict[A[0]].items() if len(link) > 0 and link[0] == "-" and lag + A[1] >= -self.tau_max}
2221
+
2222
+
2223
+ def _apply_middle_mark(self, X, Y, char):
2224
+ """Update the middle mark on the link between X and Y with the character char"""
2225
+
2226
+ # Get the old link
2227
+ old_link = self._get_link(X, Y)
2228
+
2229
+ # Determine the new link
2230
+ if old_link[1] == "?":
2231
+ new_link = old_link[0] + char + old_link[2]
2232
+ elif (old_link[1] == "L" and char == "R") or (old_link[1] == "R" and char == "L"):
2233
+ new_link = old_link[0] + "!" + old_link[2]
2234
+ else:
2235
+ assert False
2236
+
2237
+ # Write the new link
2238
+ self._write_link(X, Y, new_link, verbosity = self.verbosity)
2239
+
2240
+ # Return
2241
+ return True
2242
+
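+ # Middle-mark algebra implemented above: an unknown mark "?" is overwritten by the new character,
+ # while complementary one-sided marks combine to "!":
+ #
+ #     "?" + "L" -> "L"    "?" + "R" -> "R"    "L" + "R" -> "!"    "R" + "L" -> "!"
+ #
+ # Any other combination is not expected to occur, hence the assert.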
2243
+
2244
+ def _update_middle_marks(self):
2245
+ """Apply rule MMR"""
2246
+
2247
+ if self.verbosity >= 1:
2248
+ print("\nMiddle mark updates\n")
2249
+
2250
+ # Run through all links
2251
+ for j in range(self.N):
2252
+ for ((i, lag_i), link) in self.graph_dict[j].items():
2253
+
2254
+ if link == "":
2255
+ continue
2256
+
2257
+ X = (i, lag_i)
2258
+ Y = (j, 0)
2259
+
2260
+ # Apply above rule for A = X and B = Y
2261
+ link_XY = self._get_link(X, Y)
2262
+ smaller_XY = self._is_smaller(X, Y)
2263
+
2264
+ if link_XY[2] == ">":
2265
+
2266
+ if link_XY[1] == "?":
2267
+ if smaller_XY:
2268
+ new_link = link_XY[0] + "L>"
2269
+ else:
2270
+ new_link = link_XY[0] + "R>"
2271
+
2272
+ self._write_link(X, Y, new_link, verbosity = self.verbosity)
2273
+
2274
+ elif (link_XY[1] == "R" and smaller_XY) or (link_XY[1] == "L" and not smaller_XY):
2275
+
2276
+ new_link = link_XY[0] + "!>"
2277
+
2278
+ self._write_link(X, Y, new_link, verbosity = self.verbosity)
2279
+
2280
+
2281
+ # Apply above rule for A = Y and B = X
2282
+ link_YX = self._get_link(Y, X)
2283
+ smaller_YX = self._is_smaller(Y, X)
2284
+
2285
+ if link_YX[2] == ">":
2286
+
2287
+ if link_YX[1] == "?":
2288
+ if smaller_YX:
2289
+ new_link = link_YX[0] + "L>"
2290
+ else:
2291
+ new_link = link_YX[0] + "R>"
2292
+
2293
+ self._write_link(Y, X, new_link, verbosity = self.verbosity)
2294
+
2295
+
2296
+ elif (link_YX[1] == "R" and smaller_YX) or (link_YX[1] == "L" and not smaller_YX):
2297
+
2298
+ new_link = link_YX[0] + "!>"
2299
+
2300
+ self._write_link(Y, X, new_link, verbosity = self.verbosity)
2301
+
2302
+ def _is_smaller(self, X, Y):
2303
+ """
2304
+ A node X is said to be smaller than node Y if
2305
+ i) X is before Y or
2306
+ ii) X and Y are contemporaneous and the variable index of X is smaller than that of Y.
2307
+
2308
+ Return True if X is smaller than Y, else return False
2309
+ """
2310
+
2311
+ return (X[1] < Y[1]) or (X[1] == Y[1] and X[0] < Y[0])
2312
+
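+ # Examples (nodes are (variable index, lag) pairs):
+ #
+ #     self._is_smaller((2, -1), (0, 0))    # True: lag -1 lies before lag 0
+ #     self._is_smaller((0, 0), (1, 0))     # True: contemporaneous, smaller variable index
+ #     self._is_smaller((1, 0), (1, 0))     # False: a node is not smaller than itself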
2313
+
2314
+ def _get_a_pds_t(self, A, B):
2315
+ """Return the set a_pds_t(A, B)"""
2316
+
2317
+ # Unpack A
2318
+ var_A, lag_A = A
2319
+
2320
+ # Compute a_pds_t(A, B) according to the current graph
2321
+ return {(var, lag + lag_A)
2322
+ # W = (var, lag + lag_A) is in a_pds_t(A, B) if ...
2323
+ for ((var, lag), link) in self.graph_dict[var_A].items()
2324
+ # ... it is a non-future adjacency of A
2325
+ if len(link) > 0
2326
+ # ... and it is not B
2327
+ and (var, lag + lag_A) != B
2328
+ # ... it is not before t - self.tau_max
2329
+ and lag + lag_A >= -self.tau_max
2330
+ # ... and it is not a definite non-ancestor of A
2331
+ and link[0] != "<"
2332
+ }
2333
+
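+ # Lag-shift convention used here and throughout: graph_dict stores adjacencies relative to lag 0,
+ # so a stored adjacency (var, lag) of variable var_A corresponds to the absolute node
+ # (var, lag + lag_A) when A itself sits at lag_A. For example, for A = (0, -1) the stored
+ # adjacency (2, -1) denotes the node (2, -2).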
2334
+
2335
+ def _get_ancs(self, node_list):
2336
+ """Return the currently known set of ancestors of all nodes in the list node_list. The nodes are not required to be at
2337
+ lag 0"""
2338
+
2339
+ # Build the output set
2340
+ out = set()
2341
+
2342
+ # Run through all nodes
2343
+ for A in node_list:
2344
+ # Unpack the node
2345
+ (var_A, lag_A) = A
2346
+ # Add the ancestors of node to out
2347
+ out = out.union({(var, lag + lag_A) for (var, lag) in self.def_ancs[var_A] if lag + lag_A >= - self.tau_max})
2348
+
2349
+ # Return
2350
+ return out
2351
+
2352
+
2353
+ def _get_non_ancs(self, node_list):
2354
+ """Return the currently known set of non-ancestors of all nodes in the list node_list. The nodes are not required to be
2355
+ at lag 0"""
2356
+
2357
+ # Build the output set
2358
+ out = set()
2359
+
2360
+ # Run through all nodes
2361
+ for A in node_list:
2362
+ # Unpack the node
2363
+ (var_A, lag_A) = A
2364
+ # Add the non-ancestors of the node to out
2365
+ out = out.union({(var, lag + lag_A) for (var, lag) in self.def_non_ancs[var_A] if lag + lag_A >= - self.tau_max})
2366
+
2367
+ # Return
2368
+ return out
2369
+
2370
+
2371
+ def _fix_all_edges(self):
2372
+ """Remove all non-trivial orientations"""
2373
+
2374
+ for j in range(self.N):
2375
+ for (i, lag_i) in self.graph_dict[j].keys():
2376
+
2377
+ link = self._get_link((i, lag_i), (j, 0))
2378
+ if len(link) > 0:
2379
+ new_link = link[0] + "-" + link[2]
2380
+ self.graph_dict[j][(i, lag_i)] = new_link
2381
+
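+ # For example, "o?>" becomes "o->" and "-L>" becomes "-->"; empty links ("") are left untouched.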
2382
+ ########################################################################################################################
2383
+ ########################################################################################################################
2384
+ ########################################################################################################################
2385
+
2386
+ def _apply_APR(self, only_lagged):
2387
+ """Return all orientations implied by orientation rule APR"""
2388
+
2389
+ # Build the output list
2390
+ out = []
2391
+
2392
+ if self.no_apr > 0:
2393
+ return out
2394
+
2395
+ # Get and run through all relevant graphical structures
2396
+ for j in range(self.N):
2397
+ for (i, lag_i) in self.graph_dict[j]:
2398
+
2399
+ A = (i, lag_i)
2400
+ B = (j, 0)
2401
+
2402
+ if only_lagged and lag_i == 0:
2403
+ continue
2404
+
2405
+ # Get the link from A to B
2406
+ link_AB = self._get_link(A, B)
2407
+
2408
+ if self._match_link(pattern='-!>', link=link_AB) \
2409
+ or (self._match_link(pattern='-R>', link=link_AB) and self._is_smaller(A, B)) \
2410
+ or (self._match_link(pattern='-L>', link=link_AB) and self._is_smaller(B, A)):
2411
+
2412
+ # Write the new link from A to B to the output list
2413
+ out.append(self._get_pair_key_and_new_link(A, B, "-->"))
2414
+
2415
+ # Return the output list
2416
+ return out
2417
+
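+ # Each rule method stages orientations instead of writing them directly: an entry of the returned
+ # list has the form produced by _get_pair_key_and_new_link, i.e. ((i, j, lag_i), new_link) such as
+ # ((0, 2, -1), "-->"), which _run_orientation_phase later aggregates and applies.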
2418
+ def _apply_ER01(self, only_lagged):
2419
+ """Return all orientations implied by orientation rule R1^prime"""
2420
+
2421
+ # Build the output list
2422
+ out = []
2423
+
2424
+ # Find all graphical structures that the rule applies to
2425
+ all_appropriate_triples = self._find_triples(pattern_ij='**>', pattern_jk='o*+', pattern_ik='')
2426
+
2427
+ # Run through all appropriate graphical structures
2428
+ for (A, B, C) in all_appropriate_triples:
2429
+
2430
+ if only_lagged and B[1] == C[1]:
2431
+ continue
2432
+
2433
+ if self.verbosity >= 2:
2434
+ print("ER01: ", (A, B, C))
2435
+
2436
+ # Check whether the rule applies
2437
+ if self._B_in_SepSet_AC(A, B, C):
2438
+
2439
+ if self.verbosity >= 2:
2440
+ print(" --> in sepset ")
2441
+
2442
+ # Prepare the new link from B to C and append it to the output list
2443
+ link_BC = self._get_link(B, C)
2444
+ new_link_BC = "-" + link_BC[1] + ">"
2445
+ out.append(self._get_pair_key_and_new_link(B, C, new_link_BC))
2446
+
2447
+ # Return the output list
2448
+ return out
2449
+
2450
+ def _apply_ER02(self, only_lagged):
2451
+ """Return all orientations implied by orientation rule R2^prime"""
2452
+
2453
+ # Build the output list
2454
+ out = []
2455
+
2456
+ # Find all graphical structures that the rule applies to
2457
+ all_appropriate_triples = set(self._find_triples(pattern_ij='-*>', pattern_jk='**>', pattern_ik='+*o'))
2458
+ all_appropriate_triples = all_appropriate_triples.union(set(self._find_triples(pattern_ij='**>', pattern_jk='-*>', pattern_ik='+*o')))
2459
+
2460
+ # Run through all appropriate graphical structures
2461
+ for (A, B, C) in all_appropriate_triples:
2462
+
2463
+ if only_lagged and A[1] == C[1]:
2464
+ continue
2465
+
2466
+ # The rule applies to all relevant graphical structures. Therefore, prepare the new link and append it to the output list
2467
+ link_AC = self._get_link(A, C)
2468
+ new_link_AC = link_AC[0] + link_AC[1] + ">"
2469
+ out.append(self._get_pair_key_and_new_link(A, C, new_link_AC))
2470
+
2471
+ # print("Rule 2", A, self._get_link(A, B), B, self._get_link(B, C), C, self._get_link(A, C), new_link_AC)
2472
+
2473
+ # Return the output list
2474
+ return out
2475
+
2476
+
2477
+ def _apply_ER03(self, only_lagged):
2478
+ """Return all orientations implied by orientation rule R3^prime"""
2479
+
2480
+ # Build the output list
2481
+ out = []
2482
+
2483
+ # Find all graphical structures that the rule applies to
2484
+ all_appropriate_quadruples = self._find_quadruples(pattern_ij='**>', pattern_jk='<**', pattern_ik='',
2485
+ pattern_il='+*o', pattern_jl='o*+', pattern_kl='+*o')
2486
+
2487
+ # Run through all appropriate graphical structures
2488
+ for (A, B, C, D) in all_appropriate_quadruples:
2489
+
2490
+ if only_lagged and B[1] == D[1]:
2491
+ continue
2492
+
2493
+ # Check whether the rule applies
2494
+ if self._B_in_SepSet_AC(A, D, C):
2495
+
2496
+ # Prepare the new link from D to B and append it to the output list
2497
+ link_DB = self._get_link(D, B)
2498
+ new_link_DB = link_DB[0] + link_DB[1] + ">"
2499
+ out.append(self._get_pair_key_and_new_link(D, B, new_link_DB))
2500
+
2501
+ # Return the output list
2502
+ return out
2503
+
2504
+
2505
+ def _apply_R04(self, only_lagged):
2506
+ """Return all orientations implied by orientation rule R4 (standard FCI rule)"""
2507
+
2508
+ # Build the output list
2509
+ out = []
2510
+
2511
+ # Find all relevant triangles W-V-Y
2512
+ all_appropriate_triples = self._find_triples(pattern_ij='<-*', pattern_jk='o-+', pattern_ik='-->')
2513
+
2514
+ # Run through all of these triangles
2515
+ for triple in all_appropriate_triples:
2516
+
2517
+ (W, V, Y) = triple
2518
+
2519
+ if only_lagged and (V[1] == Y[1] and W[1] == V[1]):
2520
+ continue
2521
+
2522
+ # Get the current link from W to V, which we will need below
2523
+ link_WV = self._get_link(W, V)
2524
+
2525
+ # Find all discriminating paths for this triangle
2526
+ # Note: To guarantee order independence, we check all discriminating paths. Alternatively, we could check the rule for all
2527
+ # shortest such paths
2528
+ discriminating_paths = self._get_R4_discriminating_paths(triple, max_length = np.inf)
2529
+
2530
+ # Run through all discriminating paths
2531
+ for path in discriminating_paths:
2532
+
2533
+ # Get the end point node
2534
+ X_1 = path[-1]
2535
+
2536
+ # Check which of the two cases of the rule we are in, then append the appropriate new links to the output list
2537
+ if self._B_in_SepSet_AC(X_1, V, Y):
2538
+ # New link from V to Y
2539
+ out.append(self._get_pair_key_and_new_link(V, Y, "-->"))
2540
+
2541
+ elif link_WV != "<-x" and self._B_not_in_SepSet_AC(X_1, V, Y):
2542
+ # New link from V to Y
2543
+ out.append(self._get_pair_key_and_new_link(V, Y, "<->"))
2544
+
2545
+ # If needed, also the new link from W to V
2546
+ if link_WV != "<->":
2547
+ out.append(self._get_pair_key_and_new_link(W, V, "<->"))
2548
+
2549
+ # Return the output list
2550
+ return out
2551
+
2552
+
2553
+ def _apply_ER08(self, only_lagged):
2554
+ """Return all orientations implied by orientation rule R8^prime"""
2555
+
2556
+ # Build the output list
2557
+ out = []
2558
+
2559
+ # Find all graphical structures that the rule applies to
2560
+ all_appropriate_triples = self._find_triples(pattern_ij='-*>', pattern_jk='-*>', pattern_ik='o*+')
2561
+
2562
+ # Run through all appropriate graphical structures
2563
+ for (A, B, C) in all_appropriate_triples:
2564
+
2565
+ if only_lagged and A[1] == C[1]:
2566
+ continue
2567
+
2568
+ # The rule applies to all relevant graphical structures. Therefore, prepare the new link and append it to the output list
2569
+ link_AC = self._get_link(A, C)
2570
+ new_link_AC = "-" + link_AC[1] + ">"
2571
+ out.append(self._get_pair_key_and_new_link(A, C, new_link_AC))
2572
+
2573
+ #print("Rule 8:", A, self._get_link(A, B), B, self._get_link(B, C), C, link_AC, new_link_AC)
2574
+
2575
+ # Return the output list
2576
+ return out
2577
+
2578
+
2579
+ def _apply_ER09(self, only_lagged):
2580
+ """Return all orientations implied by orientation rule R9^prime"""
2581
+
2582
+ # Build the output list
2583
+ out = []
2584
+
2585
+ # Find unshielded triples B_1 o--*--o A o--*--> C or B_1 <--*--o A o--*--> C or B_1 <--*-- A o--*--> C
2586
+ all_appropriate_triples = set(self._find_triples(pattern_ij='o*o', pattern_jk='o*>', pattern_ik=''))
2587
+ all_appropriate_triples = all_appropriate_triples.union(set(self._find_triples(pattern_ij='<*o', pattern_jk='o*>', pattern_ik='')))
2588
+ all_appropriate_triples = all_appropriate_triples.union(set(self._find_triples(pattern_ij='<*-', pattern_jk='o*>', pattern_ik='')))
2589
+
2590
+ # Run through all these triples
2591
+ for (B_1, A, C) in all_appropriate_triples:
2592
+
2593
+ if only_lagged and A[1] == C[1]:
2594
+ continue
2595
+
2596
+ # Check whether A is in SepSet(B_1, C), else the rule does not apply
2597
+ if not self._B_in_SepSet_AC(B_1, A, C):
2598
+ continue
2599
+
2600
+ # Although we do not yet know whether the rule applies, we already determine the new form of the link from A to C in
2601
+ # case it does apply
2602
+ link_AC = self._get_link(A, C)
2603
+ new_link_AC = "-" + link_AC[1] + ">"
2604
+ pair_key, new_link = self._get_pair_key_and_new_link(A, C, new_link_AC)
2605
+
2606
+ # For the search of uncovered potentially directed paths from B_1 to C, determine the initial pattern as dictated by the link
2607
+ # from A to B_1
2608
+ first_link = self._get_link(A, B_1)
2609
+ if self._match_link(pattern='o*o', link=first_link):
2610
+ initial_allowed_patterns = ['-*>', 'o*>', 'o*o']
2611
+ elif self._match_link(pattern='o*>', link=first_link) or self._match_link(pattern='-*>', link=first_link):
2612
+ initial_allowed_patterns = ['-*>']
2613
+
2614
+ # Return all uncovered potentially directed paths from B_1 to C
2615
+ #uncovered_pd_paths = self._find_potentially_directed_paths(B_1, C, initial_allowed_patterns, return_if_any_path_found = False,
2616
+ # uncovered=True, reduce_allowed_patterns=True, max_length = np.inf)
2617
+
2618
+ # Find all uncovered potentially directed paths from B_1 to C
2619
+ uncovered_pd_paths = self._get_potentially_directed_uncovered_paths(B_1, C, initial_allowed_patterns)
2620
+
2621
+ # Run through all of these paths and check i) whether the node adjacent to B_1 is non-adjacent to A, ii) whether condition iv) of
2622
+ # the rule antecedent is true. If there is any such path, then the link can be oriented
2623
+ for upd_path in uncovered_pd_paths:
2624
+
2625
+ # Is the node adjacent to B_1 non-adjacent to A (this implies that there are at least three nodes on the path, because else the
2626
+ # node adjacent to B_1 is C) and is A not part of the path?
2627
+ if len(upd_path) < 3 or A in upd_path or self._get_link(A, upd_path[1]) != "":
2628
+ continue
2629
+
2630
+ # If the link from A to B_1 is into B_1, condition iv) is true
2631
+ if first_link[2] == ">":
2632
+ # Mark the link from A to C for orientation, break the for loop to continue with the next triple
2633
+ out.append((pair_key, new_link))
2634
+ break
2635
+
2636
+ # If the link from A to B_1 is not into B_1, we need to check whether B_1 is in SepSet(A, X) where X is the node on upd_path next
2637
+ # to B_1
2638
+ if not self._B_in_SepSet_AC(A, B_1, upd_path[1]):
2639
+ # Continue with the next upd_path
2640
+ continue
2641
+
2642
+ # Now check whether condition iv) holds for all triples on upd_path
2643
+ path_qualifies = True
2644
+ for i in range(len(upd_path) - 2):
2645
+ # We consider the unshielded triples upd_path[i] - upd_path[i+1] - upd_path[i+2]
2646
+
2647
+ # If the link between upd_path[i] and upd_path[i+1] is into the latter, condition iv) is true
2648
+ left_link = self._get_link(upd_path[i], upd_path[i+1])
2649
+ if left_link[2] == ">":
2650
+ # The path qualifies, break the inner for loop
2651
+ break
2652
+
2653
+ # If not, then we need to continue with checking whether upd_path[i+1] in SepSet(upd_path[i+1], upd_path[i+2])
2654
+ if not self._B_in_SepSet_AC(upd_path[i], upd_path[i+1], upd_path[i+2]):
2655
+ # The path does not qualify, break the inner for loop
2656
+ path_qualifies = False
2657
+ break
2658
+
2659
+ # The path qualifies, mark the edge from A to C for orientation and break the outer for loop to continue with the next triple
2660
+ if path_qualifies:
2661
+ out.append((pair_key, new_link))
2662
+ break
2663
+
2664
+ # The path does not qualify, continue with the next upd_path
2665
+
2666
+ # end for upd_path in uncovered_pd_paths
2667
+ # end for (B_1, A, C) in all_appropriate_triples
2668
+
2669
+ # Return the output list
2670
+ return out
2671
+
2672
+
2673
+ def _apply_ER10(self, only_lagged):
2674
+ """Return all orientations implied by orientation rule R10^prime"""
2675
+
2676
+ # Build the output list
2677
+ out = []
2678
+
2679
+ # Find all triples A o--> C <-- P_C
2680
+ all_appropriate_triples = set(self._find_triples(pattern_ij='o*>', pattern_jk='<*-', pattern_ik=''))
2681
+ all_appropriate_triples = all_appropriate_triples.union(set(self._find_triples(pattern_ij='o*>', pattern_jk='<*-', pattern_ik='***')))
2682
+
2683
+ # Collect all triples for the given pair (A, C)
2684
+ triple_sorting_dict = {}
2685
+ for (A, C, P_C) in all_appropriate_triples:
2686
+ if triple_sorting_dict.get((A, C)) is None:
2687
+ triple_sorting_dict[(A, C)] = [P_C]
2688
+ else:
2689
+ triple_sorting_dict[(A, C)].append(P_C)
2690
+
2691
+
2692
+ # Run through all (A, C) pairs
2693
+ for (A, C) in triple_sorting_dict.keys():
2694
+
2695
+ if only_lagged and A[1] == C[1]:
2696
+ continue
2697
+
2698
+ # Find all uncovered potentially directed paths from A to C through any of the P_C nodes
2699
+ relevant_paths = []
2700
+ for P_C in triple_sorting_dict[(A, C)]:
2701
+ for upd_path in self._get_potentially_directed_uncovered_paths(A, P_C, ['-*>', 'o*>', 'o*o']):
2702
+
2703
+ # Run through all of these paths and check i) whether the second-to-last node is non-adjacent to C (this requires the path to
2704
+ # have at least three nodes, because else the second-to-last node would be A), ii) whether the left edge of any 3-node
2705
+ # sub-path is into the middle node or, if not, whether the middle node is in the separating set of the two end-point nodes
2706
+ # of the (3-node) sub-path, and iii) whether C is not an element of the path. If the path meets these conditions, append C to
2707
+ # it and add it to relevant_paths; the second nodes (those adjacent to A) are collected further below
2708
+
2709
+ if len(upd_path) < 3 or C in upd_path or self._get_link(upd_path[-2], C) != "":
2710
+ continue
2711
+
2712
+ upd_path.append(C)
2713
+
2714
+ path_qualifies = True
2715
+ for i in range(len(upd_path) - 2):
2716
+ # We consider the unshielded triples upd_path[i] - upd_path[i+1] - upd_path[i+2]
2717
+
2718
+ # If the link between upd_path[i] and upd_path[i+1] is into the latter, the path qualifies
2719
+ left_link = self._get_link(upd_path[i], upd_path[i+1])
2720
+ if left_link[2] == ">":
2721
+ # The path qualifies, break the inner for loop
2722
+ break
2723
+
2724
+ # If not, then we need to continue with checking whether upd_path[i+1] in SepSet(upd_path[i+1], upd_path[i+2])
2725
+ if not self._B_in_SepSet_AC(upd_path[i], upd_path[i+1], upd_path[i+2]):
2726
+ # The path does not qualify, break the inner for loop
2727
+ path_qualifies = False
2728
+ break
2729
+
2730
+ # The path qualifies, add it to relevant_paths and continue with the next upd_path
2731
+ if path_qualifies:
2732
+ relevant_paths.append(upd_path)
2733
+
2734
+ # The path does not qualify, continue with the next upd_path
2735
+
2736
+ # end for path in self._get_potentially_directed_uncovered_paths(A, P_C, ['-*>', 'o*>', 'o*o'])
2737
+ # end for P_C in triple_sorting_dict[(A, C)]
2738
+
2739
+ # Find all second nodes on the relevant paths
2740
+ second_nodes = list({path[1] for path in relevant_paths})
2741
+
2742
+ # Check whether there is any pair of non-adjacent nodes in second_nodes, such that A is in their separating set. If yes, mark the link
2743
+ # from A to C for orientation
2744
+ for i, j in combinations(range(len(second_nodes)), 2):
2745
+
2746
+ if self._get_link(second_nodes[i], second_nodes[j]) == "" and self._B_in_SepSet_AC(second_nodes[i], A, second_nodes[j]):
2747
+ # Append new link and break the for loop
2748
+ link_AC = self._get_link(A, C)
2749
+ new_link_AC = "-" + link_AC[1] + ">"
2750
+ out.append(self._get_pair_key_and_new_link(A, C, new_link_AC))
2751
+ break
2752
+
2753
+ # end for (A, C) in triple_sorting_dict.keys()
2754
+
2755
+ # Return the output list
2756
+ return out
2757
+
2758
+
2759
+ def _apply_ER00a(self, only_lagged):
2760
+ """Return all orientations implied by orientation rule R0^prime a"""
2761
+
2762
+ # Build the output list
2763
+ out = []
2764
+
2765
+ # Find all graphical structures that the rule applies to
2766
+ all_appropriate_triples = self._find_triples(pattern_ij='***', pattern_jk='***', pattern_ik='')
2767
+
2768
+ # Run through all appropriate graphical structures
2769
+ for (A, B, C) in all_appropriate_triples:
2770
+
2771
+ # Unpack A, B, C
2772
+ (i, lag_i) = A
2773
+ (j, lag_j) = B
2774
+ (k, lag_k) = C
2775
+
2776
+ if only_lagged and (A[1] == B[1] or B[1] == C[1]):
2777
+ continue
2778
+
2779
+ # Get all weakly minimal separating sets in SepSet(A, C)
2780
+ # Remark: The non-weakly-minimal separating sets may be larger, which is why we disfavor them
2781
+ sepsets = self._get_sepsets(A, C)
2782
+ sepsets = {Z for (Z, status) in sepsets if status == "wm"}
2783
+
2784
+ ###################################################################################
2785
+ ### Part 1) of the rule ###########################################################
2786
+
2787
+ remove_AB = False
2788
+ link_AB = self._get_link(A, B)
2789
+
2790
+ # i) Middle mark must not be "x" or "-"
2791
+ if link_AB[1] not in ['-', 'x']:
2792
+ # Test A indep B given union(SepSet(A, C), intersection(def-anc(B), adj(B))) setminus{A, B} setminus{future of both A and B}
2793
+
2794
+ # Conditioning on parents
2795
+ Z_add = self._get_parents(A, B).difference({A, B})
2796
+
2797
+ # Shift the lags appropriately
2798
+ if lag_i <= lag_j:
2799
+ X = (i, lag_i - lag_j) # A shifted
2800
+ Y = (j, 0) # B shifted
2801
+ delta_lag = lag_j
2802
+
2803
+ else:
2804
+ X = (j, lag_j - lag_i) # B shifted
2805
+ Y = (i, 0) # A shifted
2806
+ delta_lag = lag_i
2807
+
2808
+ # Run through all weakly minimal separating sets of A and C
2809
+ for Z in sepsets:
2810
+
2811
+ # Construct the conditioning set to test
2812
+ Z_test = Z.union(Z_add).difference({A, B})
2813
+ Z_test = {(var, lag - delta_lag) for (var, lag) in Z_test if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max}
2814
+ Z_add2 = {(var, lag - delta_lag) for (var, lag) in Z_add.difference({A, B}) if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max}
2815
+
2816
+ # Test conditional independence of X and Y given Z
2817
+ # val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z_test), tau_max = self.tau_max)
2818
+ val, pval, dependent = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z_test),
2819
+ tau_max = self.tau_max, alpha_or_thres=self.pc_alpha)
2820
+
2821
+ if self.verbosity >= 2:
2822
+ # print("ER00a(part1): %s _|_ %s | Z_test = %s: val = %.2f / pval = % .4f" %
2823
+ # (X, Y, ' '.join([str(z) for z in list(Z_test)]), val, pval))
2824
+ print("ER00a(part1): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" %
2825
+ (X, Y, ' '.join([str(z) for z in Z_add2]), ' '.join([str(z) for z in Z_test]), val, pval))
2826
+
2827
+ # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic values and
2828
+ # conditioning set cardinalities
2829
+ self._update_pval_val_card_dicts(X, Y, pval, val, len(Z_test))
2830
+
2831
+ # Check whether test result was significant
2832
+ if not dependent: # pval > self.pc_alpha:
2833
+
2834
+ # Mark the edge from X to Y for removal and save sepset
2835
+ remove_AB = True
2836
+ self._save_sepset(X, Y, (frozenset(Z_test), "nwm"))
2837
+
2838
+ if remove_AB:
2839
+
2840
+ # Remember the edge for removal
2841
+ pair_key, new_link = self._get_pair_key_and_new_link(A, B, "")
2842
+ out.append((pair_key, new_link))
2843
+
2844
+ ###################################################################################
2845
+ ### Part 2) of the rule ###########################################################
2846
+
2847
+ remove_CB = False
2848
+ link_CB = self._get_link(C, B)
2849
+
2850
+ # i) Middle mark must not be "x" or "-"
2851
+ if link_CB[1] not in ['-', 'x']:
2852
+ # Test C indep B given union(SepSet(A, C), intersection(def-anc(B), adj(B))) setminus{A, B} setminus{future of both C and B}
2853
+
2854
+ # Conditioning on parents
2855
+ Z_add = self._get_parents(C, B).difference({C, B})
2856
+
2857
+ # Shift the lags appropriately
2858
+ if lag_k <= lag_j:
2859
+ X = (k, lag_k - lag_j)
2860
+ Y = (j, 0)
2861
+ delta_lag = lag_j
2862
+ else:
2863
+ X = (j, lag_j - lag_k)
2864
+ Y = (k, 0)
2865
+ delta_lag = lag_k
2866
+
2867
+ # Run through all weakly minimal separating sets of A and C
2868
+ for Z in sepsets:
2869
+
2870
+ # Construct the conditioning set to test
2871
+ Z_test = Z.union(Z_add).difference({C, B})
2872
+ Z_test = {(var, lag - delta_lag) for (var, lag) in Z_test if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max}
2873
+ Z_add2 = {(var, lag - delta_lag) for (var, lag) in Z_add.difference({C, B}) if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max}
2874
+
2875
+ # Test conditional independence of X and Y given Z
2876
+ # val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z_test), tau_max = self.tau_max)
2877
+ val, pval, dependent = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z_test),
2878
+ tau_max = self.tau_max, alpha_or_thres=self.pc_alpha)
2879
+
2880
+ if self.verbosity >= 2:
2881
+ # print("ER00a(part2): %s _|_ %s | Z_test = %s: val = %.2f / pval = % .4f" %
2882
+ # (X, Y, ' '.join([str(z) for z in list(Z_test)]), val, pval))
2883
+ print("ER00a(part2): %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" %
2884
+ (X, Y, ' '.join([str(z) for z in Z_add2]), ' '.join([str(z) for z in Z_test]), val, pval))
2885
+
2886
+ # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic values and
2887
+ # conditioning set cardinalities
2888
+ self._update_pval_val_card_dicts(X, Y, pval, val, len(Z_test))
2889
+
2890
+ # Check whether test result was significant
2891
+ if not dependent: # pval > self.pc_alpha:
2892
+
2893
+ # Mark the edge from X to Y for removal and save sepset
2894
+ remove_CB = True
2895
+ self._save_sepset(X, Y, (frozenset(Z_test), "nwm"))
2896
+
2897
+ if remove_CB:
2898
+
2899
+ # Remember the edge for removal
2900
+ pair_key, new_link = self._get_pair_key_and_new_link(C, B, "")
2901
+ out.append((pair_key, new_link))
2902
+
2903
+ ###################################################################################
2904
+ ### Part 3) of the rule ###########################################################
2905
+
2906
+ if remove_AB or remove_CB or link_AB[2] in ["-", "x"] or link_CB[2] in ["-", "x"] or link_AB[1] == "x" or link_CB[1] == "x" or (link_AB[2] == ">" and link_CB[2] == ">"):
2907
+ continue
2908
+
2909
+ if self._B_not_in_SepSet_AC(A, B, C):
2910
+
2911
+ # Prepare the new links and save them to the output
2912
+ if link_AB[2] != ">":
2913
+ new_link_AB = link_AB[0] + link_AB[1] + ">"
2914
+ out.append(self._get_pair_key_and_new_link(A, B, new_link_AB))
2915
+
2916
+ new_link_CB = link_CB[0] + link_CB[1] + ">"
2917
+ if link_CB[2] != ">":
2918
+ out.append(self._get_pair_key_and_new_link(C, B, new_link_CB))
2919
+
2920
+ # end for (A, B, C) in all_appropriate_triples
2921
+
2922
+ # Return the output list
2923
+ return out
2924
+
2925
+
2926
+ def _apply_ER00b(self, only_lagged):
2927
+ """Return all orientations implied by orientation rule R0^prime b"""
2928
+
2929
+ # Build the output list
2930
+ out = []
2931
+
2932
+ # Find all graphical structures that the rule applies to
2933
+ triples_1 = self._find_triples(pattern_ij='**>', pattern_jk='o!+', pattern_ik='')
2934
+ triples_2 = [trip for trip in self._find_triples(pattern_ij='**>', pattern_jk='oR+', pattern_ik='') if self._is_smaller(trip[1], trip[2])]
2935
+ triples_3 = [trip for trip in self._find_triples(pattern_ij='**>', pattern_jk='oL+', pattern_ik='') if self._is_smaller(trip[2], trip[1])]
2936
+ all_appropriate_triples = set(triples_1).union(set(triples_2), set(triples_3))
2937
+
2938
+ # Run through all appropriate graphical structures
2939
+ for (A, B, C) in all_appropriate_triples:
2940
+
2941
+ # Unpack A, B, C
2942
+ (i, lag_i) = A
2943
+ (j, lag_j) = B
2944
+ (k, lag_k) = C
2945
+
2946
+ if only_lagged and A[1] == B[1]:
2947
+ continue
2948
+
2949
+ # Get all weakly minimal separating sets in SepSet(A, C)
2950
+ # Remark: The non-weakly-minimal separating sets may be larger, which is why we disfavor them
2951
+ sepsets = self._get_sepsets(A, C)
2952
+ sepsets = {Z for (Z, status) in sepsets if status == "wm"}
2953
+
2954
+ ###################################################################################
2955
+ ### Part 1) of the rule ###########################################################
2956
+
2957
+ remove_AB = False
2958
+ link_AB = self._get_link(A, B)
2959
+
2960
+ # i) Middle mark must not be "x" or "-"
2961
+ if link_AB[1] not in ['-', 'x']:
2962
+ # Test A indep B given union(SepSet(A, C), intersection(def-anc(B), adj(B))) setminus{A, B} setminus{future of both A and B}
2963
+
2964
+ # Conditioning on parents
2965
+ Z_add = self._get_parents(A, B).difference({A, B})
2966
+
2967
+ # Shift the lags appropriately
2968
+ if lag_i <= lag_j:
2969
+ X = (i, lag_i - lag_j)
2970
+ Y = (j, 0)
2971
+ delta_lag = lag_j
2972
+ else:
2973
+ X = (j, lag_j - lag_i)
2974
+ Y = (i, 0)
2975
+ delta_lag = lag_i
2976
+
2977
+ # Run through all weakly minimal separating sets of A and C
2978
+ for Z in sepsets:
2979
+
2980
+ # Construct the conditioning set to test
2981
+ Z_test = Z.union(Z_add).difference({A, B})
2982
+ Z_test = {(var, lag - delta_lag) for (var, lag) in Z_test if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max}
2983
+ Z_add2 = {(var, lag - delta_lag) for (var, lag) in Z_add.difference({A, B}) if lag - delta_lag <= 0 and lag - delta_lag >= -self.tau_max}
2984
+
2985
+ # Test conditional independence of X and Y given Z
2986
+ # val, pval = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z_test), tau_max = self.tau_max)
2987
+ val, pval, dependent = self.cond_ind_test.run_test(X = [X], Y = [Y], Z = list(Z_test),
2988
+ tau_max = self.tau_max, alpha_or_thres=self.pc_alpha)
2989
+
2990
+ if self.verbosity >= 2:
2991
+ # print("ER00b: %s _|_ %s | Z_test = %s: val = %.2f / pval = % .4f" %
2992
+ # (X, Y, ' '.join([str(z) for z in list(Z_test)]), val, pval))
2993
+ print("ER00b: %s _|_ %s | Z_add = %s, Z = %s: val = %.2f / pval = % .4f" %
2994
+ (X, Y, ' '.join([str(z) for z in Z_add2]), ' '.join([str(z) for z in Z_test]), val, pval))
2995
+
2996
+ # Accordingly update dictionaries that keep track of the maximal p-value and the corresponding test statistic values and
2997
+ # conditioning set cardinalities
2998
+ self._update_pval_val_card_dicts(X, Y, pval, val, len(Z_test))
2999
+
3000
+ # Check whether test result was significant
3001
+ if not dependent: # pval > self.pc_alpha:
3002
+
3003
+ # Mark the edge from X to Y for removal and save sepset
3004
+ remove_AB = True
3005
+ self._save_sepset(X, Y, (frozenset(Z_test), "nwm"))
3006
+
3007
+ if remove_AB:
3008
+ # Remember the edge for removal
3009
+ pair_key, new_link = self._get_pair_key_and_new_link(A, B, "")
3010
+ out.append((pair_key, new_link))
3011
+
3012
+ ###################################################################################
3013
+ ### Part 2) of the rule ###########################################################
3014
+
3015
+ if only_lagged and B[1] == C[1]:
3016
+ continue
3017
+
3018
+ if remove_AB or link_AB[1] == "x":
3019
+ continue
3020
+
3021
+ if self._B_not_in_SepSet_AC(A, B, C):
3022
+
3023
+ # Prepare the new link and save it to the output
3024
+ link_CB = self._get_link(C, B)
3025
+ new_link_CB = link_CB[0] + link_CB[1] + ">"
3026
+ out.append(self._get_pair_key_and_new_link(C, B, new_link_CB))
3027
+
3028
+ # end for (A, B, C) in all_appropriate_triples
3029
+
3030
+ # Return the output list
3031
+ return out
3032
+
3033
+
3034
+ def _apply_ER00c(self, only_lagged):
3035
+ """Return all orientations implied by orientation rule R0^prime c"""
3036
+
3037
+ # Build the output list
3038
+ out = []
3039
+
3040
+ # Find all graphical structures that the rule applies to
3041
+ triples_1 = self._find_triples(pattern_ij='*-*', pattern_jk='o!+', pattern_ik='')
3042
+ triples_2 = [trip for trip in self._find_triples(pattern_ij='*-*', pattern_jk='oR+', pattern_ik='') if self._is_smaller(trip[1], trip[2])]
3043
+ triples_3 = [trip for trip in self._find_triples(pattern_ij='*-*', pattern_jk='oL+', pattern_ik='')
3044
+ if self._is_smaller(trip[2], trip[1])]
3045
+ all_appropriate_triples = set(triples_1).union(set(triples_2), set(triples_3))
3046
+
3047
+ # Run through all appropriate graphical structures
3048
+ for (A, B, C) in all_appropriate_triples:
3049
+
3050
+ if only_lagged and B[1] == C[1]:
3051
+ continue
3052
+
3053
+ # Check whether the rule applies
3054
+ if self._B_not_in_SepSet_AC(A, B, C):
3055
+
3056
+ # Prepare the new link and append it to the output
3057
+ link_CB = self._get_link(C, B)
3058
+ new_link_CB = link_CB[0] + link_CB[1] + ">"
3059
+ out.append(self._get_pair_key_and_new_link(C, B, new_link_CB))
3060
+
3061
+ # end for (A, B, C) in all_appropriate_triples
3062
+
3063
+ # Return the output list
3064
+ return out
3065
+
3066
+
3067
+ def _apply_ER00d(self, only_lagged):
3068
+ """Return all orientations implied by orientation rule R0^prime d"""
3069
+
3070
+ # Build the output list
3071
+ out = []
3072
+
3073
+ # Find all graphical structures that the rule applies to
3074
+ triples_1 = self._find_triples(pattern_ij='*-o', pattern_jk='o-*', pattern_ik='')
3075
+ triples_2 = self._find_triples(pattern_ij='*->', pattern_jk='o-*', pattern_ik='')
3076
+ all_appropriate_triples = set(triples_1).union(set(triples_2))
3077
+
3078
+ # Run through all appropriate graphical structures
3079
+ for (A, B, C) in all_appropriate_triples:
3080
+
3081
+ if only_lagged and (A[1] == B[1] and B[1] == C[1]):
3082
+ continue
3083
+
3084
+ # Check whether the rule applies
3085
+ if self._B_not_in_SepSet_AC(A, B, C):
3086
+ # Prepare the new links and append them to the output
3087
+
3088
+ # From C to B
3089
+ if not only_lagged or B[1] != C[1]:
3090
+ link_CB = self._get_link(C, B)
3091
+ new_link_CB = link_CB[0] + link_CB[1] + ">"
3092
+ out.append(self._get_pair_key_and_new_link(C, B, new_link_CB))
3093
+
3094
+ # If needed, also from A to B
3095
+ link_AB = self._get_link(A, B)
3096
+ if (not only_lagged or A[1] != B[1]) and link_AB[2] == "o":
3097
+ new_link_AB = link_AB[0] + link_AB[1] + ">"
3098
+ out.append(self._get_pair_key_and_new_link(A, B, new_link_AB))
3099
+
3100
+ # end for (A, B, C) in all_appropriate_triples
3101
+
3102
+ # Return the output list
3103
+ return out
3104
+
3105
+ ########################################################################################################################
3106
+ ########################################################################################################################
3107
+ ########################################################################################################################
3108
+
3109
+ def _print_graph_dict(self):
3110
+ """Print all links in graph_dict"""
3111
+
3112
+ for j in range(self.N):
3113
+ for ((i, lag_i), link) in self.graph_dict[j].items():
3114
+ if len(link) > 0 and (lag_i < 0 or i < j):
3115
+ print("({},{:2}) {} {}".format(i, lag_i, link, (j, 0)))
3116
+
3117
+
3118
+ def _get_link(self, A, B):
3119
+ """Get the current link from node A to B"""
3120
+
3121
+ (var_A, lag_A) = A
3122
+ (var_B, lag_B) = B
3123
+
3124
+ if abs(lag_A - lag_B) > self.tau_max:
3125
+ return ""
3126
+ elif lag_A <= lag_B:
3127
+ return self.graph_dict[var_B][(var_A, lag_A - lag_B)]
3128
+ else:
3129
+ return self._reverse_link(self.graph_dict[var_A][(var_B, lag_B - lag_A)])
3130
+
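+ # Editor's example (not part of the original source): if the link (0, -2) o-> (1, 0) is
+ # stored as self.graph_dict[1][(0, -2)] = "o->", then _get_link((0, -2), (1, 0)) returns
+ # "o->" while _get_link((1, 0), (0, -2)) returns the reversed string "<-o".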
3131
+
3132
+ def _get_non_future_adj(self, node_list):
3133
+ """Return all non-future adjacencies of all nodes in node_list"""
3134
+
3135
+ # Build the output starting from an empty set
3136
+ out = set()
3137
+
3138
+ # For each node W in node_list ...
3139
+ for A in node_list:
3140
+ # Unpack A
3141
+ (var_A, lag_A) = A
3142
+ # Add all (current) non-future adjacencies of A to the set out
3143
+ out = out.union({(var, lag + lag_A) for ((var, lag), link) in self.graph_dict[var_A].items() if len(link) > 0 and lag + lag_A >= -self.tau_max})
3144
+
3145
+ # Return the desired set
3146
+ return out
3147
+
3148
+ def _update_pval_val_card_dicts(self, X, Y, pval, val, card):
3149
+ """If 'pval' is larger than the current maximal p-value across all previous independence tests for X and Y (stored in self.pval_max)
3150
+ then: Replace the current values stored in self.pval_max, self.pval_max_val, self.pval_max_card respectively by 'pval', 'val', and 'card'."""
3151
+
3152
+ if X[1] < 0 or X[0] < Y[0]:
3153
+ if pval > self.pval_max[Y[0]][X]:
3154
+ self.pval_max[Y[0]][X] = pval
3155
+ self.pval_max_val[Y[0]][X] = val
3156
+ self.pval_max_card[Y[0]][X] = card
3157
+ else:
3158
+ if pval > self.pval_max[X[0]][Y]:
3159
+ self.pval_max[X[0]][Y] = pval
3160
+ self.pval_max_val[X[0]][Y] = val
3161
+ self.pval_max_card[X[0]][Y] = card
3162
+
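+ # Editor's note (not part of the original source): like _save_sepset below, this method
+ # stores results under Y[0] keyed by X whenever X[1] < 0 or X[0] < Y[0], and under X[0]
+ # keyed by Y otherwise, so that each unordered pair has exactly one storage slot.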
3163
+ def _save_sepset(self, X, Y, Z):
3164
+ """Save Z as separating sets of X and Y. Y is assumed to be at lag 0"""
3165
+
3166
+ # Unpack X and Y
3167
+ (i, lag_i) = X
3168
+ (j, lag_j) = Y
3169
+
3170
+ assert lag_j == 0
3171
+
3172
+ # Save the sepset
3173
+ if lag_i < 0 or i < j:
3174
+ self.sepsets[j][X].add(Z)
3175
+ else:
3176
+ self.sepsets[i][Y].add(Z)
3177
+
3178
+ def _reverse_link(self, link):
3179
+ """Reverse a given link, taking care to replace > with < and vice versa"""
3180
+
3181
+ if link == "":
3182
+ return ""
3183
+
3184
+ if link[2] == ">":
3185
+ left_mark = "<"
3186
+ else:
3187
+ left_mark = link[2]
3188
+
3189
+ if link[0] == "<":
3190
+ right_mark = ">"
3191
+ else:
3192
+ right_mark = link[0]
3193
+
3194
+ return left_mark + link[1] + right_mark
3195
+
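+ # Editor's examples (illustrative, not part of the original source), using the
+ # three-character link convention of this class:
+ # _reverse_link("-->") == "<--", _reverse_link("o->") == "<-o",
+ # _reverse_link("<->") == "<->", _reverse_link("o-o") == "o-o"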
3196
+
3197
+ def _write_link(self, A, B, new_link, verbosity = 0):
3198
+ """Write the information that the link from node A to node B takes the form of new_link into self.graph_dict. Neither is it assumed
3199
+ that at least one of the nodes is at lag 0, nor must A be before B. If A and B are contemporaneous, the link from B to A is also written
3200
+ as the reverse of new_link"""
3201
+
3202
+ # Unpack A and B
3203
+ (var_A, lag_A) = A
3204
+ (var_B, lag_B) = B
3205
+
3206
+ # Write the link from A to B
3207
+ if lag_A < lag_B:
3208
+
3209
+ if verbosity >= 1:
3210
+ print("{:10} ({},{:2}) {:3} ({},{:2}) ==> ({},{:2}) {:3} ({},{:2}) ".format("Writing:", var_A, lag_A - lag_B, self.graph_dict[var_B][(var_A, lag_A - lag_B)], var_B, 0, var_A, lag_A - lag_B, new_link, var_B, 0))
3211
+ #print("Replacing {:3} from ({},{:2}) to {} with {:3}".format(self.graph_dict[var_B][(var_A, lag_A - lag_B)], var_A, lag_A - lag_B, (var_B, 0), new_link))
3212
+
3213
+ self.graph_dict[var_B][(var_A, lag_A - lag_B)] = new_link
3214
+
3215
+
3216
+ elif lag_A == lag_B:
3217
+
3218
+ if verbosity >= 1:
3219
+ print("{:10} ({},{:2}) {:3} ({},{:2}) ==> ({},{:2}) {:3} ({},{:2}) ".format("Writing:", var_A, lag_A - lag_B, self.graph_dict[var_B][(var_A, 0)], var_B, 0, var_A, 0, new_link, var_B, 0))
3220
+ #print("Replacing {:3} from ({},{:2}) to {} with {:3}".format(self.graph_dict[var_B][(var_A, 0)], var_A, 0, (var_B, 0), new_link))
3221
+ print("{:10} ({},{:2}) {:3} ({},{:2}) ==> ({},{:2}) {:3} ({},{:2}) ".format("Writing:", var_B, 0, self.graph_dict[var_A][(var_B, 0)], var_A, 0, var_B, 0, self._reverse_link(new_link), var_A, 0))
3222
+ #print("Replacing {:3} from ({},{:2}) to {} with {:3}".format(self.graph_dict[var_A][(var_B, 0)], var_B, 0, (var_A, 0), self._reverse_link(new_link)))
3223
+
3224
+ self.graph_dict[var_B][(var_A, 0)] = new_link
3225
+ self.graph_dict[var_A][(var_B, 0)] = self._reverse_link(new_link)
3226
+
3227
+ else:
3228
+
3229
+ if verbosity >= 1:
3230
+ print("{:10} ({},{:2}) {:3} ({},{:2}) ==> ({},{:2}) {:3} ({},{:2}) ".format("Writing:", var_B, lag_B - lag_A, self.graph_dict[var_A][(var_B, lag_B - lag_A)], var_A, 0, var_B, lag_B - lag_A, self._reverse_link(new_link), var_A, 0))
3231
+ #print("Replacing {:3} from ({},{:2}) to {} with {:3}".format(self.graph_dict[var_A][(var_B, lag_B - lag_A)], var_B, lag_B - lag_A, (var_A, 0), self._reverse_link(new_link)))
3232
+
3233
+ self.graph_dict[var_A][(var_B, lag_B - lag_A)] = self._reverse_link(new_link)
3234
+
3235
+
3236
+ def _get_sepsets(self, A, B):
3237
+ """For two non-adjacent nodes, get the their separating stored in self.sepsets."""
3238
+
3239
+ (var_A, lag_A) = A
3240
+ (var_B, lag_B) = B
3241
+
3242
+ def _shift(Z, lag_B):
3243
+ return frozenset([(var, lag + lag_B) for (var, lag) in Z])
3244
+
3245
+ if lag_A < lag_B:
3246
+ out = {(_shift(Z, lag_B), status) for (Z, status) in self.sepsets[var_B][(var_A, lag_A - lag_B)]}
3247
+ elif lag_A > lag_B:
3248
+ out = {(_shift(Z, lag_A), status) for (Z, status) in self.sepsets[var_A][(var_B, lag_B - lag_A)]}
3249
+ else:
3250
+ out = {(_shift(Z, lag_A), status) for (Z, status) in self.sepsets[max(var_A, var_B)][(min(var_A, var_B), 0)]}
3251
+
3252
+ return out
3253
+
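+ # Editor's example (not part of the original source): separating sets are stored relative
+ # to lag 0, so _shift translates them to the actual lags of A and B. A stored set
+ # {(1, -1)} for the pair ((0, -1), (2, 0)), queried with A = (0, -2) and B = (2, -1),
+ # is returned as {(1, -2)}.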
3254
+
3255
+ def _initialize_full_graph(self):
3256
+ """
3257
+ The function _get_na_pds_t() needs to know the future adjacencies of a given node, not only the non-future adjacencies that are
3258
+ stored in self.graph_dict. To aid this, this function initializes the dictionary graph_full_dict:
3259
+
3260
+ self.graph_full_dict[j][(i, -tau_i)] contains all adjacencies of (j, 0), in particular those for which tau_i < 0.
3261
+ """
3262
+
3263
+ # Build from an empty nested dictionary
3264
+ self.graph_full_dict = {j: {} for j in range(self.N)}
3265
+
3266
+ # Run through the entire nested dictionary self.graph_dict
3267
+ for j in range(self.N):
3268
+ for ((var, lag), link) in self.graph_dict[j].items():
3269
+
3270
+ if link != "":
3271
+ # Add non-future adjacencies
3272
+ self.graph_full_dict[j][(var, lag)] = link
3273
+
3274
+ # Add the future adjacencies
3275
+ if lag < 0:
3276
+ self.graph_full_dict[var][(j, -lag)] = self._reverse_link(link)
3277
+
3278
+ # Return nothing
3279
+ return None
3280
+
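+ # Editor's example (not part of the original source): if self.graph_dict[2][(0, -1)] is
+ # "-->", then after this call self.graph_full_dict[2][(0, -1)] == "-->" and the future
+ # adjacency self.graph_full_dict[0][(2, 1)] == "<--" is also available.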
3281
+
3282
+ def _get_pair_key_and_new_link(self, A, B, link_AB):
3283
+ """The link from A to B takes the form link_AB. Bring this information into a form appropriate for the output of rule applications"""
3284
+
3285
+ (var_A, lag_A) = A
3286
+ (var_B, lag_B) = B
3287
+
3288
+ if lag_A <= lag_B:
3289
+ return ((var_A, var_B, lag_A - lag_B), link_AB)
3290
+ elif lag_A > lag_B:
3291
+ return ((var_B, var_A, lag_B - lag_A), self._reverse_link(link_AB))
3292
+
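+ # Editor's example (not part of the original source):
+ # _get_pair_key_and_new_link((2, -1), (0, 0), "-->") returns ((2, 0, -1), "-->"), and
+ # _get_pair_key_and_new_link((0, 0), (2, -1), "<--") returns the same ((2, 0, -1), "-->").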
3293
+
3294
+ def _match_link(self, pattern, link):
3295
+ """Matches pattern including wildcards with link."""
3296
+
3297
+ if pattern == '' or link == '':
3298
+ return pattern == link
3299
+ else:
3300
+ left_mark, middle_mark, right_mark = pattern
3301
+ if left_mark != '*':
3302
+ if left_mark == '+':
3303
+ if link[0] not in ['<', 'o']: return False
3304
+ else:
3305
+ if link[0] != left_mark: return False
3306
+
3307
+ if right_mark != '*':
3308
+ if right_mark == '+':
3309
+ if link[2] not in ['>', 'o']: return False
3310
+ else:
3311
+ if link[2] != right_mark: return False
3312
+
3313
+ if middle_mark != '*' and link[1] != middle_mark: return False
3314
+
3315
+ return True
3316
+
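+ # Editor's examples (not part of the original source): _match_link('o*>', 'o->') is True
+ # because '*' matches any middle mark; _match_link('+*>', '-->') is False because '+' on
+ # the left requires '<' or 'o'; _match_link('***', '') is False because the empty link
+ # only matches the empty pattern.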
3317
+
3318
+ def _dict2graph(self):
3319
+ """Convert self.graph_dict to graph array of shape (N, N, self.tau_max + 1)."""
3320
+
3321
+ graph = np.zeros((self.N, self.N, self.tau_max + 1), dtype='U3')
3322
+ for j in range(self.N):
3323
+ for adj in self.graph_dict[j]:
3324
+ (i, lag_i) = adj
3325
+ graph[i, j, abs(lag_i)] = self.graph_dict[j][adj]
3326
+
3327
+ return graph
3328
+
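+ # Editor's note (not part of the original source): in the resulting array, graph[i, j, tau]
+ # holds the link string from (i, -tau) to (j, 0); pairs without a link keep the empty string.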
3329
+
3330
+ def _find_adj(self, graph, node, patterns, exclude=None, ignore_time_bounds=True):
3331
+ """Find adjacencies of node matching patterns."""
3332
+
3333
+ # Setup
3334
+ i, lag_i = node
3335
+ if exclude is None: exclude = []
3336
+ if isinstance(patterns, str):
3337
+ patterns = [patterns]
3338
+
3339
+ # Init
3340
+ adj = []
3341
+ # Find adjacencies going forward/contemp
3342
+ for k, lag_ik in zip(*np.where(graph[i,:,:])):
3343
+ matches = [self._match_link(patt, graph[i, k, lag_ik]) for patt in patterns]
3344
+ if np.any(matches):
3345
+ match = (k, lag_i + lag_ik)
3346
+ if match not in adj and (k, lag_i + lag_ik) not in exclude and (-self.tau_max <= lag_i + lag_ik <= 0 or ignore_time_bounds):
3347
+ adj.append(match)
3348
+
3349
+ # Find adjacencies going backward/contemp
3350
+ for k, lag_ki in zip(*np.where(graph[:,i,:])):
3351
+ matches = [self._match_link(self._reverse_link(patt), graph[k, i, lag_ki]) for patt in patterns]
3352
+ if np.any(matches):
3353
+ match = (k, lag_i - lag_ki)
3354
+ if match not in adj and (k, lag_i - lag_ki) not in exclude and (-self.tau_max <= lag_i - lag_ki <= 0 or ignore_time_bounds):
3355
+ adj.append(match)
3356
+
3357
+ return adj
3358
+
3359
+
3360
+ def _is_match(self, graph, X, Y, pattern_ij):
3361
+ """Check whether the link between X and Y agrees with pattern_ij"""
3362
+
3363
+ (i, lag_i) = X
3364
+ (j, lag_j) = Y
3365
+ tauij = lag_j - lag_i
3366
+ if abs(tauij) >= graph.shape[2]:
3367
+ return False
3368
+ return ((tauij >= 0 and self._match_link(pattern_ij, graph[i, j, tauij])) or
3369
+ (tauij < 0 and self._match_link(self._reverse_link(pattern_ij), graph[j, i, abs(tauij)])))
3370
+
3371
+
3372
+ def _find_triples(self, pattern_ij, pattern_jk, pattern_ik):
3373
+ """Find triples (i, lag_i), (j, lag_j), (k, lag_k) that match patterns."""
3374
+
3375
+ # Graph as array makes it easier to search forward AND backward in time
3376
+ graph = self._dict2graph()
3377
+
3378
+ # print(graph[:,:,0])
3379
+ # print(graph[:,:,1])
3380
+ # print("matching ", pattern_ij, pattern_jk, pattern_ik)
3381
+
3382
+ matched_triples = []
3383
+
3384
+ for i in range(self.N):
3385
+ # Set lag_i = 0 without loss of generality, will be adjusted at end
3386
+ lag_i = 0
3387
+ adjacencies_i = self._find_adj(graph, (i, lag_i), pattern_ij)
3388
+ # print(i, adjacencies_i)
3389
+ for (j, lag_j) in adjacencies_i:
3390
+
3391
+ adjacencies_j = self._find_adj(graph, (j, lag_j), pattern_jk,
3392
+ exclude=[(i, lag_i)])
3393
+ # print(j, adjacencies_j)
3394
+ for (k, lag_k) in adjacencies_j:
3395
+ if self._is_match(graph, (i, lag_i), (k, lag_k), pattern_ik):
3396
+ # Now use stationarity and shift triple such that the right-most
3397
+ # node (on a line t=..., -2, -1, 0, 1, 2, ...) is at lag 0
3398
+ rightmost_lag = max(lag_i, lag_j, lag_k)
3399
+ match = ((i, lag_i - rightmost_lag),
3400
+ (j, lag_j - rightmost_lag),
3401
+ (k, lag_k - rightmost_lag))
3402
+ largest_lag = min(lag_i - rightmost_lag, lag_j - rightmost_lag, lag_k - rightmost_lag)
3403
+ if match not in matched_triples and \
3404
+ -self.tau_max <= largest_lag <= 0:
3405
+ matched_triples.append(match)
3406
+
3407
+ return matched_triples
3408
+
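+ # Editor's example (not part of the original source): _find_triples(pattern_ij='*->',
+ # pattern_jk='o-+', pattern_ik='') returns all triples (i, lag_i) *-> (j, lag_j) o-+ (k, lag_k)
+ # with (i, lag_i) and (k, lag_k) non-adjacent, shifted so that the rightmost node is at lag 0.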
3409
+
3410
+ def _find_quadruples(self, pattern_ij, pattern_jk, pattern_ik,
3411
+ pattern_il, pattern_jl, pattern_kl):
3412
+ """Find quadruples (i, lag_i), (j, lag_j), (k, lag_k), (l, lag_l) that match patterns."""
3413
+
3414
+ # We assume this later
3415
+ assert pattern_il != ''
3416
+
3417
+ # Graph as array makes it easier to search forward AND backward in time
3418
+ graph = self._dict2graph()
3419
+
3420
+ matched_quadruples = []
3421
+
3422
+ # First get triple ijk
3423
+ ijk_triples = self._find_triples(pattern_ij, pattern_jk, pattern_ik)
3424
+
3425
+ for triple in ijk_triples:
3426
+ # Unpack triple
3427
+ (i, lag_i), (j, lag_j), (k, lag_k) = triple
3428
+
3429
+ # Search through adjacencies
3430
+ adjacencies = set(self._find_adj(graph, (i, lag_i), pattern_il,
3431
+ exclude=[(j, lag_j), (k, lag_k)]))
3432
+ if pattern_jl != '':
3433
+ adjacencies = adjacencies.intersection(set(
3434
+ self._find_adj(graph, (j, lag_j), pattern_jl,
3435
+ exclude=[(i, lag_i), (k, lag_k)])))
3436
+ else:
3437
+ adjacencies = set([adj for adj in adjacencies
3438
+ if self._is_match(graph, (j, lag_j), adj, '')])
3439
+
3440
+ if pattern_kl != '':
3441
+ adjacencies = adjacencies.intersection(set(
3442
+ self._find_adj(graph, (k, lag_k), pattern_kl,
3443
+ exclude=[(i, lag_i), (j, lag_j)])))
3444
+ else:
3445
+ adjacencies = set([adj for adj in adjacencies
3446
+ if self._is_match(graph, (k, lag_k), adj, '')])
3447
+
3448
+ for adj in adjacencies:
3449
+ (l, lag_l) = adj
3450
+
3451
+ # Now use stationarity and shift quadruple such that the right-most
3452
+ # node (on a line t=..., -2, -1, 0, 1, 2, ...) is at lag 0
3453
+ rightmost_lag = max(lag_i, lag_j, lag_k, lag_l)
3454
+ match = ((i, lag_i - rightmost_lag),
3455
+ (j, lag_j - rightmost_lag),
3456
+ (k, lag_k - rightmost_lag),
3457
+ (l, lag_l - rightmost_lag),
3458
+ )
3459
+ largest_lag = min(lag_i - rightmost_lag,
3460
+ lag_j - rightmost_lag,
3461
+ lag_k - rightmost_lag,
3462
+ lag_l - rightmost_lag,
3463
+ )
3464
+ if match not in matched_quadruples and \
3465
+ -self.tau_max <= largest_lag <= 0:
3466
+ matched_quadruples.append(match)
3467
+
3468
+ return matched_quadruples
3469
+
3470
+
3471
+ def _get_R4_discriminating_paths(self, triple, max_length = np.inf):
3472
+ """Find all discriminating paths starting from triple"""
3473
+
3474
+ def _search(path_taken, max_length):
3475
+
3476
+ # Get the last visited node and its link to Y
3477
+ last_node = path_taken[-1]
3478
+ link_to_Y = self._get_link(last_node, path_taken[0])
3479
+
3480
+ # Base Case: If the current path is a discriminating path, return it as single entry of a list
3481
+ if len(path_taken) > 3 and link_to_Y == "":
3482
+ return [path_taken]
3483
+
3484
+ # If the current path is not a discriminating path, continue the path
3485
+ paths = []
3486
+
3487
+ if self._get_link(last_node, path_taken[-2])[0] == "<" and link_to_Y == "-->" and len(path_taken) < max_length:
3488
+
3489
+ # Search through all adjacencies of the last node
3490
+ for (var, lag) in self.graph_full_dict[last_node[0]].keys():
3491
+
3492
+ # Build the next node and get its link to the previous
3493
+ next_node = (var, lag + last_node[1])
3494
+ next_link = self._get_link(next_node, last_node)
3495
+
3496
+ # Check whether this node can be visited
3497
+ if next_node[1] <= 0 and next_node[1] >= -self.tau_max and next_node not in path_taken and self._match_link("*->", next_link):
3498
+
3499
+ # Recursive call
3500
+ paths.extend(_search(path_taken[:] + [next_node], max_length))
3501
+
3502
+ # Return the list of discriminating paths
3503
+ return paths
3504
+
3505
+ # Unpack the triple
3506
+ (W, V, Y) = triple
3507
+
3508
+ # Return all discriminating paths starting at this triple
3509
+ return _search([Y, V, W], max_length)
3510
+
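+ # Editor's note (not part of the original source): a returned path is a list
+ # [Y, V, W, ..., X_1] in which every node between X_1 and Y other than V is a collider on
+ # the path and a parent of Y, while only the final node X_1 is non-adjacent to Y; this is
+ # the discriminating path structure required by rule R4 of [1].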
3511
+
3512
+ def _get_potentially_directed_uncovered_paths(self, start_node, end_node, initial_allowed_patterns):
3513
+ """Find all potentiall directed uncoverged paths from start_node to end_node whose first link takes one the forms specified by
3514
+ initial_allowed_patters"""
3515
+
3516
+ assert start_node != end_node
3517
+
3518
+ # Function for recursive search of potentially directed uncovered paths
3519
+ def _search(end_node, path_taken, allowed_patterns):
3520
+
3521
+ # List for outputting potentially directed uncovered paths
3522
+ paths = []
3523
+
3524
+ # The last visited node becomes the new start_node
3525
+ start_node = path_taken[-1]
3526
+
3527
+ # Base case: End node has been reached
3528
+ if start_node == end_node:
3529
+ paths.append(path_taken)
3530
+
3531
+ # Recursive build case
3532
+ else:
3533
+ # Run through the adjacencies of start_node
3534
+ #for next_node in self.graph_full_dict[start_node[0]]:
3535
+ for (var, lag) in self.graph_full_dict[start_node[0]].keys():
3536
+
3537
+ next_node = (var, lag + start_node[1])
3538
+
3539
+ # Consider only nodes that ...
3540
+ # ... are within the allowed time frame
3541
+ if next_node[1] < -self.tau_max or next_node[1] > 0:
3542
+ continue
3543
+ # ... have not been visited yet
3544
+ if next_node in path_taken:
3545
+ continue
3546
+ # ... are non-adjacent to the node before start_node
3547
+ if len(path_taken) >= 2 and self._get_link(path_taken[-2], next_node) != "":
3548
+ continue
3549
+ # ... whose link with start_node matches one of the allowed patterns
3550
+ link = self._get_link(start_node, next_node)
3551
+ if not any([self._match_link(pattern = pattern, link = link) for pattern in allowed_patterns]):
3552
+ continue
3553
+
3554
+ # Determine the allowed patterns for the next recursive call
3555
+ if self._match_link(pattern='o*o', link=link):
3556
+ new_allowed_patterns = ["o*o", "o*>", "-*>"]
3557
+ elif self._match_link(pattern='o*>', link=link) or self._match_link(pattern='-*>', link=link):
3558
+ new_allowed_patterns = ["-*>"]
3559
+
3560
+ # Determine the new path taken
3561
+ new_path_taken = path_taken[:] + [next_node]
3562
+
3563
+ # Recursive call
3564
+ paths.extend(_search(end_node, new_path_taken, new_allowed_patterns))
3565
+
3566
+ # Output list of potentially directed uncovered paths
3567
+ return paths
3568
+
3569
+ # end def _search(end_node, path_taken, allowed_patterns)
3570
+
3571
+ # Output potentially directed uncovered paths
3572
+ paths = _search(end_node, [start_node], initial_allowed_patterns)
3573
+ return [path for path in paths if len(path) > 2]
3574
+
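+ # Editor's note (not part of the original source): "uncovered" means that every node on
+ # the path is non-adjacent to the node two positions before it (enforced via path_taken[-2]
+ # above), and "potentially directed" means that no link on the path has an arrowhead
+ # pointing back towards start_node (enforced via the allowed patterns).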
3575
+
3576
+ def _sort_search_set(self, search_set, reference_node):
3577
+ """Sort the nodes in search_set by their values in self.pval_max_val with respect to the reference_node. Nodes with higher absolute
3578
+ values appear earlier"""
3579
+
3580
+ sort_by_potential_minus_infs = [self._get_pval_max_val(node, reference_node) for node in search_set]
3581
+ sort_by = [(np.abs(value) if value != -np.inf else 0) for value in sort_by_potential_minus_infs]
3582
+
3583
+ return [x for _, x in sorted(zip(sort_by, search_set), reverse = True)]
3584
+
3585
+ def _get_pval_max_val(self, X, Y):
3586
+ """Return the test statistic value of that independence test for X and Y which, among all such tests, has the largest p-value."""
3587
+
3588
+ if X[1] < 0 or X[0] < Y[0]:
3589
+ return self.pval_max_val[Y[0]][X]
3590
+ else:
3591
+ return self.pval_max_val[X[0]][Y]
3592
+
3593
+ def _delete_sepsets(self, X, Y):
3594
+ """Delete all separating sets of X and Y. Y is assumed to be at lag 0"""
3595
+
3596
+ # Unpack X and Y
3597
+ (i, lag_i) = X
3598
+ (j, lag_j) = Y
3599
+
3600
+ assert lag_j == 0
3601
+
3602
+ # Delete the sepsets
3603
+ if lag_i < 0 or i < j:
3604
+ self.sepsets[j][X] = set()
3605
+ else:
3606
+ self.sepsets[i][Y] = set()
3607
+
3608
+
3609
+ if __name__ == '__main__':
3610
+
3611
+ from tigramite.independence_tests.parcorr import ParCorr
3612
+ import tigramite.data_processing as pp
3613
+ from tigramite.toymodels import structural_causal_processes as toys
3614
+ import tigramite.plotting as tp
3615
+ from matplotlib import pyplot as plt
3616
+
3617
+ # Example process to play around with
3618
+ # Each key refers to a variable and the incoming links are supplied
3619
+ # as a list of format [((var, -lag), coeff, function), ...]
3620
+ def lin_f(x): return x
3621
+ def nonlin_f(x): return (x + 5. * x ** 2 * np.exp(-x ** 2 / 20.))
3622
+
3623
+ links = {0: [((0, -1), 0.9, lin_f), ((3, -1), -0.6, lin_f)],
3624
+ 1: [((1, -1), 0.9, lin_f), ((3, -1), 0.6, lin_f)],
3625
+ 2: [((2, -1), 0.9, lin_f), ((1, -1), 0.6, lin_f)],
3626
+ 3: [],
3627
+ }
3628
+
3629
+ full_data, nonstat = toys.structural_causal_process(links,
3630
+ T=1000, seed=7)
3631
+
3632
+ # We now remove variable 3 which plays the role of a hidden confounder
3633
+ data = full_data[:, [0, 1, 2]]
3634
+
3635
+ # Data must be array of shape (time, variables)
3636
+ print(data.shape)
3637
+ dataframe = pp.DataFrame(data)
3638
+ cond_ind_test = ParCorr(significance='fixed_thres')
3639
+ lpcmci = LPCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
3640
+ results = lpcmci.run_lpcmci(tau_max=2, pc_alpha=0.01)
3641
+
3642
+ # # For a proper causal interpretation of the graph see the paper!
3643
+ # print(results['graph'])
3644
+ # tp.plot_graph(graph=results['graph'], val_matrix=results['val_matrix'])
3645
+ # plt.show()
3646
+
3647
+ # results = lpcmci.run_sliding_window_of(
3648
+ # window_step=499, window_length=500,
3649
+ # method='run_lpcmci', method_args={'tau_max':1})
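+
+ # Editor's sketch of further inspection (not part of the original source; double-check
+ # the plotting signature against your installed tigramite version):
+ # tp.plot_time_series_graph(graph=results['graph'], val_matrix=results['val_matrix'],
+ # var_names=['X0', 'X1', 'X2'])
+ # plt.show()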