tigramite-fast 5.2.10.1 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tigramite/__init__.py +0 -0
- tigramite/causal_effects.py +1525 -0
- tigramite/causal_mediation.py +1592 -0
- tigramite/data_processing.py +1574 -0
- tigramite/graphs.py +1509 -0
- tigramite/independence_tests/LBFGS.py +1114 -0
- tigramite/independence_tests/__init__.py +0 -0
- tigramite/independence_tests/cmiknn.py +661 -0
- tigramite/independence_tests/cmiknn_mixed.py +1397 -0
- tigramite/independence_tests/cmisymb.py +286 -0
- tigramite/independence_tests/gpdc.py +664 -0
- tigramite/independence_tests/gpdc_torch.py +820 -0
- tigramite/independence_tests/gsquared.py +190 -0
- tigramite/independence_tests/independence_tests_base.py +1310 -0
- tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
- tigramite/independence_tests/pairwise_CI.py +383 -0
- tigramite/independence_tests/parcorr.py +369 -0
- tigramite/independence_tests/parcorr_mult.py +485 -0
- tigramite/independence_tests/parcorr_wls.py +451 -0
- tigramite/independence_tests/regressionCI.py +403 -0
- tigramite/independence_tests/robust_parcorr.py +403 -0
- tigramite/jpcmciplus.py +966 -0
- tigramite/lpcmci.py +3649 -0
- tigramite/models.py +2257 -0
- tigramite/pcmci.py +3935 -0
- tigramite/pcmci_base.py +1218 -0
- tigramite/plotting.py +4735 -0
- tigramite/rpcmci.py +467 -0
- tigramite/toymodels/__init__.py +0 -0
- tigramite/toymodels/context_model.py +261 -0
- tigramite/toymodels/non_additive.py +1231 -0
- tigramite/toymodels/structural_causal_processes.py +1201 -0
- tigramite/toymodels/surrogate_generator.py +319 -0
- tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
- tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
- tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
- tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
- tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
tigramite/pcmci.py
ADDED
@@ -0,0 +1,3935 @@
"""Tigramite causal discovery for time series."""

# Author: Jakob Runge <jakob@jakob-runge.com>
#
# License: GNU General Public License v3.0

from __future__ import print_function
import warnings
import itertools
from collections import defaultdict
from copy import deepcopy
import numpy as np
import scipy.stats

from tigramite.pcmci_base import PCMCIbase

def _create_nested_dictionary(depth=0, lowest_type=dict):
    """Create a series of nested dictionaries to a maximum depth. The first
    depth - 1 nested dictionaries are defaultdicts, the last is a normal
    dictionary.

    Parameters
    ----------
    depth : int
        Maximum depth argument.
    lowest_type: callable (optional)
        Type contained in leaves of tree. Ex: list, dict, tuple, int, float ...
    """
    new_depth = depth - 1
    if new_depth <= 0:
        return defaultdict(lowest_type)
    return defaultdict(lambda: _create_nested_dictionary(new_depth))


def _nested_to_normal(nested_dict):
    """Transforms a nested default dictionary into standard dictionaries.

    Parameters
    ----------
    nested_dict : default dictionary of default dictionaries of ... etc.
    """
    if isinstance(nested_dict, defaultdict):
        nested_dict = {k: _nested_to_normal(v) for k, v in nested_dict.items()}
    return nested_dict
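# Editor's note (not part of the original source): a minimal sketch of how the
# two helpers above are used together; the keys below are hypothetical and
# mirror the iteration bookkeeping in PCMCI._run_pc_stable_single further down.
# Commented out so the module content stays unchanged when imported.
#
#     it = _create_nested_dictionary(4)
#     it['iterations'][0][(0, -1)][0]['conds'] = []   # intermediate levels auto-create
#     _nested_to_normal(it)   # same structure, but as plain nested dicts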


class PCMCI(PCMCIbase):
    r"""PCMCI causal discovery for time series datasets.

    PCMCI is a causal discovery framework for large-scale time series
    datasets. This class contains several methods. The standard PCMCI method
    addresses time-lagged causal discovery and is described in Ref [1] where
    also further sub-variants are discussed. Lagged as well as contemporaneous
    causal discovery is addressed with PCMCIplus and described in [5]. See the
    tutorials for guidance in applying these methods.

    PCMCI has:

    * different conditional independence tests adapted to linear or
      nonlinear dependencies, and continuously-valued or discrete data (
      implemented in ``tigramite.independence_tests``)
    * (mostly) hyperparameter optimization
    * easy parallelization (separate script)
    * handling of masked time series data
    * false discovery control and confidence interval estimation


    Notes
    -----

    .. image:: mci_schematic.*
       :width: 200pt

    In the PCMCI framework, the dependency structure of a set of time series
    variables is represented in a *time series graph* as shown in the Figure.
    The nodes of a time series graph are defined as the variables at
    different times and a link indicates a conditional dependency that can be
    interpreted as a causal dependency under certain assumptions (see paper).
    Assuming stationarity, the links are repeated in time. The parents
    :math:`\mathcal{P}` of a variable are defined as the set of all nodes
    with a link towards it (blue and red boxes in Figure).

    The different PCMCI methods estimate causal links by iterative
    conditional independence testing. PCMCI can be flexibly combined with
    any kind of conditional independence test statistic adapted to the kind
    of data (continuous or discrete) and its assumed dependency types.
    These are available in ``tigramite.independence_tests``.

    NOTE: MCI test statistic values define a particular measure of causal
    strength depending on the test statistic used. For example, ParCorr()
    results in normalized values between -1 and 1. However, if you are
    interested in quantifying causal effects, i.e., the effect of
    hypothetical interventions, you may better look at the causal effect
    estimation functionality of Tigramite.

    References
    ----------

    [1] J. Runge, P. Nowack, M. Kretschmer, S. Flaxman, D. Sejdinovic,
        Detecting and quantifying causal associations in large nonlinear time
        series datasets. Sci. Adv. 5, eaau4996 (2019)
        https://advances.sciencemag.org/content/5/11/eaau4996

    [5] J. Runge,
        Discovering contemporaneous and lagged causal relations in
        autocorrelated nonlinear time series datasets
        http://www.auai.org/~w-auai/uai2020/proceedings/579_main_paper.pdf

    Parameters
    ----------
    dataframe : data object
        This is the Tigramite dataframe object. Among others, it has the
        attributes dataframe.values yielding a numpy array of shape (
        observations T, variables N) and optionally a mask of the same shape.
    cond_ind_test : conditional independence test object
        This can be ParCorr or other classes from
        ``tigramite.independence_tests`` or an external test passed as a
        callable. This test can be based on the class
        tigramite.independence_tests.CondIndTest.
    verbosity : int, optional (default: 0)
        Verbose levels 0, 1, ...

    Attributes
    ----------
    all_parents : dictionary
        Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
        the conditioning-parents estimated with PC algorithm.
    val_min : dictionary
        Dictionary of form val_min[j][(i, -tau)] = float
        containing the minimum absolute test statistic value for each link
        estimated in the PC algorithm.
    pval_max : dictionary
        Dictionary of form pval_max[j][(i, -tau)] = float containing the maximum
        p-value for each link estimated in the PC algorithm.
    iterations : dictionary
        Dictionary containing further information on algorithm steps.
    N : int
        Number of variables.
    T : dict
        Time series sample length of dataset(s).
    """

    def __init__(self, dataframe,
                 cond_ind_test,
                 verbosity=0):

        # Init base class
        PCMCIbase.__init__(self, dataframe=dataframe,
                           cond_ind_test=cond_ind_test,
                           verbosity=verbosity)
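    # Editor's sketch (not part of the original source): typical construction of
    # a PCMCI object, assuming the standard tigramite API for DataFrame and
    # ParCorr. Commented out so the module content stays unchanged when imported.
    #
    #     import numpy as np
    #     from tigramite.data_processing import DataFrame
    #     from tigramite.independence_tests.parcorr import ParCorr
    #
    #     data = np.random.randn(500, 3)              # (T, N) observations
    #     dataframe = DataFrame(data, var_names=['X0', 'X1', 'X2'])
    #     pcmci = PCMCI(dataframe=dataframe, cond_ind_test=ParCorr(), verbosity=1)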


    def _iter_conditions(self, parent, conds_dim, all_parents):
        """Yield next condition.

        Yields next condition from lexicographically ordered conditions.

        Parameters
        ----------
        parent : tuple
            Tuple of form (i, -tau).
        conds_dim : int
            Cardinality in current step.
        all_parents : list
            List of form [(0, -1), (3, -2), ...].

        Yields
        ------
        cond : list
            List of form [(0, -1), (3, -2), ...] for the next condition.
        """
        all_parents_excl_current = [p for p in all_parents if p != parent]
        for cond in itertools.combinations(all_parents_excl_current, conds_dim):
            yield list(cond)
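    # Editor's sketch (not part of the original source): the subsets yielded for a
    # hypothetical parent set; the tested parent itself is always excluded from
    # the condition. Commented out so the module content stays unchanged.
    #
    #     parents = [(0, -1), (1, -1), (2, -2)]
    #     list(pcmci._iter_conditions(parent=(0, -1), conds_dim=1, all_parents=parents))
    #     # -> [[(1, -1)], [(2, -2)]]
    #     list(pcmci._iter_conditions(parent=(0, -1), conds_dim=2, all_parents=parents))
    #     # -> [[(1, -1), (2, -2)]]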

    def _sort_parents(self, parents_vals):
        """Sort current parents according to test statistic values.

        Sorting is from strongest to weakest absolute values.

        Parameters
        ----------
        parents_vals : dict
            Dictionary of form {(0, -1):float, ...} containing the minimum test
            statistic value of a link.

        Returns
        -------
        parents : list
            List of form [(0, -1), (3, -2), ...] containing sorted parents.
        """
        if self.verbosity > 1:
            print("\n    Sorting parents in decreasing order with "
                  "\n    weight(i-tau->j) = min_{iterations} |val_{ij}(tau)| ")
        # Get the absolute value for all the test statistics
        abs_values = {k: np.abs(parents_vals[k]) for k in list(parents_vals)}
        return sorted(abs_values, key=abs_values.get, reverse=True)
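    # Editor's sketch (not part of the original source): ranking behaviour on a
    # hypothetical value dictionary; keys come back sorted by absolute value,
    # strongest first. Commented out so the module content stays unchanged.
    #
    #     vals = {(0, -1): -0.6, (1, -2): 0.1, (2, -1): 0.3}
    #     pcmci._sort_parents(vals)
    #     # -> [(0, -1), (2, -1), (1, -2)]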

    def _print_link_info(self, j, index_parent, parent, num_parents,
                         already_removed=False):
        """Print info about the current link being tested.

        Parameters
        ----------
        j : int
            Index of current node being tested.
        index_parent : int
            Index of the current parent.
        parent : tuple
            Standard (i, tau) tuple of parent node id and time delay.
        num_parents : int
            Total number of parents.
        already_removed : bool
            Whether parent was already removed.
        """
        link_marker = {True:"o?o", False:"-?>"}

        abstau = abs(parent[1])
        if self.verbosity > 1:
            print("\n    Link (%s % d) %s %s (%d/%d):" % (
                self.var_names[parent[0]], parent[1], link_marker[abstau==0],
                self.var_names[j],
                index_parent + 1, num_parents))

            if already_removed:
                print("    Already removed.")

    def _print_cond_info(self, Z, comb_index, pval, val):
        """Print info about the condition.

        Parameters
        ----------
        Z : list
            The current condition being tested.
        comb_index : int
            Index of the combination yielding this condition.
        pval : float
            p-value from this condition.
        val : float
            value from this condition.
        """
        var_name_z = ""
        for i, tau in Z:
            var_name_z += "(%s % .2s) " % (self.var_names[i], tau)
        if len(Z) == 0: var_name_z = "()"
        print("    Subset %d: %s gives pval = %.5f / val = % .3f" %
              (comb_index, var_name_z, pval, val))

    def _print_a_pc_result(self, nonsig, conds_dim, max_combinations):
        """Print the results from the current iteration of conditions.

        Parameters
        ----------
        nonsig : bool
            Indicate non-significance.
        conds_dim : int
            Cardinality of the current step.
        max_combinations : int
            Maximum number of combinations of conditions of current cardinality
            to test.
        """
        # Start with an indent
        print_str = "    "
        # Determine the body of the text
        if nonsig:
            print_str += "Non-significance detected."
        elif conds_dim > max_combinations:
            print_str += "Still subsets of dimension" + \
                         " %d left," % (conds_dim) + \
                         " but q_max = %d reached." % (max_combinations)
        else:
            print_str += "No conditions of dimension %d left." % (conds_dim)
        # Print the message
        print(print_str)

    def _print_converged_pc_single(self, converged, j, max_conds_dim):
        """
        Print statement about the convergence of the pc_stable_single algorithm.

        Parameters
        ----------
        converged : bool
            True if convergence was reached.
        j : int
            Variable index.
        max_conds_dim : int
            Maximum number of conditions to test.
        """
        if converged:
            print("\nAlgorithm converged for variable %s" %
                  self.var_names[j])
        else:
            print(
                "\nAlgorithm not yet converged, but max_conds_dim = %d"
                " reached." % max_conds_dim)

    def _run_pc_stable_single(self, j,
                              link_assumptions_j=None,
                              tau_min=1,
                              tau_max=1,
                              save_iterations=False,
                              pc_alpha=0.2,
                              max_conds_dim=None,
                              max_combinations=1):
        """Lagged PC algorithm for estimating lagged parents of single variable.

        Parameters
        ----------
        j : int
            Variable index.
        link_assumptions_j : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        tau_min : int, optional (default: 1)
            Minimum time lag to test. Useful for variable selection in
            multi-step ahead predictions. Must be greater than zero.
        tau_max : int, optional (default: 1)
            Maximum time lag. Must be larger or equal to tau_min.
        save_iterations : bool, optional (default: False)
            Whether to save iteration step results such as conditions used.
        pc_alpha : float or None, optional (default: 0.2)
            Significance level in algorithm. If a list is given, pc_alpha is
            optimized using model selection criteria provided in the
            cond_ind_test class as get_model_selection_criterion(). If None,
            a default list of values is used.
        max_conds_dim : int, optional (default: None)
            Maximum number of conditions to test. If None is passed, this number
            is unrestricted.
        max_combinations : int, optional (default: 1)
            Maximum number of combinations of conditions of current cardinality
            to test in PC1 step.

        Returns
        -------
        parents : list
            List of estimated parents.
        val_min : dict
            Dictionary of form {(0, -1):float, ...} containing the minimum absolute
            test statistic value of a link.
        pval_max : dict
            Dictionary of form {(0, -1):float, ...} containing the maximum
            p-value of a link across different conditions.
        iterations : dict
            Dictionary containing further information on algorithm steps.
        """

        if pc_alpha < 0. or pc_alpha > 1.:
            raise ValueError("Choose 0 <= pc_alpha <= 1")

        # Initialize the dictionaries for the pval_max, val_dict, val_min
        # results
        pval_max = dict()
        val_dict = dict()
        val_min = dict()
        # Initialize the parents values from the selected links, copying to
        # ensure this initial argument is unchanged.
        parents = []
        for itau in link_assumptions_j:
            link_type = link_assumptions_j[itau]
            if itau != (j, 0) and link_type not in ['<--', '<?-']:
                parents.append(itau)

        val_dict = {(p[0], p[1]): None for p in parents}
        pval_max = {(p[0], p[1]): None for p in parents}

        # Define a nested defaultdict of depth 4 to save all information about
        # iterations
        iterations = _create_nested_dictionary(4)
        # Ensure tau_min is at least 1
        tau_min = max(1, tau_min)

        # Loop over all possible condition dimensions
        max_conds_dim = self._set_max_condition_dim(max_conds_dim,
                                                    tau_min, tau_max)
        # Iteration through increasing number of conditions, i.e. from
        # [0, max_conds_dim] inclusive
        converged = False
        for conds_dim in range(max_conds_dim + 1):
            # (Re)initialize the list of non-significant links
            nonsig_parents = list()
            # Check if the algorithm has converged
            if len(parents) - 1 < conds_dim:
                converged = True
                break
            # Print information about the current condition cardinality
            if self.verbosity > 1:
                print("\nTesting condition sets of dimension %d:" % conds_dim)

            # Iterate through all possible pairs (that have not converged yet)
            for index_parent, parent in enumerate(parents):
                # Print info about this link
                if self.verbosity > 1:
                    self._print_link_info(j, index_parent, parent, len(parents))
                # Iterate through all possible combinations
                nonsig = False
                for comb_index, Z in \
                        enumerate(self._iter_conditions(parent, conds_dim,
                                                        parents)):
                    # Break if we try too many combinations
                    if comb_index >= max_combinations:
                        break
                    # Perform independence test
                    if link_assumptions_j[parent] == '-->':
                        val = 1.
                        pval = 0.
                        dependent = True
                    else:
                        val, pval, dependent = self.cond_ind_test.run_test(
                            X=[parent],
                            Y=[(j, 0)],
                            Z=Z,
                            tau_max=tau_max,
                            alpha_or_thres=pc_alpha,
                            )
                    # Print some information if needed
                    if self.verbosity > 1:
                        self._print_cond_info(Z, comb_index, pval, val)
                    # Keep track of maximum p-value and minimum estimated value
                    # for each pair (across any condition)
                    val_min[parent] = \
                        min(np.abs(val), val_min.get(parent,
                                                     float("inf")))

                    if pval_max[parent] is None or pval > pval_max[parent]:
                        pval_max[parent] = pval
                        val_dict[parent] = val

                    # Save the iteration if we need to
                    if save_iterations:
                        a_iter = iterations['iterations'][conds_dim][parent]
                        a_iter[comb_index]['conds'] = list(Z)
                        a_iter[comb_index]['val'] = val
                        a_iter[comb_index]['pval'] = pval
                    # Delete link later and break the combinations loop
                    # if non-significant
                    if not dependent:  # pval > pc_alpha:
                        nonsig_parents.append((j, parent))
                        nonsig = True
                        break

                # Print the results if needed
                if self.verbosity > 1:
                    self._print_a_pc_result(nonsig,
                                            conds_dim, max_combinations)

            # Remove non-significant links
            for _, parent in nonsig_parents:
                del val_min[parent]
            # Return the parents list sorted by the test metric so that the
            # updated parents list is given to the next cond_dim loop
            parents = self._sort_parents(val_min)
            # Print information about the change in possible parents
            if self.verbosity > 1:
                print("\nUpdating parents:")
                self._print_parents_single(j, parents, val_min, pval_max)

        # Print information about if convergence was reached
        if self.verbosity > 1:
            self._print_converged_pc_single(converged, j, max_conds_dim)
        # Return the results
        return {'parents': parents,
                'val_min': val_min,
                'val_dict': val_dict,
                'pval_max': pval_max,
                'iterations': _nested_to_normal(iterations)}

    def _print_pc_params(self, link_assumptions, tau_min, tau_max, pc_alpha,
                         max_conds_dim, max_combinations):
        """Print the setup of the current pc_stable run.

        Parameters
        ----------
        link_assumptions : dict or None
            Dictionary specifying which links should be tested.
        tau_min : int, default: 1
            Minimum time lag to test.
        tau_max : int, default: 1
            Maximum time lag to test.
        pc_alpha : float or list of floats
            Significance level in algorithm.
        max_conds_dim : int
            Maximum number of conditions to test.
        max_combinations : int
            Maximum number of combinations of conditions to test.
        """
        print("\n##\n## Step 1: PC1 algorithm for selecting lagged conditions\n##"
              "\n\nParameters:")
        if link_assumptions is not None:
            print("link_assumptions = %s" % str(link_assumptions))
        print("independence test = %s" % self.cond_ind_test.measure
              + "\ntau_min = %d" % tau_min
              + "\ntau_max = %d" % tau_max
              + "\npc_alpha = %s" % pc_alpha
              + "\nmax_conds_dim = %s" % max_conds_dim
              + "\nmax_combinations = %d" % max_combinations)
        print("\n")

    def _print_pc_sel_results(self, pc_alpha, results, j, score, optimal_alpha):
        """Print the results from the pc_alpha selection.

        Parameters
        ----------
        pc_alpha : list
            Tested significance levels in algorithm.
        results : dict
            Results from the tested pc_alphas.
        score : array of floats
            Scores from each pc_alpha.
        j : int
            Index of current variable.
        optimal_alpha : float
            Optimal value of pc_alpha.
        """
        print("\n# Condition selection results:")
        for iscore, pc_alpha_here in enumerate(pc_alpha):
            names_parents = "[ "
            for pari in results[pc_alpha_here]['parents']:
                names_parents += "(%s % d) " % (
                    self.var_names[pari[0]], pari[1])
            names_parents += "]"
            print("    pc_alpha=%s got score %.4f with parents %s" %
                  (pc_alpha_here, score[iscore], names_parents))
        print("\n==> optimal pc_alpha for variable %s is %s" %
              (self.var_names[j], optimal_alpha))

    def _check_tau_limits(self, tau_min, tau_max):
        """Check the tau limits adhere to 0 <= tau_min <= tau_max.

        Parameters
        ----------
        tau_min : float
            Minimum tau value.
        tau_max : float
            Maximum tau value.
        """
        if not 0 <= tau_min <= tau_max:
            raise ValueError("tau_max = %d, " % (tau_max) + \
                             "tau_min = %d, " % (tau_min) + \
                             "but 0 <= tau_min <= tau_max")

    def _set_max_condition_dim(self, max_conds_dim, tau_min, tau_max):
        """
        Set the maximum dimension of the conditions. Defaults to
        self.N * (tau_max - tau_min + 1).

        Parameters
        ----------
        max_conds_dim : int
            Input maximum condition dimension.
        tau_max : int
            Maximum tau.

        Returns
        -------
        max_conds_dim : int
            Input maximum condition dimension or default.
        """
        # Check if an input was given
        if max_conds_dim is None:
            max_conds_dim = self.N * (tau_max - tau_min + 1)
        # Check this is valid
        if max_conds_dim < 0:
            raise ValueError("maximum condition dimension must be >= 0")
        return max_conds_dim
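    # Editor's note (not part of the original source): the default cap worked
    # through for hypothetical sizes N = 5, tau_min = 1, tau_max = 3:
    #
    #     max_conds_dim = 5 * (3 - 1 + 1)   # = 15, i.e. every lagged variable
    #                                       #   in the tested window may enter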

    def run_pc_stable(self,
                      selected_links=None,
                      link_assumptions=None,
                      tau_min=1,
                      tau_max=1,
                      save_iterations=False,
                      pc_alpha=0.2,
                      max_conds_dim=None,
                      max_combinations=1):
        """Lagged PC algorithm for estimating lagged parents of all variables.

        Parents are made available as self.all_parents

        Parameters
        ----------
        selected_links : dict or None
            Deprecated, replaced by link_assumptions.
        link_assumptions : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        tau_min : int, default: 1
            Minimum time lag to test. Useful for multi-step ahead predictions.
            Must be greater than zero.
        tau_max : int, default: 1
            Maximum time lag. Must be larger or equal to tau_min.
        save_iterations : bool, default: False
            Whether to save iteration step results such as conditions used.
        pc_alpha : float or list of floats, default: 0.2
            Significance level in algorithm. If a list or None is passed, the
            pc_alpha level is optimized for every variable across the given
            pc_alpha values using the score computed in
            cond_ind_test.get_model_selection_criterion().
        max_conds_dim : int or None
            Maximum number of conditions to test. If None is passed, this number
            is unrestricted.
        max_combinations : int, default: 1
            Maximum number of combinations of conditions of current cardinality
            to test in PC1 step.

        Returns
        -------
        all_parents : dict
            Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...}
            containing estimated parents.
        """
        if selected_links is not None:
            raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

        # Create an internal copy of pc_alpha
        _int_pc_alpha = deepcopy(pc_alpha)
        # Check if we are selecting an optimal alpha value
        select_optimal_alpha = True
        # Set the default values for pc_alpha
        if _int_pc_alpha is None:
            _int_pc_alpha = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5]
        elif not isinstance(_int_pc_alpha, (list, tuple, np.ndarray)):
            _int_pc_alpha = [_int_pc_alpha]
            select_optimal_alpha = False
        # Check the limits on tau_min
        self._check_tau_limits(tau_min, tau_max)
        tau_min = max(1, tau_min)
        # Check that the maximum combinations variable is correct
        if max_combinations <= 0:
            raise ValueError("max_combinations must be > 0")
        # Implement defaultdict for all pval_max, val_max, and iterations
        pval_max = defaultdict(dict)
        val_min = defaultdict(dict)
        val_dict = defaultdict(dict)
        iterations = defaultdict(dict)

        if self.verbosity > 0:
            self._print_pc_params(link_assumptions, tau_min, tau_max,
                                  _int_pc_alpha, max_conds_dim,
                                  max_combinations)

        # Set the selected links
        # _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max,
        #                                      remove_contemp=True)
        _int_link_assumptions = self._set_link_assumptions(link_assumptions,
                                                           tau_min, tau_max,
                                                           remove_contemp=True)

        # Initialize all parents
        all_parents = dict()
        # Set the maximum condition dimension
        max_conds_dim = self._set_max_condition_dim(max_conds_dim,
                                                    tau_min, tau_max)

        # Loop through the selected variables
        for j in range(self.N):
            # Print the status of this variable
            if self.verbosity > 1:
                print("\n## Variable %s" % self.var_names[j])
                print("\nIterating through pc_alpha = %s:" % _int_pc_alpha)
            # Initialize the scores for selecting the optimal alpha
            score = np.zeros_like(_int_pc_alpha)
            # Initialize the result
            results = {}
            for iscore, pc_alpha_here in enumerate(_int_pc_alpha):
                # Print statement about the pc_alpha being tested
                if self.verbosity > 1:
                    print("\n# pc_alpha = %s (%d/%d):" % (pc_alpha_here,
                                                          iscore + 1,
                                                          score.shape[0]))
                # Get the results for this alpha value
                results[pc_alpha_here] = \
                    self._run_pc_stable_single(j,
                                               link_assumptions_j=_int_link_assumptions[j],
                                               tau_min=tau_min,
                                               tau_max=tau_max,
                                               save_iterations=save_iterations,
                                               pc_alpha=pc_alpha_here,
                                               max_conds_dim=max_conds_dim,
                                               max_combinations=max_combinations)
                # Figure out the best score if there is more than one pc_alpha
                # value
                if select_optimal_alpha:
                    score[iscore] = \
                        self.cond_ind_test.get_model_selection_criterion(
                            j, results[pc_alpha_here]['parents'], tau_max)
            # Record the optimal alpha value
            optimal_alpha = _int_pc_alpha[score.argmin()]
            # Only print the selection results if there is more than one
            # pc_alpha
            if self.verbosity > 1 and select_optimal_alpha:
                self._print_pc_sel_results(_int_pc_alpha, results, j,
                                           score, optimal_alpha)
            # Record the results for this variable
            all_parents[j] = results[optimal_alpha]['parents']
            val_min[j] = results[optimal_alpha]['val_min']
            val_dict[j] = results[optimal_alpha]['val_dict']
            pval_max[j] = results[optimal_alpha]['pval_max']
            iterations[j] = results[optimal_alpha]['iterations']
            # Only save the optimal alpha if there is more than one pc_alpha
            if select_optimal_alpha:
                iterations[j]['optimal_pc_alpha'] = optimal_alpha
        # Save the results in the current status of the algorithm
        self.all_parents = all_parents
        self.val_matrix = self._dict_to_matrix(val_dict, tau_max, self.N,
                                               default=0.)
        self.p_matrix = self._dict_to_matrix(pval_max, tau_max, self.N,
                                             default=1.)
        self.iterations = iterations
        self.val_min = val_min
        self.pval_max = pval_max
        # Print the results
        if self.verbosity > 0:
            print("\n## Resulting lagged parent (super)sets:")
            self._print_parents(all_parents, val_min, pval_max)
        # Return the parents
        return all_parents
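    # Editor's sketch (not part of the original source): selecting lagged parent
    # supersets with the PC1 step, continuing the hypothetical `pcmci` instance
    # from the construction sketch above. Commented out so the module content
    # stays unchanged; the printed result is illustrative only.
    #
    #     all_parents = pcmci.run_pc_stable(tau_min=1, tau_max=2, pc_alpha=0.2)
    #     # e.g. {0: [(0, -1), (2, -1)], 1: [(1, -1)], 2: []}
    #     pcmci.pval_max[0]   # maximum p-values of the retained links of variable 0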

    def _print_parents_single(self, j, parents, val_min, pval_max):
        """Print current parents for variable j.

        Parameters
        ----------
        j : int
            Index of current variable.
        parents : list
            List of form [(0, -1), (3, -2), ...].
        val_min : dict
            Dictionary of form {(0, -1):float, ...} containing the minimum absolute
            test statistic value of a link.
        pval_max : dict
            Dictionary of form {(0, -1):float, ...} containing the maximum
            p-value of a link across different conditions.
        """
        if len(parents) < 20 or hasattr(self, 'iterations'):
            print("\n    Variable %s has %d link(s):" % (
                self.var_names[j], len(parents)))
            if (hasattr(self, 'iterations')
                    and 'optimal_pc_alpha' in list(self.iterations[j])):
                print("    [pc_alpha = %s]" % (
                    self.iterations[j]['optimal_pc_alpha']))
            if val_min is None or pval_max is None:
                for p in parents:
                    print("        (%s % .d)" % (
                        self.var_names[p[0]], p[1]))
            else:
                for p in parents:
                    print("        (%s % .d): max_pval = %.5f, |min_val| = % .3f" % (
                        self.var_names[p[0]], p[1], pval_max[p],
                        abs(val_min[p])))
        else:
            print("\n    Variable %s has %d link(s):" % (
                self.var_names[j], len(parents)))

    def _print_parents(self, all_parents, val_min, pval_max):
        """Print current parents.

        Parameters
        ----------
        all_parents : dictionary
            Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
            the conditioning-parents estimated with PC algorithm.
        val_min : dict
            Dictionary of form {0:{(0, -1):float, ...}} containing the minimum
            absolute test statistic value of a link.
        pval_max : dict
            Dictionary of form {0:{(0, -1):float, ...}} containing the maximum
            p-value of a link across different conditions.
        """
        for j in [var for var in list(all_parents)]:
            if val_min is None or pval_max is None:
                self._print_parents_single(j, all_parents[j],
                                           None, None)
            else:
                self._print_parents_single(j, all_parents[j],
                                           val_min[j], pval_max[j])

    def _mci_condition_to_string(self, conds):
        """Convert the list of conditions into a string.

        Parameters
        ----------
        conds : list
            List of conditions.
        """
        cond_string = "[ "
        for k, tau_k in conds:
            cond_string += "(%s % d) " % (self.var_names[k], tau_k)
        cond_string += "]"
        return cond_string

    def _print_mci_conditions(self, conds_y, conds_x_lagged,
                              j, i, tau, count, n_parents):
        """Print information about the conditions for the MCI algorithm.

        Parameters
        ----------
        conds_y : list
            Conditions on node.
        conds_x_lagged : list
            Conditions on parent.
        j : int
            Current node.
        i : int
            Parent node.
        tau : int
            Parent time delay.
        count : int
            Index of current parent.
        n_parents : int
            Total number of parents.
        """
        # Remove the current parent from the conditions
        conds_y_no_i = [node for node in conds_y if node != (i, tau)]
        # Get the condition string for parent
        condy_str = self._mci_condition_to_string(conds_y_no_i)
        # Get the condition string for node
        condx_str = self._mci_condition_to_string(conds_x_lagged)
        # Format and print the information
        link_marker = {True:"o?o", False:"-?>"}
        indent = "\n        "
        print_str = indent + "link (%s % d) " % (self.var_names[i], tau)
        print_str += "%s %s (%d/%d):" % (link_marker[tau==0],
                                         self.var_names[j], count + 1, n_parents)
        print_str += indent + "with conds_y = %s" % (condy_str)
        print_str += indent + "with conds_x = %s" % (condx_str)
        print(print_str)

    def _print_pcmciplus_conditions(self, lagged_parents, i, j, abstau,
                                    max_conds_py, max_conds_px,
                                    max_conds_px_lagged):
        """Print information about the conditions for PCMCIplus.

        Parameters
        ----------
        lagged_parents : dictionary of lists
            Dictionary of lagged parents for each node.
        j : int
            Current node.
        i : int
            Parent node.
        abstau : int
            Parent time delay.
        max_conds_py : int
            Max number of parents for node j.
        max_conds_px : int
            Max number of parents for lagged node i.
        max_conds_px_lagged : int
            Maximum number of lagged conditions of X when X is lagged in MCI
            tests. If None is passed, this number is equal to max_conds_px.
        """
        conds_y = lagged_parents[j][:max_conds_py]
        conds_y_no_i = [node for node in conds_y if node != (i, -abstau)]
        if abstau == 0:
            conds_x = lagged_parents[i][:max_conds_px]
        else:
            if max_conds_px_lagged is None:
                conds_x = lagged_parents[i][:max_conds_px]
            else:
                conds_x = lagged_parents[i][:max_conds_px_lagged]

        # Shift the conditions for X by tau
        conds_x_lagged = [(k, -abstau + k_tau) for k, k_tau in conds_x]
        condy_str = self._mci_condition_to_string(conds_y_no_i)
        condx_str = self._mci_condition_to_string(conds_x_lagged)
        print_str = "    with conds_y = %s" % (condy_str)
        print_str += "\n    with conds_x = %s" % (condx_str)
        print(print_str)

    def _get_int_parents(self, parents):
        """Get the input parents dictionary.

        Parameters
        ----------
        parents : dict or None
            Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...}
            specifying the conditions for each variable. If None is
            passed, no conditions are used.

        Returns
        -------
        int_parents : defaultdict of lists
            Internal copy of parents, respecting default options.
        """
        int_parents = deepcopy(parents)
        if int_parents is None:
            int_parents = defaultdict(list)
        else:
            int_parents = defaultdict(list, int_parents)
        return int_parents

    def _iter_indep_conds(self,
                          parents,
                          _int_link_assumptions,
                          max_conds_py,
                          max_conds_px):
        """Iterate through the conditions dictated by the arguments, yielding
        the needed arguments for conditional independence functions.

        Parameters
        ----------
        parents : dict
            Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...}
            specifying the conditions for each variable.
        _int_link_assumptions : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        max_conds_py : int
            Maximum number of conditions of Y to use.
        max_conds_px : int
            Maximum number of conditions of Z to use.

        Yields
        ------
        i, j, tau, Z : list of tuples
            (i, tau) is the parent node, (j, 0) is the current node, and Z is of
            the form [(var, tau + tau')] and specifies the condition to test.
        """
        # Loop over the selected variables
        for j in range(self.N):
            # Get the conditions for node j
            conds_y = parents[j][:max_conds_py]
            # Create a parent list from links separated in time and by node
            # parent_list = [(i, tau) for i, tau in _int_link_assumptions[j]
            #                if (i, tau) != (j, 0)]
            parent_list = []
            for itau in _int_link_assumptions[j]:
                link_type = _int_link_assumptions[j][itau]
                if itau != (j, 0) and link_type not in ['<--', '<?-']:
                    parent_list.append(itau)
            # Iterate through parents (except those in conditions)
            for cnt, (i, tau) in enumerate(parent_list):
                # Get the conditions for node i
                conds_x = parents[i][:max_conds_px]
                # Shift the conditions for X by tau
                conds_x_lagged = [(k, tau + k_tau) for k, k_tau in conds_x]
                # Print information about the mci conditions if requested
                if self.verbosity > 1:
                    self._print_mci_conditions(conds_y, conds_x_lagged, j, i,
                                               tau, cnt, len(parent_list))
                # Construct lists of tuples for estimating
                # I(X_t-tau; Y_t | Z^Y_t, Z^X_t-tau)
                # with conditions for X shifted by tau
                Z = [node for node in conds_y if node != (i, tau)]
                # Remove overlapped nodes between conds_x_lagged and conds_y
                Z += [node for node in conds_x_lagged if node not in Z]
                # Yield these lists
                yield j, i, tau, Z

    def _run_mci_or_variants(self,
                             selected_links=None,
                             link_assumptions=None,
                             tau_min=0,
                             tau_max=1,
                             parents=None,
                             max_conds_py=None,
                             max_conds_px=None,
                             val_only=False,
                             alpha_level=0.05,
                             fdr_method='none'):
        """Base function for MCI method and variants.

        Returns the matrices of test statistic values, (optionally corrected)
        p-values, and (optionally) confidence intervals. Also (new in 4.3)
        returns graph based on alpha_level (and optional FDR-correction).

        Parameters
        ----------
        selected_links : dict or None
            Deprecated, replaced by link_assumptions.
        link_assumptions : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        tau_min : int, default: 0
            Minimum time lag to test. Note that zero-lags are undirected.
        tau_max : int, default: 1
            Maximum time lag. Must be larger or equal to tau_min.
        parents : dict or None
            Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...}
            specifying the conditions for each variable. If None is
            passed, no conditions are used.
        max_conds_py : int or None
            Maximum number of conditions of Y to use. If None is passed, this
            number is unrestricted.
        max_conds_px : int or None
            Maximum number of conditions of Z to use. If None is passed, this
            number is unrestricted.
        val_only : bool, default: False
            Option to only compute dependencies and not p-values.
        alpha_level : float, optional (default: 0.05)
            Significance level at which the p_matrix is thresholded to
            get graph.
        fdr_method : str, optional (default: 'none')
            Correction method, currently implemented is Benjamini-Hochberg
            False Discovery Rate method ('fdr_bh').

        Returns
        -------
        graph : array of shape [N, N, tau_max+1]
            Causal graph, see description above for interpretation.
        val_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of test statistic values.
        p_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of p-values, optionally adjusted if fdr_method is
            not 'none'.
        conf_matrix : array of shape [N, N, tau_max+1,2]
            Estimated matrix of confidence intervals of test statistic values.
            Only computed if set in cond_ind_test, where also the percentiles
            are set.
        """
        if selected_links is not None:
            raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

        # Check the limits on tau
        self._check_tau_limits(tau_min, tau_max)
        # Set the selected links
        # _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max)
        _int_link_assumptions = self._set_link_assumptions(link_assumptions, tau_min, tau_max)

        # Set the maximum condition dimension for Y and X
        max_conds_py = self._set_max_condition_dim(max_conds_py,
                                                   tau_min, tau_max)
        max_conds_px = self._set_max_condition_dim(max_conds_px,
                                                   tau_min, tau_max)
        # Get the parents that will be checked
        _int_parents = self._get_int_parents(parents)
        # Initialize the return values
        val_matrix = np.zeros((self.N, self.N, tau_max + 1))
        p_matrix = np.ones((self.N, self.N, tau_max + 1))
        # Initialize the optional return of the confidence matrix
        conf_matrix = None
        if self.cond_ind_test.confidence is not None:
            conf_matrix = np.zeros((self.N, self.N, tau_max + 1, 2))

        # Get the conditions as implied by the input arguments
        for j, i, tau, Z in self._iter_indep_conds(_int_parents,
                                                   _int_link_assumptions,
                                                   max_conds_py,
                                                   max_conds_px):
            # Set X and Y (for clarity of code)
            X = [(i, tau)]
            Y = [(j, 0)]

            if val_only is False:
                # Run the independence tests and record the results
                if ((i, -abs(tau)) in _int_link_assumptions[j]
                        and _int_link_assumptions[j][(i, -abs(tau))] in ['-->', 'o-o']):
                    val = 1.
                    pval = 0.
                else:
                    val, pval, _ = self.cond_ind_test.run_test(X, Y, Z=Z,
                                                               tau_max=tau_max,
                                                               alpha_or_thres=alpha_level,
                                                               )
                val_matrix[i, j, abs(tau)] = val
                p_matrix[i, j, abs(tau)] = pval
            else:
                val = self.cond_ind_test.get_measure(X, Y, Z=Z, tau_max=tau_max)
                val_matrix[i, j, abs(tau)] = val

            # Get the confidence value, returns None if cond_ind_test.confidence
            # is False
            conf = self.cond_ind_test.get_confidence(X, Y, Z=Z, tau_max=tau_max)
            # Record the value if the conditional independence requires it
            if self.cond_ind_test.confidence:
                conf_matrix[i, j, abs(tau)] = conf

        if val_only:
            results = {'val_matrix':val_matrix,
                       'conf_matrix':conf_matrix}
            self.results = results
            return results

        # Correct the p_matrix if there is a fdr_method
        if fdr_method != 'none':
            if self.cond_ind_test.significance == 'fixed_thres':
                raise ValueError("FDR-correction not compatible with significance == 'fixed_thres'")
            p_matrix = self.get_corrected_pvalues(p_matrix=p_matrix, tau_min=tau_min,
                                                  tau_max=tau_max,
                                                  link_assumptions=_int_link_assumptions,
                                                  fdr_method=fdr_method)

        # Threshold p_matrix to get graph (or val_matrix for significance == 'fixed_thres')
        if self.cond_ind_test.significance == 'fixed_thres':
            if self.cond_ind_test.two_sided:
                final_graph = np.abs(val_matrix) >= np.abs(alpha_level)
            else:
                final_graph = val_matrix >= alpha_level
        else:
            final_graph = p_matrix <= alpha_level

        # Convert to string graph representation
        graph = self.convert_to_string_graph(final_graph)

        # Symmetrize p_matrix and val_matrix
        symmetrized_results = self.symmetrize_p_and_val_matrix(
            p_matrix=p_matrix,
            val_matrix=val_matrix,
            link_assumptions=_int_link_assumptions,
            conf_matrix=conf_matrix)

        if self.verbosity > 0:
            self.print_significant_links(
                graph = graph,
                p_matrix = symmetrized_results['p_matrix'],
                val_matrix = symmetrized_results['val_matrix'],
                conf_matrix = symmetrized_results['conf_matrix'],
                alpha_level = alpha_level)

        # Return the values as a dictionary and store in class
        results = {
            'graph': graph,
            'p_matrix': symmetrized_results['p_matrix'],
            'val_matrix': symmetrized_results['val_matrix'],
            'conf_matrix': symmetrized_results['conf_matrix'],
            }
        self.results = results
        return results
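    # Editor's note (not part of the original source): with the default p-value
    # based significance handling, the adjacency decision above reduces to
    #
    #     p_matrix[i, j, abs(tau)] <= alpha_level
    #
    # whereas for significance == 'fixed_thres' the test statistic itself is
    # thresholded, as in the branch above.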
|
|
1154
|
+
|
|
1155
|
+
def run_mci(self,
|
|
1156
|
+
selected_links=None,
|
|
1157
|
+
link_assumptions=None,
|
|
1158
|
+
tau_min=0,
|
|
1159
|
+
tau_max=1,
|
|
1160
|
+
parents=None,
|
|
1161
|
+
max_conds_py=None,
|
|
1162
|
+
max_conds_px=None,
|
|
1163
|
+
val_only=False,
|
|
1164
|
+
alpha_level=0.05,
|
|
1165
|
+
fdr_method='none'):
|
|
1166
|
+
"""MCI conditional independence tests.
|
|
1167
|
+
|
|
1168
|
+
Implements the MCI test (Algorithm 2 in [1]_).
|
|
1169
|
+
|
|
1170
|
+
Returns the matrices of test statistic values, (optionally corrected)
|
|
1171
|
+
p-values, and (optionally) confidence intervals. Also (new in 4.3)
|
|
1172
|
+
returns graph based on alpha_level (and optional FDR-correction).
|
|
1173
|
+
|
|
1174
|
+
Parameters
|
|
1175
|
+
----------
|
|
1176
|
+
selected_links : dict or None
|
|
1177
|
+
Deprecated, replaced by link_assumptions
|
|
1178
|
+
link_assumptions : dict
|
|
1179
|
+
Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
|
|
1180
|
+
assumptions about links. This initializes the graph with entries
|
|
1181
|
+
graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
|
|
1182
|
+
implies that a directed link from i to j at lag 0 must exist.
|
|
1183
|
+
Valid link types are 'o-o', '-->', '<--'. In addition, the middle
|
|
1184
|
+
mark can be '?' instead of '-'. Then '-?>' implies that this link
|
|
1185
|
+
may not exist, but if it exists, its orientation is '-->'. Link
|
|
1186
|
+
assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
|
|
1187
|
+
requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
|
|
1188
|
+
does not appear in the dictionary, it is assumed absent. That is,
|
|
1189
|
+
if link_assumptions is not None, then all links have to be specified
|
|
1190
|
+
or the links are assumed absent.
|
|
1191
|
+
tau_min : int, default: 0
|
|
1192
|
+
Minimum time lag to test. Note that zero-lags are undirected.
|
|
1193
|
+
tau_max : int, default: 1
|
|
1194
|
+
Maximum time lag. Must be larger or equal to tau_min.
|
|
1195
|
+
parents : dict or None
|
|
1196
|
+
Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...}
|
|
1197
|
+
specifying the conditions for each variable. If None is
|
|
1198
|
+
passed, no conditions are used.
|
|
1199
|
+
max_conds_py : int or None
|
|
1200
|
+
Maximum number of conditions of Y to use. If None is passed, this
|
|
1201
|
+
number is unrestricted.
|
|
1202
|
+
max_conds_px : int or None
|
|
1203
|
+
Maximum number of conditions of Z to use. If None is passed, this
|
|
1204
|
+
number is unrestricted.
|
|
1205
|
+
val_only : bool, default: False
|
|
1206
|
+
Option to only compute dependencies and not p-values.
|
|
1207
|
+
alpha_level : float, optional (default: 0.05)
|
|
1208
|
+
Significance level at which the p_matrix is thresholded to
|
|
1209
|
+
get graph.
|
|
1210
|
+
fdr_method : str, optional (default: 'none')
|
|
1211
|
+
Correction method, currently implemented is Benjamini-Hochberg
|
|
1212
|
+
False Discovery Rate method ('fdr_bh').
|
|
1213
|
+
|
|
1214
|
+
Returns
|
|
1215
|
+
-------
|
|
1216
|
+
graph : array of shape [N, N, tau_max+1]
|
|
1217
|
+
Causal graph, see description above for interpretation.
|
|
1218
|
+
val_matrix : array of shape [N, N, tau_max+1]
|
|
1219
|
+
Estimated matrix of test statistic values.
|
|
1220
|
+
p_matrix : array of shape [N, N, tau_max+1]
|
|
1221
|
+
Estimated matrix of p-values, optionally adjusted if fdr_method is
|
|
1222
|
+
not 'none'.
|
|
1223
|
+
conf_matrix : array of shape [N, N, tau_max+1,2]
|
|
1224
|
+
Estimated matrix of confidence intervals of test statistic values.
|
|
1225
|
+
Only computed if set in cond_ind_test, where also the percentiles
|
|
1226
|
+
are set.
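
        Examples
        --------
        Illustrative usage sketch; it assumes a ``pcmci`` object constructed
        on the toy data of the ``run_pcmci`` example and a preceding
        condition-selection step:

        >>> # Condition-selection with PC1, then MCI tests up to lag 2
        >>> parents = pcmci.run_pc_stable(tau_max=2, pc_alpha=0.2)
        >>> results = pcmci.run_mci(tau_max=2, parents=parents,
        ...                         alpha_level=0.01)
        >>> graph = results['graph']        # array of shape (N, N, tau_max+1)
        >>> p_matrix = results['p_matrix']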
        """

        if selected_links is not None:
            raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

        if self.verbosity > 0:
            print("\n##\n## Step 2: MCI algorithm\n##"
                  "\n\nParameters:")
            print("\nindependence test = %s" % self.cond_ind_test.measure
                  + "\ntau_min = %d" % tau_min
                  + "\ntau_max = %d" % tau_max
                  + "\nmax_conds_py = %s" % max_conds_py
                  + "\nmax_conds_px = %s" % max_conds_px)

        return self._run_mci_or_variants(
            link_assumptions=link_assumptions,
            tau_min=tau_min,
            tau_max=tau_max,
            parents=parents,
            max_conds_py=max_conds_py,
            max_conds_px=max_conds_px,
            val_only=val_only,
            alpha_level=alpha_level,
            fdr_method=fdr_method)

    def get_lagged_dependencies(self,
                                selected_links=None,
                                link_assumptions=None,
                                tau_min=0,
                                tau_max=1,
                                val_only=False,
                                alpha_level=0.05,
                                fdr_method='none'):
        """Unconditional lagged independence tests.

        Implements the unconditional lagged independence test (see [1]_).

        Returns the matrices of test statistic values, (optionally corrected)
        p-values, and (optionally) confidence intervals. Also (new in 4.3)
        returns graph based on alpha_level (and optional FDR-correction).

        Parameters
        ----------
        selected_links : dict or None
            Deprecated, replaced by link_assumptions
        link_assumptions : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        tau_min : int, default: 0
            Minimum time lag to test. Note that zero-lags are undirected.
        tau_max : int, default: 1
            Maximum time lag. Must be larger or equal to tau_min.
        val_only : bool, default: False
            Option to only compute dependencies and not p-values.
        alpha_level : float, optional (default: 0.05)
            Significance level at which the p_matrix is thresholded to
            get graph.
        fdr_method : str, optional (default: 'none')
            Correction method, currently implemented is Benjamini-Hochberg
            False Discovery Rate method ('fdr_bh').

        Returns
        -------
        graph : array of shape [N, N, tau_max+1]
            Causal graph, see description above for interpretation.
        val_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of test statistic values.
        p_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of p-values, optionally adjusted if fdr_method is
            not 'none'.
        conf_matrix : array of shape [N, N, tau_max+1,2]
            Estimated matrix of confidence intervals of test statistic values.
            Only computed if set in cond_ind_test, where also the percentiles
            are set.
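
        Examples
        --------
        Illustrative sketch, assuming a ``pcmci`` object as in the
        ``run_pcmci`` example:

        >>> results = pcmci.get_lagged_dependencies(tau_max=2)
        >>> lagged_vals = results['val_matrix']   # unconditional lag functions
        >>> # useful to inspect at which lags dependencies peak before
        >>> # choosing tau_max for PCMCI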
        """

        if selected_links is not None:
            raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

        if self.verbosity > 0:
            print("\n##\n## Estimating lagged dependencies \n##"
                  "\n\nParameters:")
            print("\nindependence test = %s" % self.cond_ind_test.measure
                  + "\ntau_min = %d" % tau_min
                  + "\ntau_max = %d" % tau_max)

        return self._run_mci_or_variants(
            link_assumptions=link_assumptions,
            tau_min=tau_min,
            tau_max=tau_max,
            parents=None,
            max_conds_py=0,
            max_conds_px=0,
            val_only=val_only,
            alpha_level=alpha_level,
            fdr_method=fdr_method)

    def run_fullci(self,
                   selected_links=None,
                   link_assumptions=None,
                   tau_min=0,
                   tau_max=1,
                   val_only=False,
                   alpha_level=0.05,
                   fdr_method='none'):
        """FullCI conditional independence tests.

        Implements the FullCI test (see [1]_).

        Returns the matrices of test statistic values, (optionally corrected)
        p-values, and (optionally) confidence intervals. Also (new in 4.3)
        returns graph based on alpha_level (and optional FDR-correction).

        Parameters
        ----------
        selected_links : dict or None
            Deprecated, replaced by link_assumptions
        link_assumptions : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        tau_min : int, default: 0
            Minimum time lag to test. Note that zero-lags are undirected.
        tau_max : int, default: 1
            Maximum time lag. Must be larger or equal to tau_min.
        val_only : bool, default: False
            Option to only compute dependencies and not p-values.
        alpha_level : float, optional (default: 0.05)
            Significance level at which the p_matrix is thresholded to
            get graph.
        fdr_method : str, optional (default: 'none')
            Correction method, currently implemented is Benjamini-Hochberg
            False Discovery Rate method ('fdr_bh').

        Returns
        -------
        graph : array of shape [N, N, tau_max+1]
            Causal graph, see description above for interpretation.
        val_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of test statistic values.
        p_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of p-values, optionally adjusted if fdr_method is
            not 'none'.
        conf_matrix : array of shape [N, N, tau_max+1,2]
            Estimated matrix of confidence intervals of test statistic values.
            Only computed if set in cond_ind_test, where also the percentiles
            are set.
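
        Examples
        --------
        Illustrative sketch, assuming a ``pcmci`` object as in the
        ``run_pcmci`` example:

        >>> # FullCI conditions on the entire past of all variables
        >>> results = pcmci.run_fullci(tau_max=2, alpha_level=0.01)
        >>> graph = results['graph']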
        """

        if selected_links is not None:
            raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

        if self.verbosity > 0:
            print("\n##\n## Running Tigramite FullCI algorithm\n##"
                  "\n\nParameters:")
            print("\nindependence test = %s" % self.cond_ind_test.measure
                  + "\ntau_min = %d" % tau_min
                  + "\ntau_max = %d" % tau_max)

        full_past = dict([(j, [(i, -tau)
                               for i in range(self.N)
                               for tau in range(max(1, tau_min), tau_max + 1)])
                          for j in range(self.N)])

        return self._run_mci_or_variants(
            link_assumptions=link_assumptions,
            tau_min=tau_min,
            tau_max=tau_max,
            parents=full_past,
            max_conds_py=None,
            max_conds_px=0,
            val_only=val_only,
            alpha_level=alpha_level,
            fdr_method=fdr_method)

    def run_bivci(self,
                  selected_links=None,
                  link_assumptions=None,
                  tau_min=0,
                  tau_max=1,
                  val_only=False,
                  alpha_level=0.05,
                  fdr_method='none'):
        """BivCI conditional independence tests.

        Implements the BivCI test (see [1]_).

        Returns the matrices of test statistic values, (optionally corrected)
        p-values, and (optionally) confidence intervals. Also (new in 4.3)
        returns graph based on alpha_level (and optional FDR-correction).

        Parameters
        ----------
        selected_links : dict or None
            Deprecated, replaced by link_assumptions
        link_assumptions : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        tau_min : int, default: 0
            Minimum time lag to test. Note that zero-lags are undirected.
        tau_max : int, default: 1
            Maximum time lag. Must be larger or equal to tau_min.
        val_only : bool, default: False
            Option to only compute dependencies and not p-values.
        alpha_level : float, optional (default: 0.05)
            Significance level at which the p_matrix is thresholded to
            get graph.
        fdr_method : str, optional (default: 'none')
            Correction method, currently implemented is Benjamini-Hochberg
            False Discovery Rate method ('fdr_bh').

        Returns
        -------
        graph : array of shape [N, N, tau_max+1]
            Causal graph, see description above for interpretation.
        val_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of test statistic values.
        p_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of p-values, optionally adjusted if fdr_method is
            not 'none'.
        conf_matrix : array of shape [N, N, tau_max+1,2]
            Estimated matrix of confidence intervals of test statistic values.
            Only computed if set in cond_ind_test, where also the percentiles
            are set.
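
        Examples
        --------
        Illustrative sketch, assuming a ``pcmci`` object as in the
        ``run_pcmci`` example:

        >>> # BivCI conditions only on each variable's own past
        >>> results = pcmci.run_bivci(tau_max=2, alpha_level=0.01)
        >>> p_matrix = results['p_matrix']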
        """

        if selected_links is not None:
            raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

        if self.verbosity > 0:
            print("\n##\n## Running Tigramite BivCI algorithm\n##"
                  "\n\nParameters:")
            print("\nindependence test = %s" % self.cond_ind_test.measure
                  + "\ntau_min = %d" % tau_min
                  + "\ntau_max = %d" % tau_max)

        auto_past = dict([(j, [(j, -tau)
                               for tau in range(max(1, tau_min), tau_max + 1)])
                          for j in range(self.N)])

        return self._run_mci_or_variants(
            link_assumptions=link_assumptions,
            tau_min=tau_min,
            tau_max=tau_max,
            parents=auto_past,
            max_conds_py=None,
            max_conds_px=0,
            val_only=val_only,
            alpha_level=alpha_level,
            fdr_method=fdr_method)

    def get_graph_from_pmatrix(self, p_matrix, alpha_level,
                               tau_min, tau_max, link_assumptions=None):
        """Construct graph from thresholding the p_matrix at an alpha-level.

        Allows taking link_assumptions into account.

        Parameters
        ----------
        p_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of p-values, optionally adjusted if fdr_method is
            not 'none'.
        alpha_level : float, optional (default: 0.05)
            Significance level at which the p_matrix is thresholded to
            get graph.
        tau_min : int
            Minimum time delay to test.
        tau_max : int
            Maximum time delay to test.
        link_assumptions : dict or None
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.

        Returns
        -------
        graph : array of shape [N, N, tau_max+1]
            Causal graph, see description above for interpretation.
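
        Examples
        --------
        Illustrative sketch, assuming ``results`` from a previous ``run_mci``
        call with ``tau_max=2``:

        >>> # Re-threshold the estimated p-values at a stricter alpha-level
        >>> graph = pcmci.get_graph_from_pmatrix(p_matrix=results['p_matrix'],
        ...                                      alpha_level=0.001,
        ...                                      tau_min=0, tau_max=2)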
        """

        # _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max)
        _int_link_assumptions = self._set_link_assumptions(link_assumptions, tau_min, tau_max)

        if link_assumptions is not None:
            # Create a mask for these values
            mask = np.zeros((self.N, self.N, tau_max + 1), dtype='bool')
            # for node1, links_ in _int_sel_links.items():
            #     for node2, lag in links_:
            #         mask[node2, node1, abs(lag)] = True
            for j, links_ in _int_link_assumptions.items():
                for i, lag in links_:
                    if _int_link_assumptions[j][(i, lag)] not in ["<--", "<?-"]:
                        mask[i, j, abs(lag)] = True
        else:
            # Create a mask for these values
            mask = np.ones((self.N, self.N, tau_max + 1), dtype='bool')

        # Set all p-values of absent links to 1.
        p_matrix[mask == False] = 1.

        # Threshold p_matrix to get graph
        graph_bool = p_matrix <= alpha_level

        # Convert to string graph representation
        graph = self.convert_to_string_graph(graph_bool)

        # Return the graph
        return graph

    def return_parents_dict(self, graph,
                            val_matrix,
                            include_lagzero_parents=False):
        """Returns dictionary of parents sorted by val_matrix.

        If parents are unclear (edgemarks with 'o' or 'x', or middle mark '?'),
        then no parent is returned.

        Parameters
        ----------
        graph : array of shape [N, N, tau_max+1]
            Causal graph, see description above for interpretation.
        val_matrix : array-like
            Matrix of test statistic values. Must be of shape (N, N, tau_max +
            1).
        include_lagzero_parents : bool (default: False)
            Whether the dictionary should also return parents at lag
            zero.

        Returns
        -------
        parents_dict : dict
            Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...}
            containing estimated parents.
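
        Examples
        --------
        Illustrative sketch, assuming ``results`` from a previous
        ``run_pcmci`` call:

        >>> parents = pcmci.return_parents_dict(graph=results['graph'],
        ...                                     val_matrix=results['val_matrix'])
        >>> # parents[j] lists (i, -tau) sorted by absolute MCI value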
        """

        # Initialize the return value
        parents_dict = dict()
        for j in range(self.N):
            # Get the good links
            if include_lagzero_parents:
                good_links = np.argwhere(graph[:, j, :] == "-->")
                # Build a dictionary from these links to their values
                links = {(i, -tau): np.abs(val_matrix[i, j, abs(tau)])
                         for i, tau in good_links}
            else:
                good_links = np.argwhere(graph[:, j, 1:] == "-->")
                # Build a dictionary from these links to their values
                links = {(i, -tau - 1): np.abs(val_matrix[i, j, abs(tau) + 1])
                         for i, tau in good_links}
            # Sort by value
            parents_dict[j] = sorted(links, key=links.get, reverse=True)

        return parents_dict

    def return_significant_links(self, pq_matrix,
                                 val_matrix,
                                 alpha_level=0.05,
                                 include_lagzero_links=False):
        """Returns list of significant links as well as a boolean matrix.

        DEPRECATED. Will be removed in future.
        """
        print("return_significant_links() is DEPRECATED: run_pcmci(), "
              "run_mci(), and all variants now directly return the graph "
              "based on thresholding the p_matrix at alpha_level. The graph "
              "can also be updated based on a (potentially further adjusted) "
              "p_matrix using get_graph_from_pmatrix(). A dictionary of "
              "parents can be obtained with return_parents_dict().")
        return None

    def print_significant_links(self,
                                p_matrix,
                                val_matrix,
                                conf_matrix=None,
                                graph=None,
                                ambiguous_triples=None,
                                alpha_level=0.05):
        """Prints significant links.

        Used for output of PCMCI and PCMCIplus. For the latter, information
        on ambiguous links and conflicts is also printed.

        Parameters
        ----------
        alpha_level : float, optional (default: 0.05)
            Significance level.
        p_matrix : array-like
            Must be of shape (N, N, tau_max + 1).
        val_matrix : array-like
            Must be of shape (N, N, tau_max + 1).
        conf_matrix : array-like, optional (default: None)
            Matrix of confidence intervals of shape (N, N, tau_max+1, 2).
        graph : array-like
            Must be of shape (N, N, tau_max + 1).
        ambiguous_triples : list
            List of ambiguous triples.
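
        Examples
        --------
        Illustrative sketch, assuming ``results`` from a previous
        ``run_pcmci`` call:

        >>> pcmci.print_significant_links(p_matrix=results['p_matrix'],
        ...                               val_matrix=results['val_matrix'],
        ...                               graph=results['graph'],
        ...                               alpha_level=0.01)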
        """
        if graph is not None:
            sig_links = (graph != "")*(graph != "<--")
        else:
            sig_links = (p_matrix <= alpha_level)

        print("\n## Significant links at alpha = %s:" % alpha_level)
        for j in range(self.N):
            links = {(p[0], -p[1]): np.abs(val_matrix[p[0], j, abs(p[1])])
                     for p in zip(*np.where(sig_links[:, j, :]))}
            # Sort by value
            sorted_links = sorted(links, key=links.get, reverse=True)
            n_links = len(links)
            string = ("\n    Variable %s has %d "
                      "link(s):" % (self.var_names[j], n_links))
            for p in sorted_links:
                string += ("\n        (%s % d): pval = %.5f" %
                           (self.var_names[p[0]], p[1],
                            p_matrix[p[0], j, abs(p[1])]))
                string += " | val = % .3f" % (
                    val_matrix[p[0], j, abs(p[1])])
                if conf_matrix is not None:
                    string += " | conf = (%.3f, %.3f)" % (
                        conf_matrix[p[0], j, abs(p[1])][0],
                        conf_matrix[p[0], j, abs(p[1])][1])
                if graph is not None:
                    if p[1] == 0 and graph[j, p[0], 0] == "o-o":
                        string += " | unoriented link"
                    if graph[p[0], j, abs(p[1])] == "x-x":
                        string += " | unclear orientation due to conflict"
            print(string)

        # link_marker = {True:"o-o", False:"-->"}

        if ambiguous_triples is not None and len(ambiguous_triples) > 0:
            print("\n## Ambiguous triples (not used for orientation):\n")
            for triple in ambiguous_triples:
                (i, tau), k, j = triple
                print("    [(%s % d), %s, %s]" % (
                    self.var_names[i], tau,
                    self.var_names[k],
                    self.var_names[j]))

    def print_results(self,
                      return_dict,
                      alpha_level=0.05):
        """Prints significant parents from output of MCI or PCMCI algorithms.

        Parameters
        ----------
        return_dict : dict
            Dictionary of return values, containing keys
            * 'p_matrix'
            * 'val_matrix'
            * 'conf_matrix'

        alpha_level : float, optional (default: 0.05)
            Significance level.
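
        Examples
        --------
        Illustrative sketch, assuming ``results`` from a previous
        ``run_pcmci`` call:

        >>> pcmci.print_results(results, alpha_level=0.01)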
        """
        # Check if conf_matrix is defined
        conf_matrix = None
        conf_key = 'conf_matrix'
        if conf_key in return_dict:
            conf_matrix = return_dict[conf_key]
        # Wrap the already defined function
        if 'graph' in return_dict:
            graph = return_dict['graph']
        else:
            graph = None
        if 'ambiguous_triples' in return_dict:
            ambiguous_triples = return_dict['ambiguous_triples']
        else:
            ambiguous_triples = None
        self.print_significant_links(return_dict['p_matrix'],
                                     return_dict['val_matrix'],
                                     conf_matrix=conf_matrix,
                                     graph=graph,
                                     ambiguous_triples=ambiguous_triples,
                                     alpha_level=alpha_level)

    def run_pcmci(self,
                  selected_links=None,
                  link_assumptions=None,
                  tau_min=0,
                  tau_max=1,
                  save_iterations=False,
                  pc_alpha=0.2,
                  max_conds_dim=None,
                  max_combinations=1,
                  max_conds_py=None,
                  max_conds_px=None,
                  alpha_level=0.05,
                  fdr_method='none'):
        r"""Runs PCMCI time-lagged causal discovery for time series.

        Wrapper around PC-algorithm function and MCI function.

        Notes
        -----

        The PCMCI causal discovery method is comprehensively described in
        [1]_, where also analytical and numerical results are presented. Here
        we briefly summarize the method.

        PCMCI estimates time-lagged causal links by a two-step procedure:

        1. Condition-selection: For each variable :math:`j`, estimate a
           *superset* of parents :math:`\\tilde{\\mathcal{P}}(X^j_t)` with the
           iterative PC1 algorithm, implemented as ``run_pc_stable``. The
           condition-selection step reduces the dimensionality and avoids
           conditioning on irrelevant variables.

        2. *Momentary conditional independence* (MCI)

           .. math:: X^i_{t-\\tau} \\perp X^j_{t} | \\tilde{\\mathcal{P}}(X^j_t),
                     \\tilde{\\mathcal{P}}(X^i_{t-\\tau})

        here implemented as ``run_mci``. This step estimates the p-values and
        test statistic values for all links accounting for common drivers,
        indirect links, and autocorrelation.

        NOTE: MCI test statistic values define a particular measure of causal
        strength depending on the test statistic used. For example, ParCorr()
        results in normalized values between -1 and 1. However, if you are
        interested in quantifying causal effects, i.e., the effect of
        hypothetical interventions, you may better look at the causal effect
        estimation functionality of Tigramite.

        PCMCI can be flexibly combined with any kind of conditional
        independence test statistic adapted to the kind of data (continuous
        or discrete) and its assumed dependency types. These are available in
        ``tigramite.independence_tests``.

        The main free parameters of PCMCI (in addition to free parameters of
        the conditional independence test statistic) are the maximum time
        delay :math:`\\tau_{\\max}` (``tau_max``) and the significance
        threshold in the condition-selection step :math:`\\alpha`
        (``pc_alpha``). The maximum time delay depends on the application and
        should be chosen according to the maximum causal time lag expected in
        the complex system. We recommend a rather large choice that includes
        peaks in the ``get_lagged_dependencies`` function. :math:`\\alpha`
        should not be seen as a significance test level in the
        condition-selection step since the iterative hypothesis tests do not
        allow for a precise assessment. :math:`\\alpha` rather takes the role
        of a regularization parameter in model-selection techniques. If a
        list of values is given or ``pc_alpha=None``, :math:`\\alpha` is
        optimized using model selection criteria implemented in the respective
        ``tigramite.independence_tests``.

        Further optional parameters are discussed in [1]_.

        Examples
        --------
        >>> import numpy
        >>> from tigramite.pcmci import PCMCI
        >>> from tigramite.independence_tests import ParCorr
        >>> import tigramite.data_processing as pp
        >>> from tigramite.toymodels import structural_causal_processes as toys
        >>> numpy.random.seed(7)
        >>> # Example process to play around with
        >>> # Each key refers to a variable and the incoming links are supplied
        >>> # as a list of format [((driver, -lag), coeff), ...]
        >>> links_coeffs = {0: [((0, -1), 0.8)],
                            1: [((1, -1), 0.8), ((0, -1), 0.5)],
                            2: [((2, -1), 0.8), ((1, -2), -0.6)]}
        >>> data, _ = toys.var_process(links_coeffs, T=1000)
        >>> # Data must be array of shape (time, variables)
        >>> print (data.shape)
        (1000, 3)
        >>> dataframe = pp.DataFrame(data)
        >>> cond_ind_test = ParCorr()
        >>> pcmci = PCMCI(dataframe=dataframe, cond_ind_test=cond_ind_test)
        >>> results = pcmci.run_pcmci(tau_max=2, pc_alpha=None)
        >>> pcmci.print_significant_links(p_matrix=results['p_matrix'],
                                          val_matrix=results['val_matrix'],
                                          alpha_level=0.05)
        ## Significant parents at alpha = 0.05:

            Variable 0 has 1 link(s):
                (0 -1): pval = 0.00000 | val = 0.588

            Variable 1 has 2 link(s):
                (1 -1): pval = 0.00000 | val = 0.606
                (0 -1): pval = 0.00000 | val = 0.447

            Variable 2 has 2 link(s):
                (2 -1): pval = 0.00000 | val = 0.618
                (1 -2): pval = 0.00000 | val = -0.499


        Parameters
        ----------
        selected_links : dict or None
            Deprecated, replaced by link_assumptions
        link_assumptions : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        tau_min : int, optional (default: 0)
            Minimum time lag to test. Note that zero-lags are undirected.
        tau_max : int, optional (default: 1)
            Maximum time lag. Must be larger or equal to tau_min.
        save_iterations : bool, optional (default: False)
            Whether to save iteration step results such as conditions used.
        pc_alpha : float, optional (default: 0.2)
            Significance level in PC1 algorithm.
        max_conds_dim : int, optional (default: None)
            Maximum number of conditions to test. If None is passed, this number
            is unrestricted.
        max_combinations : int, optional (default: 1)
            Maximum number of combinations of conditions of current cardinality
            to test in PC1 step.
        max_conds_py : int, optional (default: None)
            Maximum number of conditions of Y to use. If None is passed, this
            number is unrestricted.
        max_conds_px : int, optional (default: None)
            Maximum number of conditions of Z to use. If None is passed, this
            number is unrestricted.
        alpha_level : float, optional (default: 0.05)
            Significance level at which the p_matrix is thresholded to
            get graph.
        fdr_method : str, optional (default: 'none')
            Correction method, currently implemented is Benjamini-Hochberg
            False Discovery Rate method ('fdr_bh').

        Returns
        -------
        graph : array of shape [N, N, tau_max+1]
            Causal graph, see description above for interpretation.
        val_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of test statistic values.
        p_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of p-values, optionally adjusted if fdr_method is
            not 'none'.
        conf_matrix : array of shape [N, N, tau_max+1,2]
            Estimated matrix of confidence intervals of test statistic values.
            Only computed if set in cond_ind_test, where also the percentiles
            are set.

        """

        if selected_links is not None:
            raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

        # Get the parents from run_pc_stable
        all_parents = self.run_pc_stable(link_assumptions=link_assumptions,
                                         tau_min=tau_min,
                                         tau_max=tau_max,
                                         save_iterations=save_iterations,
                                         pc_alpha=pc_alpha,
                                         max_conds_dim=max_conds_dim,
                                         max_combinations=max_combinations)

        # Get the results from run_mci, using the parents as the input
        results = self.run_mci(link_assumptions=link_assumptions,
                               tau_min=tau_min,
                               tau_max=tau_max,
                               parents=all_parents,
                               max_conds_py=max_conds_py,
                               max_conds_px=max_conds_px,
                               alpha_level=alpha_level,
                               fdr_method=fdr_method)

        # Store the parents in the pcmci member
        self.all_parents = all_parents

        # Print the information
        # if self.verbosity > 0:
        #     self.print_results(results)
        # Return the dictionary
        self.results = results
        return results

    def run_pcmciplus(self,
                      selected_links=None,
                      link_assumptions=None,
                      tau_min=0,
                      tau_max=1,
                      pc_alpha=0.01,
                      contemp_collider_rule='majority',
                      conflict_resolution=True,
                      reset_lagged_links=False,
                      max_conds_dim=None,
                      max_combinations=1,
                      max_conds_py=None,
                      max_conds_px=None,
                      max_conds_px_lagged=None,
                      fdr_method='none',
                      ):
        r"""Runs PCMCIplus time-lagged and contemporaneous causal discovery for
        time series.

        Method described in [5]:
        http://www.auai.org/~w-auai/uai2020/proceedings/579_main_paper.pdf

        [5] J. Runge, Discovering contemporaneous and lagged causal relations
        in autocorrelated nonlinear time series datasets
        http://www.auai.org/~w-auai/uai2020/proceedings/579_main_paper.pdf

        Notes
        -----

        The PCMCIplus causal discovery method is described in [5], where
        also analytical and numerical results are presented. In contrast to
        PCMCI, PCMCIplus can identify the full, lagged and contemporaneous,
        causal graph (up to the Markov equivalence class for contemporaneous
        links) under the standard assumptions of Causal Sufficiency,
        Faithfulness and the Markov condition.

        PCMCIplus estimates time-lagged and contemporaneous causal links by a
        four-step procedure:

        1. Condition-selection (same as for PCMCI): For each variable
           :math:`j`, estimate a *superset* of lagged parents
           :math:`\\widehat{\\mathcal{B}}_t^-(X^j_t)` with the iterative PC1
           algorithm, implemented as ``run_pc_stable``. The condition-selection
           step reduces the dimensionality and avoids conditioning on
           irrelevant variables.

        2. PC skeleton phase with contemporaneous conditions and *Momentary
           conditional independence* (MCI) tests: Iterate through subsets
           :math:`\\mathcal{S}` of contemporaneous adjacencies and conduct MCI
           conditional independence tests:

           .. math:: X^i_{t-\\tau} ~\\perp~ X^j_{t} ~|~ \\mathcal{S},
                     \\widehat{\\mathcal{B}}_t^-(X^j_t),
                     \\widehat{\\mathcal{B}}_{t-\\tau}^-(X^i_{t-{\\tau}})

        here implemented as ``run_pcalg``. This step estimates the p-values and
        test statistic values for all lagged and contemporaneous adjacencies
        accounting for common drivers, indirect links, and autocorrelation.

        3. PC collider orientation phase: Orient contemporaneous collider
           motifs based on unshielded triples. Optionally apply conservative or
           majority rule (also based on MCI tests).

        4. PC rule orientation phase: Orient remaining contemporaneous
           links based on PC rules.

        In contrast to PCMCI, the relevant output of PCMCIplus is the
        array ``graph``. Its string entries are interpreted as follows:

        * ``graph[i,j,tau]=-->`` for :math:`\\tau>0` denotes a directed, lagged
          causal link from :math:`i` to :math:`j` at lag :math:`\\tau`

        * ``graph[i,j,0]=-->`` (and ``graph[j,i,0]=<--``) denotes a directed,
          contemporaneous causal link from :math:`i` to :math:`j`

        * ``graph[i,j,0]=o-o`` (and ``graph[j,i,0]=o-o``) denotes an unoriented,
          contemporaneous adjacency between :math:`i` and :math:`j` indicating
          that the collider and orientation rules could not be applied (Markov
          equivalence)

        * ``graph[i,j,0]=x-x`` (and ``graph[j,i,0]=x-x``) denotes a conflicting,
          contemporaneous adjacency between :math:`i` and :math:`j` indicating
          that the directionality is undecided due to conflicting orientation
          rules

        Importantly, ``p_matrix`` and ``val_matrix`` for PCMCIplus quantify
        the uncertainty and strength, respectively, only for the
        adjacencies, but not for the directionality of contemporaneous links.
        Note that lagged links are always oriented due to time order.

        PCMCIplus can be flexibly combined with any kind of conditional
        independence test statistic adapted to the kind of data (continuous
        or discrete) and its assumed dependency types. These are available in
        ``tigramite.independence_tests``.

        The main free parameters of PCMCIplus (in addition to free parameters of
        the conditional independence tests) are the maximum time delay
        :math:`\\tau_{\\max}` (``tau_max``) and the significance threshold
        :math:`\\alpha` (``pc_alpha``).

        If a list or None is passed for ``pc_alpha``, the significance level is
        optimized for every graph across the given ``pc_alpha`` values using the
        score computed in ``cond_ind_test.get_model_selection_criterion()``.
        Since PCMCIplus outputs not a DAG, but an equivalence class of DAGs,
        first one member of this class is computed and then the score is
        computed as the average over all model fits for each variable in ``[0,
        ..., N]`` for that member. The score is the same for all members of the
        class.

        The maximum time delay depends on the application and should be chosen
        according to the maximum causal time lag expected in the complex system.
        We recommend a rather large choice that includes peaks in the
        ``get_lagged_dependencies`` function. Another important parameter is
        ``contemp_collider_rule``. Only if set to ``majority`` or
        ``conservative`` and together with ``conflict_resolution=True``,
        PCMCIplus is fully *order independent*, meaning that the order of the N
        variables in the dataframe does not matter. Last, the default option
        ``reset_lagged_links=False`` restricts the detection of lagged causal
        links in Step 2 to the significant adjacencies found in Step 1, given by
        :math:`\\widehat{\\mathcal{B}}_t^-(X^j_t)`. For
        ``reset_lagged_links=True``, *all* lagged links are considered again,
        which improves detection power for lagged links, but also leads to
        larger runtimes.

        Further optional parameters are discussed in [5].

        Parameters
        ----------
        selected_links : dict or None
            Deprecated, replaced by link_assumptions
        link_assumptions : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        tau_min : int, optional (default: 0)
            Minimum time lag to test.
        tau_max : int, optional (default: 1)
            Maximum time lag. Must be larger or equal to tau_min.
        pc_alpha : float or list of floats, default: 0.01
            Significance level in algorithm. If a list or None is passed, the
            pc_alpha level is optimized for every graph across the given
            pc_alpha values ([0.001, 0.005, 0.01, 0.025, 0.05] for None) using
            the score computed in cond_ind_test.get_model_selection_criterion().
        contemp_collider_rule : {'majority', 'conservative', 'none'}
            Rule for collider phase to use. See the paper for details. Only
            'majority' and 'conservative' lead to an order-independent
            algorithm.
        conflict_resolution : bool, optional (default: True)
            Whether to mark conflicts in orientation rules. Only for True
            this leads to an order-independent algorithm.
        reset_lagged_links : bool, optional (default: False)
            Restricts the detection of lagged causal links in Step 2 to the
            significant adjacencies found in the PC1 algorithm in Step 1. For
            True, *all* lagged links are considered again, which improves
            detection power for lagged links, but also leads to larger
            runtimes.
        max_conds_dim : int, optional (default: None)
            Maximum number of conditions to test. If None is passed, this number
            is unrestricted.
        max_combinations : int, optional (default: 1)
            Maximum number of combinations of conditions of current cardinality
            to test in PC1 step.
        max_conds_py : int, optional (default: None)
            Maximum number of lagged conditions of Y to use in MCI tests. If
            None is passed, this number is unrestricted.
        max_conds_px : int, optional (default: None)
            Maximum number of lagged conditions of X to use in MCI tests. If
            None is passed, this number is unrestricted.
        max_conds_px_lagged : int, optional (default: None)
            Maximum number of lagged conditions of X when X is lagged in MCI
            tests. If None is passed, this number is equal to max_conds_px.
        fdr_method : str, optional (default: 'none')
            Correction method, currently implemented is Benjamini-Hochberg
            False Discovery Rate method ('fdr_bh').

        Returns
        -------
        graph : array of shape [N, N, tau_max+1]
            Resulting causal graph, see description above for interpretation.
        val_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of test statistic values regarding adjacencies.
        p_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of p-values regarding adjacencies.
        sepsets : dictionary
            Separating sets. See paper for details.
        ambiguous_triples : list
            List of ambiguous triples, only relevant for 'majority' and
            'conservative' rules, see paper for details.
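
        Examples
        --------
        Illustrative sketch, assuming a ``pcmci`` object set up on toy data
        as in the ``run_pcmci`` example:

        >>> results = pcmci.run_pcmciplus(tau_min=0, tau_max=2, pc_alpha=0.01)
        >>> graph = results['graph']   # lagged and contemporaneous edge marks
        >>> # e.g. graph[0, 1, 1] == '-->' would denote a lagged link 0 -> 1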
        """

        if selected_links is not None:
            raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")

        # Check if pc_alpha is chosen to optimize over a list
        if pc_alpha is None or isinstance(pc_alpha, (list, tuple, np.ndarray)):
            # Call optimizer wrapper around run_pcmciplus()
            return self._optimize_pcmciplus_alpha(
                        link_assumptions=link_assumptions,
                        tau_min=tau_min,
                        tau_max=tau_max,
                        pc_alpha=pc_alpha,
                        contemp_collider_rule=contemp_collider_rule,
                        conflict_resolution=conflict_resolution,
                        reset_lagged_links=reset_lagged_links,
                        max_conds_dim=max_conds_dim,
                        max_combinations=max_combinations,
                        max_conds_py=max_conds_py,
                        max_conds_px=max_conds_px,
                        max_conds_px_lagged=max_conds_px_lagged,
                        fdr_method=fdr_method)

        elif pc_alpha < 0. or pc_alpha > 1:
            raise ValueError("Choose 0 <= pc_alpha <= 1")

        # Check the limits on tau
        self._check_tau_limits(tau_min, tau_max)
        # Set the link assumption
        _int_link_assumptions = self._set_link_assumptions(link_assumptions, tau_min, tau_max)

        #
        # Phase 1: Get a superset of lagged parents from run_pc_stable
        #
        lagged_parents = self.run_pc_stable(link_assumptions=link_assumptions,
                                            tau_min=tau_min,
                                            tau_max=tau_max,
                                            pc_alpha=pc_alpha,
                                            max_conds_dim=max_conds_dim,
                                            max_combinations=max_combinations)
        # Extract p- and val-matrix
        p_matrix = self.p_matrix
        val_matrix = self.val_matrix

        #
        # Phase 2: PC algorithm with contemp. conditions and MCI tests
        #
        if self.verbosity > 0:
            print("\n##\n## Step 2: PC algorithm with contemp. conditions "
                  "and MCI tests\n##"
                  "\n\nParameters:")
            if link_assumptions is not None:
                print("\nlink_assumptions = %s" % str(_int_link_assumptions))
            print("\nindependence test = %s" % self.cond_ind_test.measure
                  + "\ntau_min = %d" % tau_min
                  + "\ntau_max = %d" % tau_max
                  + "\npc_alpha = %s" % pc_alpha
                  + "\ncontemp_collider_rule = %s" % contemp_collider_rule
                  + "\nconflict_resolution = %s" % conflict_resolution
                  + "\nreset_lagged_links = %s" % reset_lagged_links
                  + "\nmax_conds_dim = %s" % max_conds_dim
                  + "\nmax_conds_py = %s" % max_conds_py
                  + "\nmax_conds_px = %s" % max_conds_px
                  + "\nmax_conds_px_lagged = %s" % max_conds_px_lagged
                  + "\nfdr_method = %s" % fdr_method
                  )

        skeleton_results = self._pcmciplus_mci_skeleton_phase(
            lagged_parents=lagged_parents,
            link_assumptions=_int_link_assumptions,
            pc_alpha=pc_alpha,
            tau_min=tau_min,
            tau_max=tau_max,
            max_conds_dim=max_conds_dim,
            max_combinations=None,    # Otherwise MCI step is not consistent
            max_conds_py=max_conds_py,
            max_conds_px=max_conds_px,
            max_conds_px_lagged=max_conds_px_lagged,
            reset_lagged_links=reset_lagged_links,
            fdr_method=fdr_method,
            p_matrix=p_matrix,
            val_matrix=val_matrix,
            )

        #
        # Phase 3: Collider orientations (with MCI tests for default majority collider rule)
        #
        colliders_step_results = self._pcmciplus_collider_phase(
            skeleton_graph=skeleton_results['graph'],
            sepsets=skeleton_results['sepsets'],
            lagged_parents=lagged_parents,
            pc_alpha=pc_alpha,
            tau_min=tau_min,
            tau_max=tau_max,
            max_conds_py=max_conds_py,
            max_conds_px=max_conds_px,
            max_conds_px_lagged=max_conds_px_lagged,
            conflict_resolution=conflict_resolution,
            contemp_collider_rule=contemp_collider_rule)

        #
        # Phase 4: Meek rule orientations
        #
        final_graph = self._pcmciplus_rule_orientation_phase(
            collider_graph=colliders_step_results['graph'],
            ambiguous_triples=colliders_step_results['ambiguous_triples'],
            conflict_resolution=conflict_resolution)

        # Store the parents in the pcmci member
        self.all_lagged_parents = lagged_parents

        return_dict = {
            'graph': final_graph,
            'p_matrix': skeleton_results['p_matrix'],
            'val_matrix': skeleton_results['val_matrix'],
            'sepsets': colliders_step_results['sepsets'],
            'ambiguous_triples': colliders_step_results['ambiguous_triples'],
            }

        # No confidence interval estimation here
        return_dict['conf_matrix'] = None

        # Print the results
        if self.verbosity > 0:
            self.print_results(return_dict, alpha_level=pc_alpha)

        # Return the dictionary
        self.results = return_dict

        return return_dict

    def _pcmciplus_mci_skeleton_phase(self,
                                      lagged_parents,
                                      link_assumptions,
                                      pc_alpha,
                                      tau_min,
                                      tau_max,
                                      max_conds_dim,
                                      max_combinations,
                                      max_conds_py,
                                      max_conds_px,
                                      max_conds_px_lagged,
                                      reset_lagged_links,
                                      fdr_method,
                                      p_matrix,
                                      val_matrix,
                                      ):
        """MCI Skeleton phase."""

        # Set the maximum condition dimension for Y and X
        max_conds_py = self._set_max_condition_dim(max_conds_py,
                                                   tau_min, tau_max)
        max_conds_px = self._set_max_condition_dim(max_conds_px,
                                                   tau_min, tau_max)

        if reset_lagged_links:
            # Run PCalg on full graph, ignoring that some lagged links
            # were determined as non-significant in PC1 step
            links_for_pc = deepcopy(link_assumptions)
        else:
            # Run PCalg only on lagged parents found with PC1
            # plus all contemporaneous links
            links_for_pc = {}  # deepcopy(lagged_parents)
            for j in range(self.N):
                links_for_pc[j] = {}
                for parent in lagged_parents[j]:
                    if link_assumptions[j][parent] in ['-?>', '-->']:
                        links_for_pc[j][parent] = link_assumptions[j][parent]

                # Add contemporaneous links
                for link in link_assumptions[j]:
                    i, tau = link
                    link_type = link_assumptions[j][link]
                    if abs(tau) == 0:
                        links_for_pc[j][(i, 0)] = link_type

        if max_conds_dim is None:
            max_conds_dim = self.N

        if max_combinations is None:
            max_combinations = np.inf

        initial_graph = self._dict_to_graph(links_for_pc, tau_max=tau_max)

        skeleton_results = self._pcalg_skeleton(
            initial_graph=initial_graph,
            lagged_parents=lagged_parents,
            mode='contemp_conds',
            pc_alpha=pc_alpha,
            tau_min=tau_min,
            tau_max=tau_max,
            max_conds_dim=max_conds_dim,
            max_combinations=max_combinations,
            max_conds_py=max_conds_py,
            max_conds_px=max_conds_px,
            max_conds_px_lagged=max_conds_px_lagged,
            )

        # Symmetrize p_matrix and val_matrix coming from skeleton
        symmetrized_results = self.symmetrize_p_and_val_matrix(
            p_matrix=skeleton_results['p_matrix'],
            val_matrix=skeleton_results['val_matrix'],
            link_assumptions=links_for_pc,
            conf_matrix=None)

        # Update p_matrix and val_matrix with values from skeleton phase
        # Contemporaneous entries (not filled in run_pc_stable lagged phase)
        p_matrix[:, :, 0] = symmetrized_results['p_matrix'][:, :, 0]
        val_matrix[:, :, 0] = symmetrized_results['val_matrix'][:, :, 0]

        # Update all entries computed in the MCI step
        # (these are in links_for_pc); values for entries
        # that were removed in the lagged-condition phase are kept from before
        for j in range(self.N):
            for link in links_for_pc[j]:
                i, tau = link
                if links_for_pc[j][link] not in ['<--', '<?-']:
                    p_matrix[i, j, abs(tau)] = symmetrized_results['p_matrix'][i, j, abs(tau)]
                    val_matrix[i, j, abs(tau)] = symmetrized_results['val_matrix'][i, j,
                                                                                   abs(tau)]

        # Optionally correct the p_matrix
        if fdr_method != 'none':
            p_matrix = self.get_corrected_pvalues(p_matrix=p_matrix, tau_min=tau_min,
                                                  tau_max=tau_max,
                                                  link_assumptions=link_assumptions,
                                                  fdr_method=fdr_method)

        # Update matrices
        skeleton_results['p_matrix'] = p_matrix
        skeleton_results['val_matrix'] = val_matrix

        return skeleton_results

    def _pcmciplus_collider_phase(self, skeleton_graph, sepsets, lagged_parents,
                                  pc_alpha, tau_min, tau_max, max_conds_py,
                                  max_conds_px, max_conds_px_lagged,
                                  conflict_resolution, contemp_collider_rule):
        """MCI collider phase."""

        # Set the maximum condition dimension for Y and X
        max_conds_py = self._set_max_condition_dim(max_conds_py,
                                                   tau_min, tau_max)
        max_conds_px = self._set_max_condition_dim(max_conds_px,
                                                   tau_min, tau_max)

        # Now change assumed links marks
        skeleton_graph[skeleton_graph == 'o?o'] = 'o-o'
        skeleton_graph[skeleton_graph == '-?>'] = '-->'
        skeleton_graph[skeleton_graph == '<?-'] = '<--'

        colliders_step_results = self._pcalg_colliders(
            graph=skeleton_graph,
            sepsets=sepsets,
            lagged_parents=lagged_parents,
            mode='contemp_conds',
            pc_alpha=pc_alpha,
            tau_max=tau_max,
            max_conds_py=max_conds_py,
            max_conds_px=max_conds_px,
            max_conds_px_lagged=max_conds_px_lagged,
            conflict_resolution=conflict_resolution,
            contemp_collider_rule=contemp_collider_rule,
            )

        return colliders_step_results

    def _pcmciplus_rule_orientation_phase(self, collider_graph,
                                          ambiguous_triples, conflict_resolution):
        """MCI rule orientation phase."""

        final_graph = self._pcalg_rules_timeseries(
            graph=collider_graph,
            ambiguous_triples=ambiguous_triples,
            conflict_resolution=conflict_resolution,
            )

        return final_graph

    def run_pcalg(self,
                  selected_links=None,
                  link_assumptions=None,
                  pc_alpha=0.01,
                  tau_min=0,
                  tau_max=1,
                  max_conds_dim=None,
                  max_combinations=None,
                  lagged_parents=None,
                  max_conds_py=None,
                  max_conds_px=None,
                  max_conds_px_lagged=None,
                  mode='standard',
                  contemp_collider_rule='majority',
                  conflict_resolution=True):

        """Runs PC algorithm for time-lagged and contemporaneous causal
        discovery for time series.

        For ``mode='contemp_conds'`` this implements Steps 2-4 of the
        PCMCIplus method described in [5]. For ``mode='standard'`` this
        implements the standard PC algorithm adapted to time series.

        [5] J. Runge, Discovering contemporaneous and lagged causal relations
        in autocorrelated nonlinear time series datasets
        http://www.auai.org/~w-auai/uai2020/proceedings/579_main_paper.pdf

        Parameters
        ----------
        selected_links : dict or None
            Deprecated, replaced by link_assumptions
        link_assumptions : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        lagged_parents : dictionary
            Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
            additional conditions for each CI test. As part of PCMCIplus
            these are the superset of lagged parents estimated with the PC1
            algorithm.
        mode : {'standard', 'contemp_conds'}
            For ``mode='contemp_conds'`` this implements Steps 2-4 of the
            PCMCIplus method. For ``mode='standard'`` this implements the
            standard PC algorithm adapted to time series.
        tau_min : int, optional (default: 0)
            Minimum time lag to test.
        tau_max : int, optional (default: 1)
            Maximum time lag. Must be larger or equal to tau_min.
        pc_alpha : float, optional (default: 0.01)
            Significance level.
        contemp_collider_rule : {'majority', 'conservative', 'none'}
            Rule for collider phase to use. See the paper for details. Only
            'majority' and 'conservative' lead to an order-independent
            algorithm.
        conflict_resolution : bool, optional (default: True)
            Whether to mark conflicts in orientation rules. Only for True
            this leads to an order-independent algorithm.
|
|
2494
|
+
max_conds_dim : int, optional (default: None)
|
|
2495
|
+
Maximum number of conditions to test. If None is passed, this number
|
|
2496
|
+
is unrestricted.
|
|
2497
|
+
max_combinations : int
|
|
2498
|
+
Maximum number of combinations of conditions of current cardinality
|
|
2499
|
+
to test. Must be infinite (default for max_combinations=1) for consistency.
|
|
2500
|
+
max_conds_py : int, optional (default: None)
|
|
2501
|
+
Maximum number of lagged conditions of Y to use in MCI tests. If
|
|
2502
|
+
None is passed, this number is unrestricted.
|
|
2503
|
+
max_conds_px : int, optional (default: None)
|
|
2504
|
+
Maximum number of lagged conditions of X to use in MCI tests. If
|
|
2505
|
+
None is passed, this number is unrestricted.
|
|
2506
|
+
max_conds_px_lagged : int, optional (default: None)
|
|
2507
|
+
Maximum number of lagged conditions of X when X is lagged in MCI
|
|
2508
|
+
tests. If None is passed, this number is equal to max_conds_px.
|
|
2509
|
+
|
|
2510
|
+
Returns
|
|
2511
|
+
-------
|
|
2512
|
+
graph : array of shape [N, N, tau_max+1]
|
|
2513
|
+
Resulting causal graph, see description above for interpretation.
|
|
2514
|
+
val_matrix : array of shape [N, N, tau_max+1]
|
|
2515
|
+
Estimated matrix of test statistic values regarding adjacencies.
|
|
2516
|
+
p_matrix : array of shape [N, N, tau_max+1]
|
|
2517
|
+
Estimated matrix of p-values regarding adjacencies.
|
|
2518
|
+
sepsets : dictionary
|
|
2519
|
+
Separating sets. See paper for details.
|
|
2520
|
+
ambiguous_triples : list
|
|
2521
|
+
List of ambiguous triples, only relevant for 'majority' and
|
|
2522
|
+
'conservative' rules, see paper for details.
|
|
2523
|
+
"""
|
|
2524
|
+
# TODO: save_iterations
|
|
2525
|
+
|
|
2526
|
+
if selected_links is not None:
|
|
2527
|
+
raise ValueError("selected_links is DEPRECATED, use link_assumptions instead.")
|
|
2528
|
+
|
|
2529
|
+
# Sanity checks
|
|
2530
|
+
if pc_alpha is None:
|
|
2531
|
+
raise ValueError("pc_alpha=None not supported in PC algorithm, "
|
|
2532
|
+
"choose 0 < pc_alpha < 1 (e.g., 0.01)")
|
|
2533
|
+
|
|
2534
|
+
if mode not in ['contemp_conds', 'standard']:
|
|
2535
|
+
raise ValueError("mode must be either 'contemp_conds' or "
|
|
2536
|
+
"'standard'")
|
|
2537
|
+
|
|
2538
|
+
# Check the limits on tau
|
|
2539
|
+
self._check_tau_limits(tau_min, tau_max)
|
|
2540
|
+
# Set the selected links
|
|
2541
|
+
# _int_sel_links = self._set_sel_links(selected_links, tau_min, tau_max)
|
|
2542
|
+
_int_link_assumptions = self._set_link_assumptions(link_assumptions, tau_min, tau_max)
|
|
2543
|
+
|
|
2544
|
+
if max_conds_dim is None:
|
|
2545
|
+
if mode == 'standard':
|
|
2546
|
+
max_conds_dim = self._set_max_condition_dim(max_conds_dim,
|
|
2547
|
+
tau_min, tau_max)
|
|
2548
|
+
elif mode == 'contemp_conds':
|
|
2549
|
+
max_conds_dim = self.N
|
|
2550
|
+
|
|
2551
|
+
if max_combinations is None:
|
|
2552
|
+
max_combinations = np.inf
|
|
2553
|
+
|
|
2554
|
+
initial_graph = self._dict_to_graph(_int_link_assumptions, tau_max=tau_max)
|
|
2555
|
+
|
|
2556
|
+
skeleton_results = self._pcalg_skeleton(
|
|
2557
|
+
initial_graph=initial_graph,
|
|
2558
|
+
lagged_parents=lagged_parents,
|
|
2559
|
+
mode=mode,
|
|
2560
|
+
pc_alpha=pc_alpha,
|
|
2561
|
+
tau_min=tau_min,
|
|
2562
|
+
tau_max=tau_max,
|
|
2563
|
+
max_conds_dim=max_conds_dim,
|
|
2564
|
+
max_combinations=max_combinations,
|
|
2565
|
+
max_conds_py=max_conds_py,
|
|
2566
|
+
max_conds_px=max_conds_px,
|
|
2567
|
+
max_conds_px_lagged=max_conds_px_lagged,
|
|
2568
|
+
)
|
|
2569
|
+
|
|
2570
|
+
skeleton_graph = skeleton_results['graph']
|
|
2571
|
+
sepsets = skeleton_results['sepsets']
|
|
2572
|
+
|
|
2573
|
+
# Now change assumed links marks
|
|
2574
|
+
skeleton_graph[skeleton_graph=='o?o'] = 'o-o'
|
|
2575
|
+
skeleton_graph[skeleton_graph=='-?>'] = '-->'
|
|
2576
|
+
skeleton_graph[skeleton_graph=='<?-'] = '<--'
|
|
2577
|
+
|
|
2578
|
+
colliders_step_results = self._pcalg_colliders(
|
|
2579
|
+
graph=skeleton_graph,
|
|
2580
|
+
sepsets=sepsets,
|
|
2581
|
+
lagged_parents=lagged_parents,
|
|
2582
|
+
mode=mode,
|
|
2583
|
+
pc_alpha=pc_alpha,
|
|
2584
|
+
tau_max=tau_max,
|
|
2585
|
+
max_conds_py=max_conds_py,
|
|
2586
|
+
max_conds_px=max_conds_px,
|
|
2587
|
+
max_conds_px_lagged=max_conds_px_lagged,
|
|
2588
|
+
conflict_resolution=conflict_resolution,
|
|
2589
|
+
contemp_collider_rule=contemp_collider_rule,
|
|
2590
|
+
)
|
|
2591
|
+
|
|
2592
|
+
collider_graph = colliders_step_results['graph']
|
|
2593
|
+
ambiguous_triples = colliders_step_results['ambiguous_triples']
|
|
2594
|
+
|
|
2595
|
+
final_graph = self._pcalg_rules_timeseries(
|
|
2596
|
+
graph=collider_graph,
|
|
2597
|
+
ambiguous_triples=ambiguous_triples,
|
|
2598
|
+
conflict_resolution=conflict_resolution,
|
|
2599
|
+
)
|
|
2600
|
+
|
|
2601
|
+
# Symmetrize p_matrix and val_matrix
|
|
2602
|
+
symmetrized_results = self.symmetrize_p_and_val_matrix(
|
|
2603
|
+
p_matrix=skeleton_results['p_matrix'],
|
|
2604
|
+
val_matrix=skeleton_results['val_matrix'],
|
|
2605
|
+
link_assumptions=_int_link_assumptions,
|
|
2606
|
+
conf_matrix=None)
|
|
2607
|
+
|
|
2608
|
+
# Convert numerical graph matrix to string
|
|
2609
|
+
graph_str = final_graph # self.convert_to_string_graph(final_graph)
|
|
2610
|
+
|
|
2611
|
+
pc_results = {
|
|
2612
|
+
'graph': graph_str,
|
|
2613
|
+
'p_matrix': symmetrized_results['p_matrix'],
|
|
2614
|
+
'val_matrix': symmetrized_results['val_matrix'],
|
|
2615
|
+
'sepsets': colliders_step_results['sepsets'],
|
|
2616
|
+
'ambiguous_triples': colliders_step_results['ambiguous_triples'],
|
|
2617
|
+
}
|
|
2618
|
+
|
|
2619
|
+
if self.verbosity > 1:
|
|
2620
|
+
print("\n-----------------------------")
|
|
2621
|
+
print("PCMCIplus algorithm finished.")
|
|
2622
|
+
print("-----------------------------")
|
|
2623
|
+
|
|
2624
|
+
self.pc_results = pc_results
|
|
2625
|
+
return pc_results
|
|
2626
|
+
|
|
2627
|
+
def run_pcalg_non_timeseries_data(self, pc_alpha=0.01,
|
|
2628
|
+
max_conds_dim=None, max_combinations=None,
|
|
2629
|
+
contemp_collider_rule='majority',
|
|
2630
|
+
conflict_resolution=True):
|
|
2631
|
+
|
|
2632
|
+
"""Runs PC algorithm for non-time series data.
|
|
2633
|
+
|
|
2634
|
+
Simply calls run_pcalg with tau_min = tau_max = 0.
|
|
2635
|
+
Removes lags from output dictionaries.
|
|
2636
|
+
|
|
2637
|
+
Parameters
|
|
2638
|
+
----------
|
|
2639
|
+
pc_alpha : float, optional (default: 0.01)
|
|
2640
|
+
Significance level.
|
|
2641
|
+
contemp_collider_rule : {'majority', 'conservative', 'none'}
|
|
2642
|
+
Rule for collider phase to use. See the paper for details. Only
|
|
2643
|
+
'majority' and 'conservative' lead to an order-independent
|
|
2644
|
+
algorithm.
|
|
2645
|
+
conflict_resolution : bool, optional (default: True)
|
|
2646
|
+
Whether to mark conflicts in orientation rules. Only for True
|
|
2647
|
+
this leads to an order-independent algorithm.
|
|
2648
|
+
max_conds_dim : int, optional (default: None)
|
|
2649
|
+
Maximum number of conditions to test. If None is passed, this number
|
|
2650
|
+
is unrestricted.
|
|
2651
|
+
max_combinations : int
|
|
2652
|
+
Maximum number of combinations of conditions of current cardinality
|
|
2653
|
+
to test. Must be infinite (default for max_combinations=1) for consistency.
|
|
2654
|
+
|
|
2655
|
+
Returns
|
|
2656
|
+
-------
|
|
2657
|
+
graph : array of shape [N, N, 1]
|
|
2658
|
+
Resulting causal graph, see description above for interpretation.
|
|
2659
|
+
val_matrix : array of shape [N, N, 1]
|
|
2660
|
+
Estimated matrix of test statistic values regarding adjacencies.
|
|
2661
|
+
p_matrix : array of shape [N, N, 1]
|
|
2662
|
+
Estimated matrix of p-values regarding adjacencies.
|
|
2663
|
+
sepsets : dictionary
|
|
2664
|
+
Separating sets. See paper for details.
|
|
2665
|
+
ambiguous_triples : list
|
|
2666
|
+
List of ambiguous triples, only relevant for 'majority' and
|
|
2667
|
+
'conservative' rules, see paper for details.
|
|
2668
|
+
"""
|
|
2669
|
+
|
|
2670
|
+
results = self.run_pcalg(pc_alpha=pc_alpha, tau_min=0, tau_max=0,
|
|
2671
|
+
max_conds_dim=max_conds_dim, max_combinations=max_combinations,
|
|
2672
|
+
mode='standard', contemp_collider_rule=contemp_collider_rule,
|
|
2673
|
+
conflict_resolution=conflict_resolution)
|
|
2674
|
+
|
|
2675
|
+
# Remove tau-dimension
|
|
2676
|
+
old_sepsets = results['sepsets'].copy()
|
|
2677
|
+
results['sepsets'] = {}
|
|
2678
|
+
for old_sepset in old_sepsets:
|
|
2679
|
+
new_sepset = (old_sepset[0][0], old_sepset[1])
|
|
2680
|
+
conds = [cond[0] for cond in old_sepsets[old_sepset]]
|
|
2681
|
+
|
|
2682
|
+
results['sepsets'][new_sepset] = conds
|
|
2683
|
+
|
|
2684
|
+
ambiguous_triples = results['ambiguous_triples'].copy()
|
|
2685
|
+
results['ambiguous_triples'] = []
|
|
2686
|
+
for triple in ambiguous_triples:
|
|
2687
|
+
new_triple = (triple[0][0], triple[1], triple[2])
|
|
2688
|
+
|
|
2689
|
+
results['ambiguous_triples'].append(new_triple)
|
|
2690
|
+
|
|
2691
|
+
self.pc_results = results
|
|
2692
|
+
return results
|
|
2693
|
+
|
|
2694
|
+
|
|
2695
|
+
def _run_pcalg_test(self, graph, i, abstau, j, S, lagged_parents, max_conds_py,
|
|
2696
|
+
max_conds_px, max_conds_px_lagged, tau_max, alpha_or_thres=None):
|
|
2697
|
+
"""MCI conditional independence tests within PCMCIplus or PC algorithm.
|
|
2698
|
+
|
|
2699
|
+
Parameters
|
|
2700
|
+
----------
|
|
2701
|
+
graph : array
|
|
2702
|
+
...
|
|
2703
|
+
i : int
|
|
2704
|
+
Variable index.
|
|
2705
|
+
abstau : int
|
|
2706
|
+
Time lag (absolute value).
|
|
2707
|
+
j : int
|
|
2708
|
+
Variable index.
|
|
2709
|
+
S : list
|
|
2710
|
+
List of contemporaneous conditions.
|
|
2711
|
+
lagged_parents : dictionary of lists
|
|
2712
|
+
Dictionary of lagged parents for each node.
|
|
2713
|
+
max_conds_py : int
|
|
2714
|
+
Max number of lagged parents for node j.
|
|
2715
|
+
max_conds_px : int
|
|
2716
|
+
Max number of lagged parents for lagged node i.
|
|
2717
|
+
max_conds_px_lagged : int
|
|
2718
|
+
Maximum number of lagged conditions of X when X is lagged in MCI
|
|
2719
|
+
tests. If None is passed, this number is equal to max_conds_px.
|
|
2720
|
+
tau_max : int
|
|
2721
|
+
Maximum time lag.
|
|
2722
|
+
alpha_or_thres : float
|
|
2723
|
+
Significance level (if significance='analytic' or 'shuffle_test') or
|
|
2724
|
+
threshold (if significance='fixed_thres'). If given, run_test returns
|
|
2725
|
+
the test decision dependent=True/False.
|
|
2726
|
+
|
|
2727
|
+
Returns
|
|
2728
|
+
-------
|
|
2729
|
+
val, pval, Z, [dependent] : Tuple of floats, list, and bool
|
|
2730
|
+
The test statistic value and the p-value and list of conditions. If alpha_or_thres is
|
|
2731
|
+
given, run_test also returns the test decision dependent=True/False.
|
|
2732
|
+
"""
|
|
2733
|
+
|
|
2734
|
+
# Perform independence test adding lagged parents
|
|
2735
|
+
if lagged_parents is not None:
|
|
2736
|
+
conds_y = lagged_parents[j][:max_conds_py]
|
|
2737
|
+
# Get the conditions for node i
|
|
2738
|
+
if abstau == 0:
|
|
2739
|
+
conds_x = lagged_parents[i][:max_conds_px]
|
|
2740
|
+
else:
|
|
2741
|
+
if max_conds_px_lagged is None:
|
|
2742
|
+
conds_x = lagged_parents[i][:max_conds_px]
|
|
2743
|
+
else:
|
|
2744
|
+
conds_x = lagged_parents[i][:max_conds_px_lagged]
|
|
2745
|
+
|
|
2746
|
+
else:
|
|
2747
|
+
conds_y = conds_x = []
|
|
2748
|
+
# Shift the conditions for X by tau
|
|
2749
|
+
conds_x_lagged = [(k, -abstau + k_tau) for k, k_tau in conds_x]
|
|
2750
|
+
|
|
2751
|
+
Z = [node for node in S]
|
|
2752
|
+
Z += [node for node in conds_y if
|
|
2753
|
+
node != (i, -abstau) and node not in Z]
|
|
2754
|
+
# Remove overlapping nodes between conds_x_lagged and conds_y
|
|
2755
|
+
Z += [node for node in conds_x_lagged if node not in Z]
|
|
2756
|
+
|
|
2757
|
+
# If middle mark is '-', then set pval=0
|
|
2758
|
+
if graph[i,j,abstau] != "" and graph[i,j,abstau][1] == '-':
|
|
2759
|
+
val = 1.
|
|
2760
|
+
pval = 0.
|
|
2761
|
+
dependent = True
|
|
2762
|
+
else:
|
|
2763
|
+
val, pval, dependent = self.cond_ind_test.run_test(X=[(i, -abstau)], Y=[(j, 0)],
|
|
2764
|
+
Z=Z, tau_max=tau_max,
|
|
2765
|
+
alpha_or_thres=alpha_or_thres,
|
|
2766
|
+
)
|
|
2767
|
+
|
|
2768
|
+
return val, pval, Z, dependent
|
|
2769
|
+
|
|
2770
|
+
def _print_triple_info(self, triple, index, n_triples):
|
|
2771
|
+
"""Print info about the current triple being tested.
|
|
2772
|
+
|
|
2773
|
+
Parameters
|
|
2774
|
+
----------
|
|
2775
|
+
triple : tuple
|
|
2776
|
+
Standard ((i, tau), k, j) tuple of nodes and time delays.
|
|
2777
|
+
index : int
|
|
2778
|
+
Index of triple.
|
|
2779
|
+
n_triples : int
|
|
2780
|
+
Total number of triples.
|
|
2781
|
+
"""
|
|
2782
|
+
(i, tau), k, j = triple
|
|
2783
|
+
link_marker = {True:"o-o", False:"-->"}
|
|
2784
|
+
|
|
2785
|
+
print("\n Triple (%s % d) %s %s o-o %s (%d/%d)" % (
|
|
2786
|
+
self.var_names[i], tau, link_marker[tau==0], self.var_names[k],
|
|
2787
|
+
self.var_names[j], index + 1, n_triples))
|
|
2788
|
+
|
|
2789
|
+
|
|
2790
|
+
def _tests_remaining(self, i, j, abstau, graph, adjt, p):
|
|
2791
|
+
"""Helper function returning whether a certain pair still needs to be
|
|
2792
|
+
tested."""
|
|
2793
|
+
return graph[i, j, abstau] != "" and len(
|
|
2794
|
+
[a for a in adjt[j] if a != (i, -abstau)]) >= p
|
|
2795
|
+
|
|
2796
|
+
def _any_tests_remaining(self, graph, adjt, tau_min, tau_max, p):
|
|
2797
|
+
"""Helper function returning whether any pair still needs to be
|
|
2798
|
+
tested."""
|
|
2799
|
+
remaining_pairs = self._remaining_pairs(graph, adjt, tau_min, tau_max,
|
|
2800
|
+
p)
|
|
2801
|
+
|
|
2802
|
+
if len(remaining_pairs) > 0:
|
|
2803
|
+
return True
|
|
2804
|
+
else:
|
|
2805
|
+
return False
|
|
2806
|
+
|
|
2807
|
+
def _remaining_pairs(self, graph, adjt, tau_min, tau_max, p):
|
|
2808
|
+
"""Helper function returning the remaining pairs that still need to be
|
|
2809
|
+
tested."""
|
|
2810
|
+
N = graph.shape[0]
|
|
2811
|
+
pairs = []
|
|
2812
|
+
for (i, j) in itertools.product(range(N), range(N)):
|
|
2813
|
+
for abstau in range(tau_min, tau_max + 1):
|
|
2814
|
+
if (graph[i, j, abstau] != ""
|
|
2815
|
+
and len(
|
|
2816
|
+
[a for a in adjt[j] if a != (i, -abstau)]) >= p):
|
|
2817
|
+
pairs.append((i, j, abstau))
|
|
2818
|
+
|
|
2819
|
+
return pairs
|
|
2820
|
+
|
|
2821
|
+
def _pcalg_skeleton(self,
|
|
2822
|
+
initial_graph,
|
|
2823
|
+
lagged_parents,
|
|
2824
|
+
mode,
|
|
2825
|
+
pc_alpha,
|
|
2826
|
+
tau_min,
|
|
2827
|
+
tau_max,
|
|
2828
|
+
max_conds_dim,
|
|
2829
|
+
max_combinations,
|
|
2830
|
+
max_conds_py,
|
|
2831
|
+
max_conds_px,
|
|
2832
|
+
max_conds_px_lagged,
|
|
2833
|
+
):
|
|
2834
|
+
"""Implements the skeleton discovery step of the PC algorithm for
|
|
2835
|
+
time series.
|
|
2836
|
+
|
|
2837
|
+
Parameters
|
|
2838
|
+
----------
|
|
2839
|
+
initial_graph : array of shape (N, N, tau_max+1) or None
|
|
2840
|
+
Initial graph.
|
|
2841
|
+
lagged_parents : dictionary
|
|
2842
|
+
Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
|
|
2843
|
+
additional conditions for each CI test. As part of PCMCIplus
|
|
2844
|
+
these are the superset of lagged parents estimated with the PC1
|
|
2845
|
+
algorithm.
|
|
2846
|
+
mode : {'standard', 'contemp_conds'}
|
|
2847
|
+
For ``mode='contemp_conds'`` this implements Steps 2-4 of the
|
|
2848
|
+
PCMCIplus method. For ``mode='standard'`` this implements the
|
|
2849
|
+
standard PC algorithm adapted to time series.
|
|
2850
|
+
tau_min : int, optional (default: 0)
|
|
2851
|
+
Minimum time lag to test.
|
|
2852
|
+
tau_max : int, optional (default: 1)
|
|
2853
|
+
Maximum time lag. Must be larger or equal to tau_min.
|
|
2854
|
+
pc_alpha : float, optional (default: 0.01)
|
|
2855
|
+
Significance level.
|
|
2856
|
+
max_conds_dim : int, optional (default: None)
|
|
2857
|
+
Maximum number of conditions to test. If None is passed, this number
|
|
2858
|
+
is unrestricted.
|
|
2859
|
+
max_combinations : int
|
|
2860
|
+
Maximum number of combinations of conditions of current cardinality
|
|
2861
|
+
to test. Must be infinite (default for max_combinations=1) for consistency.
|
|
2862
|
+
max_conds_py : int, optional (default: None)
|
|
2863
|
+
Maximum number of lagged conditions of Y to use in MCI tests. If
|
|
2864
|
+
None is passed, this number is unrestricted.
|
|
2865
|
+
max_conds_px : int, optional (default: None)
|
|
2866
|
+
Maximum number of lagged conditions of X to use in MCI tests. If
|
|
2867
|
+
None is passed, this number is unrestricted.
|
|
2868
|
+
max_conds_px_lagged : int, optional (default: None)
|
|
2869
|
+
Maximum number of lagged conditions of X when X is lagged in MCI
|
|
2870
|
+
tests. If None is passed, this number is equal to max_conds_px.
|
|
2871
|
+
|
|
2872
|
+
Returns
|
|
2873
|
+
-------
|
|
2874
|
+
graph : array of shape [N, N, tau_max+1]
|
|
2875
|
+
Resulting causal graph, see description above for interpretation.
|
|
2876
|
+
val_matrix : array of shape [N, N, tau_max+1]
|
|
2877
|
+
Estimated matrix of test statistic values regarding adjacencies.
|
|
2878
|
+
p_matrix : array of shape [N, N, tau_max+1]
|
|
2879
|
+
Estimated matrix of p-values regarding adjacencies.
|
|
2880
|
+
sepsets : dictionary
|
|
2881
|
+
Separating sets. See paper for details.
|
|
2882
|
+
"""
|
|
2883
|
+
N = self.N
|
|
2884
|
+
|
|
2885
|
+
# Form complete graph
|
|
2886
|
+
if initial_graph is None:
|
|
2887
|
+
graph = np.ones((N, N, tau_max + 1), dtype='<U3')
|
|
2888
|
+
graph[:, :, 0] = "o?o"
|
|
2889
|
+
graph[:, :, 1:] = "-?>"
|
|
2890
|
+
else:
|
|
2891
|
+
graph = initial_graph
|
|
2892
|
+
|
|
2893
|
+
# Remove lag-zero self-loops
|
|
2894
|
+
graph[range(N), range(N), 0] = ""
|
|
2895
|
+
|
|
2896
|
+
# Define adjacencies for standard and contemp_conds mode
|
|
2897
|
+
if mode == 'contemp_conds':
|
|
2898
|
+
adjt = self._get_adj_time_series_contemp(graph)
|
|
2899
|
+
elif mode == 'standard':
|
|
2900
|
+
adjt = self._get_adj_time_series(graph)
|
|
2901
|
+
|
|
2902
|
+
val_matrix = np.zeros((N, N, tau_max + 1))
|
|
2903
|
+
|
|
2904
|
+
val_min = dict()
|
|
2905
|
+
for j in range(self.N):
|
|
2906
|
+
val_min[j] = {(p[0], -p[1]): np.inf
|
|
2907
|
+
for p in zip(*np.where(graph[:, j, :] != ""))}
|
|
2908
|
+
|
|
2909
|
+
# Initialize p-values. Set to 1 if there's no link in the initial graph
|
|
2910
|
+
p_matrix = np.zeros((N, N, tau_max + 1))
|
|
2911
|
+
p_matrix[graph == ""] = 1.
|
|
2912
|
+
|
|
2913
|
+
pval_max = dict()
|
|
2914
|
+
for j in range(self.N):
|
|
2915
|
+
pval_max[j] = {(p[0], -p[1]): 0.
|
|
2916
|
+
for p in zip(*np.where(graph[:, j, :] != ""))}
|
|
2917
|
+
|
|
2918
|
+
# TODO: Remove sepsets alltogether?
|
|
2919
|
+
# Intialize sepsets that store the conditions that make i and j
|
|
2920
|
+
# independent
|
|
2921
|
+
sepsets = self._get_sepsets(tau_min, tau_max)
|
|
2922
|
+
|
|
2923
|
+
if self.verbosity > 1:
|
|
2924
|
+
print("\n--------------------------")
|
|
2925
|
+
print("Skeleton discovery phase")
|
|
2926
|
+
print("--------------------------")
|
|
2927
|
+
|
|
2928
|
+
# Start with zero cardinality conditions
|
|
2929
|
+
p = 0
|
|
2930
|
+
while (self._any_tests_remaining(graph, adjt, tau_min, tau_max,
|
|
2931
|
+
p) and p <= max_conds_dim):
|
|
2932
|
+
if self.verbosity > 1:
|
|
2933
|
+
print(
|
|
2934
|
+
"\nTesting contemporaneous condition sets of dimension "
|
|
2935
|
+
"%d: " % p)
|
|
2936
|
+
|
|
2937
|
+
remaining_pairs = self._remaining_pairs(graph, adjt, tau_min,
|
|
2938
|
+
tau_max, p)
|
|
2939
|
+
n_remaining = len(remaining_pairs)
|
|
2940
|
+
for ir, (i, j, abstau) in enumerate(remaining_pairs):
|
|
2941
|
+
# Check if link was not already removed (contemp links)
|
|
2942
|
+
if graph[i, j, abstau] != "":
|
|
2943
|
+
if self.verbosity > 1:
|
|
2944
|
+
self._print_link_info(j=j, index_parent=ir,
|
|
2945
|
+
parent=(i, -abstau),
|
|
2946
|
+
num_parents=n_remaining)
|
|
2947
|
+
|
|
2948
|
+
# Generate all subsets of conditions of cardinality p
|
|
2949
|
+
conditions = list(itertools.combinations(
|
|
2950
|
+
[(k, tauk) for (k, tauk) in adjt[j]
|
|
2951
|
+
if not (k == i and tauk == -abstau)], p))
|
|
2952
|
+
|
|
2953
|
+
n_conditions = len(conditions)
|
|
2954
|
+
if self.verbosity > 1:
|
|
2955
|
+
print(
|
|
2956
|
+
" Iterate through %d subset(s) of conditions: "
|
|
2957
|
+
% n_conditions)
|
|
2958
|
+
if lagged_parents is not None:
|
|
2959
|
+
self._print_pcmciplus_conditions(lagged_parents, i,
|
|
2960
|
+
j, abstau,
|
|
2961
|
+
max_conds_py,
|
|
2962
|
+
max_conds_px,
|
|
2963
|
+
max_conds_px_lagged)
|
|
2964
|
+
nonsig = False
|
|
2965
|
+
# Iterate through condition sets
|
|
2966
|
+
for q, S in enumerate(conditions):
|
|
2967
|
+
if q > max_combinations:
|
|
2968
|
+
break
|
|
2969
|
+
|
|
2970
|
+
# Run MCI test
|
|
2971
|
+
val, pval, Z, dependent = self._run_pcalg_test(graph=graph,
|
|
2972
|
+
i=i, abstau=abstau, j=j, S=S, lagged_parents=lagged_parents,
|
|
2973
|
+
max_conds_py=max_conds_py,
|
|
2974
|
+
max_conds_px=max_conds_px, max_conds_px_lagged=max_conds_px_lagged,
|
|
2975
|
+
tau_max=tau_max, alpha_or_thres=pc_alpha)
|
|
2976
|
+
|
|
2977
|
+
# Store minimum absolute test statistic value for sorting adjt
|
|
2978
|
+
# (only internally used)
|
|
2979
|
+
val_min[j][(i, -abstau)] = min(np.abs(val),
|
|
2980
|
+
val_min[j].get(
|
|
2981
|
+
(i, -abstau)))
|
|
2982
|
+
# Store maximum p-value (only internally used)
|
|
2983
|
+
pval_max[j][(i, -abstau)] = max(pval,
|
|
2984
|
+
pval_max[j].get(
|
|
2985
|
+
(i, -abstau)))
|
|
2986
|
+
|
|
2987
|
+
# Store max. p-value and corresponding value to return
|
|
2988
|
+
if pval >= p_matrix[i, j, abstau]:
|
|
2989
|
+
p_matrix[i, j, abstau] = pval
|
|
2990
|
+
val_matrix[i, j, abstau] = val
|
|
2991
|
+
|
|
2992
|
+
if self.verbosity > 1:
|
|
2993
|
+
self._print_cond_info(Z=S, comb_index=q, pval=pval,
|
|
2994
|
+
val=val)
|
|
2995
|
+
|
|
2996
|
+
# If conditional independence is found, remove link
|
|
2997
|
+
# from graph and store sepsets
|
|
2998
|
+
if not dependent: # pval > pc_alpha:
|
|
2999
|
+
nonsig = True
|
|
3000
|
+
if abstau == 0:
|
|
3001
|
+
graph[i, j, 0] = graph[j, i, 0] = ""
|
|
3002
|
+
sepsets[((i, 0), j)] = sepsets[
|
|
3003
|
+
((j, 0), i)] = list(S)
|
|
3004
|
+
# Also store p-value in other contemp. entry
|
|
3005
|
+
p_matrix[j, i, 0] = p_matrix[i, j, 0]
|
|
3006
|
+
else:
|
|
3007
|
+
graph[i, j, abstau] = ""
|
|
3008
|
+
sepsets[((i, -abstau), j)] = list(S)
|
|
3009
|
+
break
|
|
3010
|
+
|
|
3011
|
+
# Print the results if needed
|
|
3012
|
+
if self.verbosity > 1:
|
|
3013
|
+
self._print_a_pc_result(nonsig,
|
|
3014
|
+
conds_dim=p,
|
|
3015
|
+
max_combinations=
|
|
3016
|
+
max_combinations)
|
|
3017
|
+
else:
|
|
3018
|
+
self._print_link_info(j=j, index_parent=ir,
|
|
3019
|
+
parent=(i, -abstau),
|
|
3020
|
+
num_parents=n_remaining,
|
|
3021
|
+
already_removed=True)
|
|
3022
|
+
|
|
3023
|
+
# Increase condition cardinality
|
|
3024
|
+
p += 1
|
|
3025
|
+
|
|
3026
|
+
# Re-compute adj and sort by minimum absolute test statistic value
|
|
3027
|
+
if mode == 'contemp_conds':
|
|
3028
|
+
adjt = self._get_adj_time_series_contemp(graph, sort_by=val_min)
|
|
3029
|
+
elif mode == 'standard':
|
|
3030
|
+
adjt = self._get_adj_time_series(graph, sort_by=val_min)
|
|
3031
|
+
|
|
3032
|
+
if self.verbosity > 1:
|
|
3033
|
+
print("\nUpdated contemp. adjacencies:")
|
|
3034
|
+
self._print_parents(all_parents=adjt, val_min=val_min,
|
|
3035
|
+
pval_max=pval_max)
|
|
3036
|
+
|
|
3037
|
+
if self.verbosity > 1:
|
|
3038
|
+
if not (self._any_tests_remaining(graph, adjt, tau_min, tau_max,
|
|
3039
|
+
p) and p <= max_conds_dim):
|
|
3040
|
+
print("\nAlgorithm converged at p = %d." % (p - 1))
|
|
3041
|
+
else:
|
|
3042
|
+
print(
|
|
3043
|
+
"\nAlgorithm not yet converged, but max_conds_dim = %d"
|
|
3044
|
+
" reached." % max_conds_dim)
|
|
3045
|
+
|
|
3046
|
+
return {'graph': graph,
|
|
3047
|
+
'sepsets': sepsets,
|
|
3048
|
+
'p_matrix': p_matrix,
|
|
3049
|
+
'val_matrix': val_matrix,
|
|
3050
|
+
}
|
|
3051
|
+
|
|
3052
|
+
def _get_sepsets(self, tau_min, tau_max):
|
|
3053
|
+
"""Returns initial sepsets.
|
|
3054
|
+
|
|
3055
|
+
Parameters
|
|
3056
|
+
----------
|
|
3057
|
+
tau_min : int, optional (default: 0)
|
|
3058
|
+
Minimum time lag to test.
|
|
3059
|
+
tau_max : int, optional (default: 1)
|
|
3060
|
+
Maximum time lag. Must be larger or equal to tau_min.
|
|
3061
|
+
|
|
3062
|
+
Returns
|
|
3063
|
+
-------
|
|
3064
|
+
sepsets : dict
|
|
3065
|
+
Initialized sepsets.
|
|
3066
|
+
"""
|
|
3067
|
+
sepsets = dict([(((i, -tau), j), [])
|
|
3068
|
+
for tau in range(tau_min, tau_max + 1)
|
|
3069
|
+
for i in range(self.N)
|
|
3070
|
+
for j in range(self.N)])
|
|
3071
|
+
|
|
3072
|
+
return sepsets
|
|
3073
|
+
|
|
3074
|
+
def _find_unshielded_triples(self, graph):
|
|
3075
|
+
"""Find unshielded triples i_tau o-(>) k_t o-o j_t with i_tau -/- j_t.
|
|
3076
|
+
|
|
3077
|
+
Excludes conflicting links.
|
|
3078
|
+
|
|
3079
|
+
Parameters
|
|
3080
|
+
----------
|
|
3081
|
+
graph : array of shape [N, N, tau_max+1]
|
|
3082
|
+
Causal graph, see description above for interpretation.
|
|
3083
|
+
|
|
3084
|
+
Returns
|
|
3085
|
+
-------
|
|
3086
|
+
triples : list
|
|
3087
|
+
List of triples.
|
|
3088
|
+
"""
|
|
3089
|
+
|
|
3090
|
+
N = graph.shape[0]
|
|
3091
|
+
adjt = self._get_adj_time_series(graph, include_conflicts=False)
|
|
3092
|
+
|
|
3093
|
+
# Find unshielded triples
|
|
3094
|
+
# Find triples i_tau o-(>) k_t o-o j_t with i_tau -/- j_t
|
|
3095
|
+
triples = []
|
|
3096
|
+
for j in range(N):
|
|
3097
|
+
for (k, tauk) in adjt[j]:
|
|
3098
|
+
if tauk == 0 and graph[k,j,0] == "o-o":
|
|
3099
|
+
for (i, taui) in adjt[k]:
|
|
3100
|
+
if ((i, taui) != (j, 0)
|
|
3101
|
+
and graph[i,j,abs(taui)] == ""
|
|
3102
|
+
and (graph[i,k,abs(taui)] == "o-o"
|
|
3103
|
+
or graph[i,k,abs(taui)] == "-->")):
|
|
3104
|
+
# if not (k == j or (
|
|
3105
|
+
# taui == 0 and (i == k or i == j))):
|
|
3106
|
+
# if ((taui == 0 and graph[i, j, 0] == "" and
|
|
3107
|
+
# graph[j, i, 0] == "" and graph[j, k, 0] == "o-o")
|
|
3108
|
+
# or (taui < 0 and graph[j, k, 0] == "o-o"
|
|
3109
|
+
# and graph[i, j, abs(taui)] == "")):
|
|
3110
|
+
triples.append(((i, taui), k, j))
|
|
3111
|
+
|
|
3112
|
+
return triples
|
|
3113
|
+
|
|
3114
|
+
def _pcalg_colliders(self,
|
|
3115
|
+
graph,
|
|
3116
|
+
sepsets,
|
|
3117
|
+
lagged_parents,
|
|
3118
|
+
mode,
|
|
3119
|
+
pc_alpha,
|
|
3120
|
+
tau_max,
|
|
3121
|
+
max_conds_py,
|
|
3122
|
+
max_conds_px,
|
|
3123
|
+
max_conds_px_lagged,
|
|
3124
|
+
contemp_collider_rule,
|
|
3125
|
+
conflict_resolution,
|
|
3126
|
+
):
|
|
3127
|
+
"""Implements the collider orientation step of the PC algorithm for
|
|
3128
|
+
time series.
|
|
3129
|
+
|
|
3130
|
+
Parameters
|
|
3131
|
+
----------
|
|
3132
|
+
graph : array of shape (N, N, tau_max+1)
|
|
3133
|
+
Current graph.
|
|
3134
|
+
sepsets : dictionary
|
|
3135
|
+
Separating sets. See paper for details.
|
|
3136
|
+
lagged_parents : dictionary
|
|
3137
|
+
Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
|
|
3138
|
+
additional conditions for each CI test. As part of PCMCIplus
|
|
3139
|
+
these are the superset of lagged parents estimated with the PC1
|
|
3140
|
+
algorithm.
|
|
3141
|
+
mode : {'standard', 'contemp_conds'}
|
|
3142
|
+
For ``mode='contemp_conds'`` this implements Steps 2-4 of the
|
|
3143
|
+
PCMCIplus method. For ``mode='standard'`` this implements the
|
|
3144
|
+
standard PC algorithm adapted to time series.
|
|
3145
|
+
pc_alpha : float, optional (default: 0.01)
|
|
3146
|
+
Significance level.
|
|
3147
|
+
tau_max : int, optional (default: 1)
|
|
3148
|
+
Maximum time lag. Must be larger or equal to tau_min.
|
|
3149
|
+
max_conds_py : int, optional (default: None)
|
|
3150
|
+
Maximum number of lagged conditions of Y to use in MCI tests. If
|
|
3151
|
+
None is passed, this number is unrestricted.
|
|
3152
|
+
max_conds_px : int, optional (default: None)
|
|
3153
|
+
Maximum number of lagged conditions of X to use in MCI tests. If
|
|
3154
|
+
None is passed, this number is unrestricted.
|
|
3155
|
+
max_conds_px_lagged : int, optional (default: None)
|
|
3156
|
+
Maximum number of lagged conditions of X when X is lagged in MCI
|
|
3157
|
+
tests. If None is passed, this number is equal to max_conds_px.
|
|
3158
|
+
contemp_collider_rule : {'majority', 'conservative', 'none'}
|
|
3159
|
+
Rule for collider phase to use. See the paper for details. Only
|
|
3160
|
+
'majority' and 'conservative' lead to an order-independent
|
|
3161
|
+
algorithm.
|
|
3162
|
+
conflict_resolution : bool, optional (default: True)
|
|
3163
|
+
Whether to mark conflicts in orientation rules. Only for True
|
|
3164
|
+
this leads to an order-independent algorithm.
|
|
3165
|
+
|
|
3166
|
+
Returns
|
|
3167
|
+
-------
|
|
3168
|
+
graph : array of shape [N, N, tau_max+1]
|
|
3169
|
+
Resulting causal graph, see description above for interpretation.
|
|
3170
|
+
sepsets : dictionary
|
|
3171
|
+
Separating sets. See paper for details.
|
|
3172
|
+
ambiguous_triples : list
|
|
3173
|
+
List of ambiguous triples, only relevant for 'majority' and
|
|
3174
|
+
'conservative' rules, see paper for details.
|
|
3175
|
+
"""
|
|
3176
|
+
|
|
3177
|
+
if self.verbosity > 1:
|
|
3178
|
+
print("\n----------------------------")
|
|
3179
|
+
print("Collider orientation phase")
|
|
3180
|
+
print("----------------------------")
|
|
3181
|
+
print("\ncontemp_collider_rule = %s" % contemp_collider_rule)
|
|
3182
|
+
print("conflict_resolution = %s\n" % conflict_resolution)
|
|
3183
|
+
|
|
3184
|
+
# Check that no middle mark '?' exists
|
|
3185
|
+
for (i, j, tau) in zip(*np.where(graph!='')):
|
|
3186
|
+
if graph[i,j,tau][1] != '-':
|
|
3187
|
+
raise ValueError("Middle mark '?' exists!")
|
|
3188
|
+
|
|
3189
|
+
# Find unshielded triples
|
|
3190
|
+
triples = self._find_unshielded_triples(graph)
|
|
3191
|
+
|
|
3192
|
+
v_structures = []
|
|
3193
|
+
ambiguous_triples = []
|
|
3194
|
+
|
|
3195
|
+
if contemp_collider_rule is None or contemp_collider_rule == 'none':
|
|
3196
|
+
# Standard collider orientation rule of PC algorithm
|
|
3197
|
+
# If k_t not in sepsets(i_tau, j_t), then orient
|
|
3198
|
+
# as i_tau --> k_t <-- j_t
|
|
3199
|
+
for itaukj in triples:
|
|
3200
|
+
(i, tau), k, j = itaukj
|
|
3201
|
+
if (k, 0) not in sepsets[((i, tau), j)]:
|
|
3202
|
+
v_structures.append(itaukj)
|
|
3203
|
+
else:
|
|
3204
|
+
# Apply 'majority' or 'conservative' rule to orient colliders
|
|
3205
|
+
# Compute all (contemp) subsets of potential parents of i and all
|
|
3206
|
+
# subsets of potential parents of j that make i and j independent
|
|
3207
|
+
def subsets(s):
|
|
3208
|
+
if len(s) == 0: return []
|
|
3209
|
+
subsets = []
|
|
3210
|
+
for cardinality in range(len(s) + 1):
|
|
3211
|
+
subsets += list(itertools.combinations(s, cardinality))
|
|
3212
|
+
subsets = [list(sub) for sub in list(set(subsets))]
|
|
3213
|
+
return subsets
|
|
3214
|
+
|
|
3215
|
+
# We only consider contemporaneous adjacencies because only these
|
|
3216
|
+
# can include the (contemp) k. Furthermore, next to adjacencies of j,
|
|
3217
|
+
# we only need to check adjacencies of i for tau=0
|
|
3218
|
+
if mode == 'contemp_conds':
|
|
3219
|
+
adjt = self._get_adj_time_series_contemp(graph)
|
|
3220
|
+
elif mode == 'standard':
|
|
3221
|
+
adjt = self._get_adj_time_series(graph)
|
|
3222
|
+
|
|
3223
|
+
n_triples = len(triples)
|
|
3224
|
+
for ir, itaukj in enumerate(triples):
|
|
3225
|
+
(i, tau), k, j = itaukj
|
|
3226
|
+
|
|
3227
|
+
if self.verbosity > 1:
|
|
3228
|
+
self._print_triple_info(itaukj, ir, n_triples)
|
|
3229
|
+
|
|
3230
|
+
neighbor_subsets_tmp = subsets(
|
|
3231
|
+
[(l, taul) for (l, taul) in adjt[j]
|
|
3232
|
+
if not (l == i and tau == taul)])
|
|
3233
|
+
if tau == 0:
|
|
3234
|
+
# Furthermore, we only need to check contemp. adjacencies
|
|
3235
|
+
# of i for tau=0
|
|
3236
|
+
neighbor_subsets_tmp += subsets(
|
|
3237
|
+
[(l, taul) for (l, taul) in adjt[i]
|
|
3238
|
+
if not (l == j and taul == 0)])
|
|
3239
|
+
|
|
3240
|
+
# Make unique
|
|
3241
|
+
neighbor_subsets = []
|
|
3242
|
+
for subset in neighbor_subsets_tmp:
|
|
3243
|
+
if subset not in neighbor_subsets:
|
|
3244
|
+
neighbor_subsets.append(subset)
|
|
3245
|
+
|
|
3246
|
+
n_neighbors = len(neighbor_subsets)
|
|
3247
|
+
|
|
3248
|
+
if self.verbosity > 1:
|
|
3249
|
+
print(
|
|
3250
|
+
" Iterate through %d condition subset(s) of "
|
|
3251
|
+
"neighbors: " % n_neighbors)
|
|
3252
|
+
if lagged_parents is not None:
|
|
3253
|
+
self._print_pcmciplus_conditions(lagged_parents, i, j,
|
|
3254
|
+
abs(tau), max_conds_py, max_conds_px,
|
|
3255
|
+
max_conds_px_lagged)
|
|
3256
|
+
|
|
3257
|
+
# Test which neighbor subsets separate i and j
|
|
3258
|
+
neighbor_sepsets = []
|
|
3259
|
+
for iss, S in enumerate(neighbor_subsets):
|
|
3260
|
+
val, pval, Z, dependent = self._run_pcalg_test(graph=graph,
|
|
3261
|
+
i=i, abstau=abs(tau), j=j, S=S, lagged_parents=lagged_parents,
|
|
3262
|
+
max_conds_py=max_conds_py,
|
|
3263
|
+
max_conds_px=max_conds_px, max_conds_px_lagged=max_conds_px_lagged,
|
|
3264
|
+
tau_max=tau_max, alpha_or_thres=pc_alpha)
|
|
3265
|
+
|
|
3266
|
+
if self.verbosity > 1:
|
|
3267
|
+
self._print_cond_info(Z=S, comb_index=iss, pval=pval,
|
|
3268
|
+
val=val)
|
|
3269
|
+
|
|
3270
|
+
if not dependent: #pval > pc_alpha:
|
|
3271
|
+
neighbor_sepsets += [S]
|
|
3272
|
+
|
|
3273
|
+
if len(neighbor_sepsets) > 0:
|
|
3274
|
+
fraction = np.sum(
|
|
3275
|
+
[(k, 0) in S for S in neighbor_sepsets]) / float(
|
|
3276
|
+
len(neighbor_sepsets))
|
|
3277
|
+
|
|
3278
|
+
if contemp_collider_rule == 'conservative':
|
|
3279
|
+
# Triple is labeled as unambiguous if at least one
|
|
3280
|
+
# separating set is found and either k is in ALL
|
|
3281
|
+
# (fraction == 1) or NONE (fraction == 0) of them
|
|
3282
|
+
if len(neighbor_sepsets) == 0:
|
|
3283
|
+
if self.verbosity > 1:
|
|
3284
|
+
print(
|
|
3285
|
+
" No separating subsets --> ambiguous "
|
|
3286
|
+
"triple found")
|
|
3287
|
+
ambiguous_triples.append(itaukj)
|
|
3288
|
+
else:
|
|
3289
|
+
if fraction == 0:
|
|
3290
|
+
# If (k, 0) is in none of the neighbor_sepsets,
|
|
3291
|
+
# orient as collider
|
|
3292
|
+
v_structures.append(itaukj)
|
|
3293
|
+
if self.verbosity > 1:
|
|
3294
|
+
print(
|
|
3295
|
+
" Fraction of separating subsets "
|
|
3296
|
+
"containing (%s 0) is = 0 --> collider "
|
|
3297
|
+
"found" % self.var_names[k])
|
|
3298
|
+
# Also delete (k, 0) from sepsets (if present)
|
|
3299
|
+
if (k, 0) in sepsets[((i, tau), j)]:
|
|
3300
|
+
sepsets[((i, tau), j)].remove((k, 0))
|
|
3301
|
+
if tau == 0:
|
|
3302
|
+
if (k, 0) in sepsets[((j, tau), i)]:
|
|
3303
|
+
sepsets[((j, tau), i)].remove((k, 0))
|
|
3304
|
+
elif fraction == 1:
|
|
3305
|
+
# If (k, 0) is in all of the neighbor_sepsets,
|
|
3306
|
+
# leave unoriented
|
|
3307
|
+
if self.verbosity > 1:
|
|
3308
|
+
print(
|
|
3309
|
+
" Fraction of separating subsets "
|
|
3310
|
+
"containing (%s 0) is = 1 --> "
|
|
3311
|
+
"non-collider found" % self.var_names[k])
|
|
3312
|
+
# Also add (k, 0) to sepsets (if not present)
|
|
3313
|
+
if (k, 0) not in sepsets[((i, tau), j)]:
|
|
3314
|
+
sepsets[((i, tau), j)].append((k, 0))
|
|
3315
|
+
if tau == 0:
|
|
3316
|
+
if (k, 0) not in sepsets[((j, tau), i)]:
|
|
3317
|
+
sepsets[((j, tau), i)].append((k, 0))
|
|
3318
|
+
else:
|
|
3319
|
+
if self.verbosity > 1:
|
|
3320
|
+
print(
|
|
3321
|
+
" Fraction of separating subsets "
|
|
3322
|
+
"containing (%s 0) is = between 0 and 1 "
|
|
3323
|
+
"--> ambiguous triple found" %
|
|
3324
|
+
self.var_names[k])
|
|
3325
|
+
ambiguous_triples.append(itaukj)
|
|
3326
|
+
|
|
3327
|
+
elif contemp_collider_rule == 'majority':
|
|
3328
|
+
|
|
3329
|
+
if len(neighbor_sepsets) == 0:
|
|
3330
|
+
if self.verbosity > 1:
|
|
3331
|
+
print(
|
|
3332
|
+
" No separating subsets --> ambiguous "
|
|
3333
|
+
"triple found")
|
|
3334
|
+
ambiguous_triples.append(itaukj)
|
|
3335
|
+
else:
|
|
3336
|
+
if fraction == 0.5:
|
|
3337
|
+
if self.verbosity > 1:
|
|
3338
|
+
print(
|
|
3339
|
+
" Fraction of separating subsets "
|
|
3340
|
+
"containing (%s 0) is = 0.5 --> ambiguous "
|
|
3341
|
+
"triple found" % self.var_names[k])
|
|
3342
|
+
ambiguous_triples.append(itaukj)
|
|
3343
|
+
elif fraction < 0.5:
|
|
3344
|
+
v_structures.append(itaukj)
|
|
3345
|
+
if self.verbosity > 1:
|
|
3346
|
+
print(
|
|
3347
|
+
" Fraction of separating subsets "
|
|
3348
|
+
"containing (%s 0) is < 0.5 "
|
|
3349
|
+
"--> collider found" % self.var_names[k])
|
|
3350
|
+
# Also delete (k, 0) from sepsets (if present)
|
|
3351
|
+
if (k, 0) in sepsets[((i, tau), j)]:
|
|
3352
|
+
sepsets[((i, tau), j)].remove((k, 0))
|
|
3353
|
+
if tau == 0:
|
|
3354
|
+
if (k, 0) in sepsets[((j, tau), i)]:
|
|
3355
|
+
sepsets[((j, tau), i)].remove((k, 0))
|
|
3356
|
+
elif fraction > 0.5:
|
|
3357
|
+
if self.verbosity > 1:
|
|
3358
|
+
print(
|
|
3359
|
+
" Fraction of separating subsets "
|
|
3360
|
+
"containing (%s 0) is > 0.5 "
|
|
3361
|
+
"--> non-collider found" %
|
|
3362
|
+
self.var_names[k])
|
|
3363
|
+
# Also add (k, 0) to sepsets (if not present)
|
|
3364
|
+
if (k, 0) not in sepsets[((i, tau), j)]:
|
|
3365
|
+
sepsets[((i, tau), j)].append((k, 0))
|
|
3366
|
+
if tau == 0:
|
|
3367
|
+
if (k, 0) not in sepsets[((j, tau), i)]:
|
|
3368
|
+
sepsets[((j, tau), i)].append((k, 0))
|
|
3369
|
+
|
|
3370
|
+
if self.verbosity > 1 and len(v_structures) > 0:
|
|
3371
|
+
print("\nOrienting links among colliders:")
|
|
3372
|
+
|
|
3373
|
+
link_marker = {True:"o-o", False:"-->"}
|
|
3374
|
+
|
|
3375
|
+
# Now go through list of v-structures and (optionally) detect conflicts
|
|
3376
|
+
oriented_links = []
|
|
3377
|
+
for itaukj in v_structures:
|
|
3378
|
+
(i, tau), k, j = itaukj
|
|
3379
|
+
|
|
3380
|
+
if self.verbosity > 1:
|
|
3381
|
+
print("\n Collider (%s % d) %s %s o-o %s:" % (
|
|
3382
|
+
self.var_names[i], tau, link_marker[
|
|
3383
|
+
tau==0], self.var_names[k],
|
|
3384
|
+
self.var_names[j]))
|
|
3385
|
+
|
|
3386
|
+
if (k, j) not in oriented_links and (j, k) not in oriented_links:
|
|
3387
|
+
if self.verbosity > 1:
|
|
3388
|
+
print(" Orient %s o-o %s as %s --> %s " % (
|
|
3389
|
+
self.var_names[j], self.var_names[k], self.var_names[j],
|
|
3390
|
+
self.var_names[k]))
|
|
3391
|
+
# graph[k, j, 0] = 0
|
|
3392
|
+
graph[k, j, 0] = "<--" #0
|
|
3393
|
+
graph[j, k, 0] = "-->"
|
|
3394
|
+
|
|
3395
|
+
oriented_links.append((j, k))
|
|
3396
|
+
else:
|
|
3397
|
+
if conflict_resolution is False and self.verbosity > 1:
|
|
3398
|
+
print(" Already oriented")
|
|
3399
|
+
|
|
3400
|
+
if conflict_resolution:
|
|
3401
|
+
if (k, j) in oriented_links:
|
|
3402
|
+
if self.verbosity > 1:
|
|
3403
|
+
print(
|
|
3404
|
+
" Conflict since %s <-- %s already "
|
|
3405
|
+
"oriented: Mark link as `2` in graph" % (
|
|
3406
|
+
self.var_names[j], self.var_names[k]))
|
|
3407
|
+
graph[j, k, 0] = graph[k, j, 0] = "x-x" #2
|
|
3408
|
+
|
|
3409
|
+
if tau == 0:
|
|
3410
|
+
if (i, k) not in oriented_links and (
|
|
3411
|
+
k, i) not in oriented_links:
|
|
3412
|
+
if self.verbosity > 1:
|
|
3413
|
+
print(" Orient %s o-o %s as %s --> %s " % (
|
|
3414
|
+
self.var_names[i], self.var_names[k],
|
|
3415
|
+
self.var_names[i], self.var_names[k]))
|
|
3416
|
+
graph[k, i, 0] = "<--" #0
|
|
3417
|
+
graph[i, k, 0] = "-->"
|
|
3418
|
+
|
|
3419
|
+
oriented_links.append((i, k))
|
|
3420
|
+
else:
|
|
3421
|
+
if conflict_resolution is False and self.verbosity > 1:
|
|
3422
|
+
print(" Already oriented")
|
|
3423
|
+
|
|
3424
|
+
if conflict_resolution:
|
|
3425
|
+
if (k, i) in oriented_links:
|
|
3426
|
+
if self.verbosity > 1:
|
|
3427
|
+
print(
|
|
3428
|
+
" Conflict since %s <-- %s already "
|
|
3429
|
+
"oriented: Mark link as `2` in graph" % (
|
|
3430
|
+
self.var_names[i], self.var_names[k]))
|
|
3431
|
+
graph[i, k, 0] = graph[k, i, 0] = "x-x" #2
|
|
3432
|
+
|
|
3433
|
+
if self.verbosity > 1:
|
|
3434
|
+
adjt = self._get_adj_time_series(graph)
|
|
3435
|
+
print("\nUpdated adjacencies:")
|
|
3436
|
+
self._print_parents(all_parents=adjt, val_min=None, pval_max=None)
|
|
3437
|
+
|
|
3438
|
+
return {'graph': graph,
|
|
3439
|
+
'sepsets': sepsets,
|
|
3440
|
+
'ambiguous_triples': ambiguous_triples,
|
|
3441
|
+
}
|
|
3442
|
+
|
|
3443
|
+
def _find_triples_rule1(self, graph):
|
|
3444
|
+
"""Find triples i_tau --> k_t o-o j_t with i_tau -/- j_t.
|
|
3445
|
+
|
|
3446
|
+
Excludes conflicting links.
|
|
3447
|
+
|
|
3448
|
+
Parameters
|
|
3449
|
+
----------
|
|
3450
|
+
graph : array of shape [N, N, tau_max+1]
|
|
3451
|
+
Causal graph, see description above for interpretation.
|
|
3452
|
+
|
|
3453
|
+
Returns
|
|
3454
|
+
-------
|
|
3455
|
+
triples : list
|
|
3456
|
+
List of triples.
|
|
3457
|
+
"""
|
|
3458
|
+
adjt = self._get_adj_time_series(graph, include_conflicts=False)
|
|
3459
|
+
|
|
3460
|
+
N = graph.shape[0]
|
|
3461
|
+
triples = []
|
|
3462
|
+
for j in range(N):
|
|
3463
|
+
for (k, tauk) in adjt[j]:
|
|
3464
|
+
if tauk == 0 and graph[j, k, 0] == 'o-o':
|
|
3465
|
+
for (i, taui) in adjt[k]:
|
|
3466
|
+
if ((i, taui) != (j, 0)
|
|
3467
|
+
and graph[i,j,abs(taui)] == ""
|
|
3468
|
+
and (graph[i,k,abs(taui)] == "-->")):
|
|
3469
|
+
triples.append(((i, taui), k, j))
|
|
3470
|
+
return triples
|
|
3471
|
+
|
|
3472
|
+
def _find_triples_rule2(self, graph):
|
|
3473
|
+
"""Find triples i_t --> k_t --> j_t with i_t o-o j_t.
|
|
3474
|
+
|
|
3475
|
+
Excludes conflicting links.
|
|
3476
|
+
|
|
3477
|
+
Parameters
|
|
3478
|
+
----------
|
|
3479
|
+
graph : array of shape [N, N, tau_max+1]
|
|
3480
|
+
Causal graph, see description above for interpretation.
|
|
3481
|
+
|
|
3482
|
+
Returns
|
|
3483
|
+
-------
|
|
3484
|
+
triples : list
|
|
3485
|
+
List of triples.
|
|
3486
|
+
"""
|
|
3487
|
+
|
|
3488
|
+
adjtcont = self._get_adj_time_series_contemp(graph,
|
|
3489
|
+
include_conflicts=False)
|
|
3490
|
+
N = graph.shape[0]
|
|
3491
|
+
|
|
3492
|
+
triples = []
|
|
3493
|
+
for j in range(N):
|
|
3494
|
+
for (k, tauk) in adjtcont[j]:
|
|
3495
|
+
if graph[k, j, 0] == '-->':
|
|
3496
|
+
for (i, taui) in adjtcont[k]:
|
|
3497
|
+
if graph[i, k, 0] == '-->' and (i, taui) != (j, 0):
|
|
3498
|
+
if graph[i, j, 0] == 'o-o' and graph[j, i, 0] == 'o-o':
|
|
3499
|
+
triples.append(((i, 0), k, j))
|
|
3500
|
+
return triples
|
|
3501
|
+
|
|
3502
|
+
def _find_chains_rule3(self, graph):
|
|
3503
|
+
"""Find chains i_t o-o k_t --> j_t and i_t o-o l_t --> j_t with
|
|
3504
|
+
i_t o-o j_t and k_t -/- l_t.
|
|
3505
|
+
|
|
3506
|
+
Excludes conflicting links.
|
|
3507
|
+
|
|
3508
|
+
Parameters
|
|
3509
|
+
----------
|
|
3510
|
+
graph : array of shape [N, N, tau_max+1]
|
|
3511
|
+
Causal graph, see description above for interpretation.
|
|
3512
|
+
|
|
3513
|
+
Returns
|
|
3514
|
+
-------
|
|
3515
|
+
chains : list
|
|
3516
|
+
List of chains.
|
|
3517
|
+
"""
|
|
3518
|
+
N = graph.shape[0]
|
|
3519
|
+
adjtcont = self._get_adj_time_series_contemp(graph,
|
|
3520
|
+
include_conflicts=False)
|
|
3521
|
+
|
|
3522
|
+
chains = []
|
|
3523
|
+
for j in range(N):
|
|
3524
|
+
for (i, _) in adjtcont[j]:
|
|
3525
|
+
if graph[j, i, 0] == 'o-o':
|
|
3526
|
+
for (k, _) in adjtcont[j]:
|
|
3527
|
+
for (l, _) in adjtcont[j]:
|
|
3528
|
+
if ((k != l)
|
|
3529
|
+
and (k != i)
|
|
3530
|
+
and (l != i)
|
|
3531
|
+
and graph[k,j,0] == "-->"
|
|
3532
|
+
and graph[l,j,0] == "-->"
|
|
3533
|
+
and graph[k,i,0] == "o-o"
|
|
3534
|
+
and graph[l,i,0] == "o-o"
|
|
3535
|
+
and graph[k,l,0] == ""
|
|
3536
|
+
):
|
|
3537
|
+
chains.append((((i, 0), k, j),
|
|
3538
|
+
((i, 0), l, j)))
|
|
3539
|
+
|
|
3540
|
+
return chains
|
|
3541
|
+
|
|
3542
|
+
def _pcalg_rules_timeseries(self,
|
|
3543
|
+
graph,
|
|
3544
|
+
ambiguous_triples,
|
|
3545
|
+
conflict_resolution,
|
|
3546
|
+
):
|
|
3547
|
+
"""Implements the rule orientation step of the PC algorithm for
|
|
3548
|
+
time series.
|
|
3549
|
+
|
|
3550
|
+
Parameters
|
|
3551
|
+
----------
|
|
3552
|
+
graph : array of shape (N, N, tau_max+1)
|
|
3553
|
+
Current graph.
|
|
3554
|
+
ambiguous_triples : list
|
|
3555
|
+
List of ambiguous triples, only relevant for 'majority' and
|
|
3556
|
+
'conservative' rules, see paper for details.
|
|
3557
|
+
conflict_resolution : bool
|
|
3558
|
+
Whether to mark conflicts in orientation rules. Only for True
|
|
3559
|
+
this leads to an order-independent algorithm.
|
|
3560
|
+
|
|
3561
|
+
Returns
|
|
3562
|
+
-------
|
|
3563
|
+
graph : array of shape [N, N, tau_max+1]
|
|
3564
|
+
Resulting causal graph, see description above for interpretation.
|
|
3565
|
+
"""
|
|
3566
|
+
N = graph.shape[0]
|
|
3567
|
+
|
|
3568
|
+
def rule1(graph, oriented_links):
|
|
3569
|
+
"""Find (unambiguous) triples i_tau --> k_t o-o j_t with
|
|
3570
|
+
i_tau -/- j_t and orient as i_tau --> k_t --> j_t.
|
|
3571
|
+
"""
|
|
3572
|
+
triples = self._find_triples_rule1(graph)
|
|
3573
|
+
triples_left = False
|
|
3574
|
+
|
|
3575
|
+
for itaukj in triples:
|
|
3576
|
+
if itaukj not in ambiguous_triples:
|
|
3577
|
+
triples_left = True
|
|
3578
|
+
# Orient as i_tau --> k_t --> j_t
|
|
3579
|
+
(i, tau), k, j = itaukj
|
|
3580
|
+
if (j, k) not in oriented_links and (
|
|
3581
|
+
k, j) not in oriented_links:
|
|
3582
|
+
if self.verbosity > 1:
|
|
3583
|
+
print(
|
|
3584
|
+
" R1: Found (%s % d) --> %s o-o %s, "
|
|
3585
|
+
"orient as %s --> %s" % (
|
|
3586
|
+
self.var_names[i], tau, self.var_names[k],
|
|
3587
|
+
self.var_names[j],
|
|
3588
|
+
self.var_names[k], self.var_names[j]))
|
|
3589
|
+
# graph[j, k, 0] = 0
|
|
3590
|
+
graph[k, j, 0] = '-->'
|
|
3591
|
+
graph[j, k, 0] = '<--' # 0
|
|
3592
|
+
|
|
3593
|
+
oriented_links.append((k, j))
|
|
3594
|
+
|
|
3595
|
+
if conflict_resolution:
|
|
3596
|
+
if (j, k) in oriented_links:
|
|
3597
|
+
if self.verbosity > 1:
|
|
3598
|
+
print(
|
|
3599
|
+
" Conflict since %s <-- %s already"
|
|
3600
|
+
" oriented: Mark link as `2` in graph" % (
|
|
3601
|
+
self.var_names[k], self.var_names[j]))
|
|
3602
|
+
# graph[j, k, 0] = graph[k, j, 0] = 2
|
|
3603
|
+
graph[j, k, 0] = graph[k, j, 0] = 'x-x'
|
|
3604
|
+
|
|
3605
|
+
return triples_left, graph, oriented_links
|
|
3606
|
+
|
|
3607
|
+
def rule2(graph, oriented_links):
|
|
3608
|
+
"""Find (unambiguous) triples i_t --> k_t --> j_t with i_t o-o j_t
|
|
3609
|
+
and orient as i_t --> j_t.
|
|
3610
|
+
"""
|
|
3611
|
+
|
|
3612
|
+
triples = self._find_triples_rule2(graph)
|
|
3613
|
+
triples_left = False
|
|
3614
|
+
|
|
3615
|
+
for itaukj in triples:
|
|
3616
|
+
if itaukj not in ambiguous_triples:
|
|
3617
|
+
# TODO: CHeck whether this is actually needed
|
|
3618
|
+
# since ambiguous triples are always unshielded and here
|
|
3619
|
+
# we look for triples where i and j are connected
|
|
3620
|
+
triples_left = True
|
|
3621
|
+
# Orient as i_t --> j_t
|
|
3622
|
+
(i, tau), k, j = itaukj
|
|
3623
|
+
if (j, i) not in oriented_links and (
|
|
3624
|
+
i, j) not in oriented_links:
|
|
3625
|
+
if self.verbosity > 1:
|
|
3626
|
+
print(
|
|
3627
|
+
" R2: Found %s --> %s --> %s with %s "
|
|
3628
|
+
"o-o %s, orient as %s --> %s" % (
|
|
3629
|
+
self.var_names[i], self.var_names[k],
|
|
3630
|
+
self.var_names[j],
|
|
3631
|
+
self.var_names[i], self.var_names[j],
|
|
3632
|
+
self.var_names[i], self.var_names[j]))
|
|
3633
|
+
graph[i, j, 0] = '-->'
|
|
3634
|
+
graph[j, i, 0] = '<--' # 0
|
|
3635
|
+
|
|
3636
|
+
oriented_links.append((i, j))
|
|
3637
|
+
if conflict_resolution:
|
|
3638
|
+
if (j, i) in oriented_links:
|
|
3639
|
+
if self.verbosity > 1:
|
|
3640
|
+
print(
|
|
3641
|
+
" Conflict since %s <-- %s already "
|
|
3642
|
+
"oriented: Mark link as `2` in graph" % (
|
|
3643
|
+
self.var_names[i], self.var_names[j]))
|
|
3644
|
+
# graph[j, i, 0] = graph[i, j, 0] = 2
|
|
3645
|
+
graph[j, i, 0] = graph[i, j, 0] = 'x-x'
|
|
3646
|
+
|
|
3647
|
+
return triples_left, graph, oriented_links
|
|
3648
|
+
|
|
3649
|
+
def rule3(graph, oriented_links):
|
|
3650
|
+
"""Find (unambiguous) chains i_t o-o k_t --> j_t
|
|
3651
|
+
and i_t o-o l_t --> j_t with i_t o-o j_t
|
|
3652
|
+
and k_t -/- l_t: Orient as i_t --> j_t.
|
|
3653
|
+
"""
|
|
3654
|
+
# First find all chains i_t -- k_t --> j_t with i_t -- j_t
|
|
3655
|
+
# and k_t -/- l_t
|
|
3656
|
+
chains = self._find_chains_rule3(graph)
|
|
3657
|
+
|
|
3658
|
+
chains_left = False
|
|
3659
|
+
|
|
3660
|
+
for (itaukj, itaulj) in chains:
|
|
3661
|
+
if (itaukj not in ambiguous_triples and
|
|
3662
|
+
itaulj not in ambiguous_triples):
|
|
3663
|
+
# TODO: CHeck whether this is actually needed
|
|
3664
|
+
# since ambiguous triples are always unshielded and here
|
|
3665
|
+
# we look for triples where i and j are connected
|
|
3666
|
+
chains_left = True
|
|
3667
|
+
# Orient as i_t --> j_t
|
|
3668
|
+
(i, tau), k, j = itaukj
|
|
3669
|
+
_ , l, _ = itaulj
|
|
3670
|
+
|
|
3671
|
+
if (j, i) not in oriented_links and (
|
|
3672
|
+
i, j) not in oriented_links:
|
|
3673
|
+
if self.verbosity > 1:
|
|
3674
|
+
print(
|
|
3675
|
+
" R3: Found %s o-o %s --> %s and %s o-o "
|
|
3676
|
+
"%s --> %s with %s o-o %s and %s -/- %s, "
|
|
3677
|
+
"orient as %s --> %s" % (
|
|
3678
|
+
self.var_names[i], self.var_names[k],
|
|
3679
|
+
self.var_names[j], self.var_names[i],
|
|
3680
|
+
self.var_names[l], self.var_names[j],
|
|
3681
|
+
self.var_names[i], self.var_names[j],
|
|
3682
|
+
self.var_names[k], self.var_names[l],
|
|
3683
|
+
self.var_names[i], self.var_names[j]))
|
|
3684
|
+
graph[i, j, 0] = '-->'
|
|
3685
|
+
graph[j, i, 0] = '<--' # 0
|
|
3686
|
+
|
|
3687
|
+
oriented_links.append((i, j))
|
|
3688
|
+
if conflict_resolution:
|
|
3689
|
+
if (j, i) in oriented_links:
|
|
3690
|
+
if self.verbosity > 1:
|
|
3691
|
+
print(
|
|
3692
|
+
" Conflict since %s <-- %s already "
|
|
3693
|
+
"oriented: Mark link as `2` in graph" % (
|
|
3694
|
+
self.var_names[i], self.var_names[j]))
|
|
3695
|
+
graph[j, i, 0] = graph[i, j, 0] = 'x-x'
|
|
3696
|
+
|
|
3697
|
+
return chains_left, graph, oriented_links
|
|
3698
|
+
|
|
3699
|
+
if self.verbosity > 1:
|
|
3700
|
+
print("\n")
|
|
3701
|
+
print("----------------------------")
|
|
3702
|
+
print("Rule orientation phase")
|
|
3703
|
+
print("----------------------------")
|
|
3704
|
+
|
|
3705
|
+
oriented_links = []
|
|
3706
|
+
graph_new = np.copy(graph)
|
|
3707
|
+
any1 = any2 = any3 = True
|
|
3708
|
+
while (any1 or any2 or any3):
|
|
3709
|
+
if self.verbosity > 1:
|
|
3710
|
+
print("\nTry rule(s) %s" % (
|
|
3711
|
+
np.where(np.array([0, any1, any2, any3]))))
|
|
3712
|
+
any1, graph_new, oriented_links = rule1(graph_new, oriented_links)
|
|
3713
|
+
any2, graph_new, oriented_links = rule2(graph_new, oriented_links)
|
|
3714
|
+
any3, graph_new, oriented_links = rule3(graph_new, oriented_links)
|
|
3715
|
+
|
|
3716
|
+
if self.verbosity > 1:
|
|
3717
|
+
adjt = self._get_adj_time_series(graph_new)
|
|
3718
|
+
print("\nUpdated adjacencies:")
|
|
3719
|
+
self._print_parents(all_parents=adjt, val_min=None, pval_max=None)
|
|
3720
|
+
|
|
3721
|
+
return graph_new
|
|
3722
|
+
|
|
3723
|
+
    def _optimize_pcmciplus_alpha(self,
                                  link_assumptions,
                                  tau_min,
                                  tau_max,
                                  pc_alpha,
                                  contemp_collider_rule,
                                  conflict_resolution,
                                  reset_lagged_links,
                                  max_conds_dim,
                                  max_combinations,
                                  max_conds_py,
                                  max_conds_px,
                                  max_conds_px_lagged,
                                  fdr_method,
                                  ):
        """Optimizes pc_alpha in PCMCIplus.

        If a list or None is passed for ``pc_alpha``, the significance level is
        optimized for every graph across the given ``pc_alpha`` values using the
        score computed in ``cond_ind_test.get_model_selection_criterion()``.

        Parameters
        ----------
        See those for run_pcmciplus()

        Returns
        -------
        Results for run_pcmciplus() for the optimal pc_alpha.
        """

        if pc_alpha is None:
            pc_alpha_list = [0.001, 0.005, 0.01, 0.025, 0.05]
        else:
            pc_alpha_list = pc_alpha

        if self.verbosity > 0:
            print("\n##\n## Optimizing pc_alpha over " +
                  "pc_alpha_list = %s" % str(pc_alpha_list) +
                  "\n##")

        results = {}
        score = np.zeros_like(pc_alpha_list)
        for iscore, pc_alpha_here in enumerate(pc_alpha_list):
            # Print statement about the pc_alpha being tested
            if self.verbosity > 0:
                print("\n## pc_alpha = %s (%d/%d):" % (pc_alpha_here,
                                                       iscore + 1,
                                                       score.shape[0]))
            # Get the results for this alpha value
            results[pc_alpha_here] = \
                self.run_pcmciplus(link_assumptions=link_assumptions,
                                   tau_min=tau_min,
                                   tau_max=tau_max,
                                   pc_alpha=pc_alpha_here,
                                   contemp_collider_rule=contemp_collider_rule,
                                   conflict_resolution=conflict_resolution,
                                   reset_lagged_links=reset_lagged_links,
                                   max_conds_dim=max_conds_dim,
                                   max_combinations=max_combinations,
                                   max_conds_py=max_conds_py,
                                   max_conds_px=max_conds_px,
                                   max_conds_px_lagged=max_conds_px_lagged,
                                   fdr_method=fdr_method)

            # Get one member of the Markov equivalence class of the result
            # of PCMCIplus, which is a CPDAG

            # First create order that is based on some feature of the variables
            # to avoid order-dependence of DAG, i.e., it should not matter
            # in which order the variables appear in dataframe
            # Here we use the sum of absolute val_matrix values incident at j
            val_matrix = results[pc_alpha_here]['val_matrix']
            variable_order = np.argsort(
                np.abs(val_matrix).sum(axis=(0, 2)))[::-1]

            dag = self._get_dag_from_cpdag(
                cpdag_graph=results[pc_alpha_here]['graph'],
                variable_order=variable_order)


            # Compute the best average score when the model selection
            # is applied to all N variables
            for j in range(self.N):
                parents = []
                for i, tau in zip(*np.where(dag[:, j, :] == "-->")):
                    parents.append((i, -tau))
                score_j = self.cond_ind_test.get_model_selection_criterion(
                    j, parents, tau_max)
                score[iscore] += score_j
            score[iscore] /= float(self.N)

        # Record the optimal alpha value
        optimal_alpha = pc_alpha_list[score.argmin()]

        if self.verbosity > 0:
            print("\n##" +
                  "\n\n## Scores for individual pc_alpha values:\n")
            for iscore, pc_alpha in enumerate(pc_alpha_list):
                print(" pc_alpha = %7s yields score = %.5f" % (pc_alpha,
                                                               score[iscore]))
            print("\n##\n## Results for optimal " +
                  "pc_alpha = %s\n##" % optimal_alpha)
            self.print_results(results[optimal_alpha], alpha_level=optimal_alpha)

        optimal_results = results[optimal_alpha]
        optimal_results['optimal_alpha'] = optimal_alpha
        return optimal_results


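In typical use _optimize_pcmciplus_alpha is not called directly: it is reached by passing a list (or None) as pc_alpha to run_pcmciplus, and the selected level is returned under the 'optimal_alpha' key, as the method above shows. A hedged usage sketch using only the API referenced in this file (data and sizes are illustrative, not from the source):

import numpy as np
import tigramite.data_processing as pp
from tigramite.pcmci import PCMCI
from tigramite.independence_tests.parcorr import ParCorr

# Illustrative data only: 500 samples of 3 standard-normal variables.
data = np.random.default_rng(0).standard_normal((500, 3))
pcmci = PCMCI(dataframe=pp.DataFrame(data), cond_ind_test=ParCorr())

# Passing a list (or None) as pc_alpha triggers the optimization above,
# which picks the value with the lowest average model-selection score.
results = pcmci.run_pcmciplus(tau_max=2, pc_alpha=[0.01, 0.05])
print("Selected significance level:", results['optimal_alpha'])
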
if __name__ == '__main__':
    from tigramite.independence_tests.parcorr import ParCorr
    from tigramite.independence_tests.regressionCI import RegressionCI
    # from tigramite.independence_tests.cmiknn import CMIknn

    import tigramite.data_processing as pp
    from tigramite.toymodels import structural_causal_processes as toys
    import tigramite.plotting as tp
    from matplotlib import pyplot as plt

    # random_state = np.random.default_rng(seed=43)
    # # Example process to play around with
    # # Each key refers to a variable and the incoming links are supplied
    # # as a list of format [((var, -lag), coeff, function), ...]
    # def lin_f(x): return x
    # def nonlin_f(x): return (x + 5. * x ** 2 * np.exp(-x ** 2 / 20.))

    # T = 1000
    # data = random_state.standard_normal((T, 4))
    # # Simple sun
    # data[:,3] = random_state.standard_normal((T)) # np.sin(np.arange(T)*20/np.pi) + 0.1*random_state.standard_normal((T))
    # c = 0.8
    # for t in range(1, T):
    #     data[t, 0] += 0.4*data[t-1, 0] + 0.4*data[t-1, 1] + c*data[t-1,3]
    #     data[t, 1] += 0.5*data[t-1, 1] + c*data[t,3]
    #     data[t, 2] += 0.6*data[t-1, 2] + 0.3*data[t-2, 1] #+ c*data[t-1,3]
    # dataframe = pp.DataFrame(data, var_names=[r'$X^0$', r'$X^1$', r'$X^2$', 'Sun'])
    # # tp.plot_timeseries(dataframe); plt.show()

    # ci_test = CMIknn(significance="fixed_thres", verbosity=3) #
    # ci_test = ParCorr() #significance="fixed_thres") #
    # dataframe_nosun = pp.DataFrame(data[:,[0,1,2]], var_names=[r'$X^0$', r'$X^1$', r'$X^2$'])
    # pcmci_parcorr = PCMCI(
    #     dataframe=dataframe_nosun,
    #     cond_ind_test=parcorr,
    #     verbosity=0)
    # tau_max = 1 #2
    # results = pcmci_parcorr.run_pcmci(tau_max=tau_max, pc_alpha=0.2, alpha_level = 0.01)
    # Remove parents of variable 3
    # Only estimate parents of variables 0, 1, 2
    # link_assumptions = None #{}
    # for j in range(4):
    #     if j in [0, 1, 2]:
    #         # Directed lagged links
    #         link_assumptions[j] = {(var, -lag): '-?>' for var in [0, 1, 2]
    #                                for lag in range(1, tau_max + 1)}
    #         # Unoriented contemporaneous links
    #         link_assumptions[j].update({(var, 0): 'o?o' for var in [0, 1, 2] if var != j})
    #         # Directed lagged and contemporaneous links from the sun (3)
    #         link_assumptions[j].update({(var, -lag): '-?>' for var in [3]
    #                                     for lag in range(0, tau_max + 1)})
    #     else:
    #         link_assumptions[j] = {}

    # for j in link_assumptions:
    #     print(link_assumptions[j])
    # pcmci_parcorr = PCMCI(
    #     dataframe=dataframe,
    #     cond_ind_test=ci_test,
    #     verbosity=1)
    # results = pcmci_parcorr.run_pcmciplus(tau_max=tau_max,
    #                                       pc_alpha=[0.001, 0.01, 0.05, 0.8],
    #                                       reset_lagged_links=False,
    #                                       link_assumptions=link_assumptions
    #                                       ) #, alpha_level = 0.01)
    # print(results['graph'].shape)
    # # print(results['graph'][:,3,:])
    # print(np.round(results['p_matrix'][:,:,0], 2))
    # print(np.round(results['val_matrix'][:,:,0], 2))
    # print(results['graph'][:,:,0])

    # Plot time series graph
    # tp.plot_graph(
    #     val_matrix=results['val_matrix'],
    #     graph=results['graph'],
    #     var_names=[r'$X^0$', r'$X^1$', r'$X^2$', 'Sun'],
    #     link_colorbar_label='MCI',
    #     ); plt.show()

    # links_coeffs = {0: [((0, -1), 0.7, lin_f)],
    #                 1: [((1, -1), 0.7, lin_f), ((0, 0), 0.2, lin_f), ((2, -2), 0.2, lin_f)],
    #                 2: [((2, -1), 0.3, lin_f)],
    #                 }
    # T = 100  # time series length
    # data, _ = toys.structural_causal_process(links_coeffs, T=T, seed=3)
    # T, N = data.shape


    multidata = np.random.randn(10, 100, 5)
    data_type = np.zeros((10, 100, 5), dtype='bool')
    data_type[:, :, :3] = True

    dataframe = pp.DataFrame(multidata,
                             data_type=data_type,
                             analysis_mode='multiple',
                             missing_flag=999.,
                             # time_offsets = {0:50, 1:0}
                             # reference_points=list(range(500, 1000))
                             )

    pcmci = PCMCI(dataframe=dataframe,
                  cond_ind_test=RegressionCI(verbosity=0), verbosity=0)

    # results = pcmci.run_pcmciplus(tau_max=1)