tigramite-fast 5.2.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tigramite/__init__.py +0 -0
- tigramite/causal_effects.py +1525 -0
- tigramite/causal_mediation.py +1592 -0
- tigramite/data_processing.py +1574 -0
- tigramite/graphs.py +1509 -0
- tigramite/independence_tests/LBFGS.py +1114 -0
- tigramite/independence_tests/__init__.py +0 -0
- tigramite/independence_tests/cmiknn.py +661 -0
- tigramite/independence_tests/cmiknn_mixed.py +1397 -0
- tigramite/independence_tests/cmisymb.py +286 -0
- tigramite/independence_tests/gpdc.py +664 -0
- tigramite/independence_tests/gpdc_torch.py +820 -0
- tigramite/independence_tests/gsquared.py +190 -0
- tigramite/independence_tests/independence_tests_base.py +1310 -0
- tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
- tigramite/independence_tests/pairwise_CI.py +383 -0
- tigramite/independence_tests/parcorr.py +369 -0
- tigramite/independence_tests/parcorr_mult.py +485 -0
- tigramite/independence_tests/parcorr_wls.py +451 -0
- tigramite/independence_tests/regressionCI.py +403 -0
- tigramite/independence_tests/robust_parcorr.py +403 -0
- tigramite/jpcmciplus.py +966 -0
- tigramite/lpcmci.py +3649 -0
- tigramite/models.py +2257 -0
- tigramite/pcmci.py +3935 -0
- tigramite/pcmci_base.py +1218 -0
- tigramite/plotting.py +4735 -0
- tigramite/rpcmci.py +467 -0
- tigramite/toymodels/__init__.py +0 -0
- tigramite/toymodels/context_model.py +261 -0
- tigramite/toymodels/non_additive.py +1231 -0
- tigramite/toymodels/structural_causal_processes.py +1201 -0
- tigramite/toymodels/surrogate_generator.py +319 -0
- tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
- tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
- tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
- tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
- tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
tigramite/jpcmciplus.py
ADDED
|
@@ -0,0 +1,966 @@
|
|
|
1
|
+
"""Tigramite causal discovery for time series."""
|
|
2
|
+
|
|
3
|
+
# Authors: Wiebke Günther <wiebke.guenther@dlr.de>, Urmi Ninad, Jakob Runge <jakob@jakob-runge.com>
|
|
4
|
+
#
|
|
5
|
+
# License: GNU General Public License v3.0
|
|
6
|
+
|
|
7
|
+
from __future__ import print_function
|
|
8
|
+
import numpy as np
|
|
9
|
+
from tigramite.pcmci import PCMCI
|
|
10
|
+
from copy import deepcopy
|
|
11
|
+
import itertools
|
|
12
|
+
|
|
13
|
+
from tigramite.toymodels.context_model import _group_links
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class JPCMCIplus(PCMCI):
|
|
17
|
+
r"""J-PCMCIplus causal discovery for time series datasets from multiple contexts.
|
|
18
|
+
|
|
19
|
+
This class is based on the PCMCI framework as described in
|
|
20
|
+
[i]. JPCMCIplus enables causal discovery for time series data from
|
|
21
|
+
different contexts, i.e. datasets, where some of the variables
|
|
22
|
+
describing the context might be unobserved. The method is described
|
|
23
|
+
in detail in [ii]. See the tutorial for guidance in applying the
|
|
24
|
+
method.
|
|
25
|
+
|
|
26
|
+
References
|
|
27
|
+
----------
|
|
28
|
+
.. [i] J. Runge, P. Nowack, M. Kretschmer, S. Flaxman, D. Sejdinovic,
|
|
29
|
+
Detecting and quantifying causal associations in large nonlinear
|
|
30
|
+
time series datasets. Sci. Adv. 5, eaau4996
|
|
31
|
+
(2019) https://advances.sciencemag.org/content/5/11/eaau4996
|
|
32
|
+
|
|
33
|
+
.. [ii] W. Günther, U. Ninad, J. Runge, Causal discovery for time
|
|
34
|
+
series from multiple datasets with latent contexts. UAI 2023
|
|
35
|
+
|
|
36
|
+
Parameters
|
|
37
|
+
----------
|
|
38
|
+
node_classification : dictionary
|
|
39
|
+
Classification of nodes into system, context, or dummy nodes.
|
|
40
|
+
Keys of the dictionary are from {0, ..., N-1} where N is the number of nodes.
|
|
41
|
+
Options for the values are "system", "time_context", "space_context", "time_dummy", or "space_dummy".
|
|
42
|
+
|
|
43
|
+
Attributes
|
|
44
|
+
----------
|
|
45
|
+
all_parents : dictionary
|
|
46
|
+
Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
|
|
47
|
+
the conditioning-parents estimated with PC algorithm.
|
|
48
|
+
val_min : dictionary
|
|
49
|
+
Dictionary of form val_min[j][(i, -tau)] = float
|
|
50
|
+
containing the minimum test statistic value for each link estimated in
|
|
51
|
+
the PC algorithm.
|
|
52
|
+
pval_max : dictionary
|
|
53
|
+
Dictionary of form pval_max[j][(i, -tau)] = float containing the maximum
|
|
54
|
+
p-value for each link estimated in the PC algorithm.
|
|
55
|
+
iterations : dictionary
|
|
56
|
+
Dictionary containing further information on algorithm steps.
|
|
57
|
+
N : int
|
|
58
|
+
Number of variables.
|
|
59
|
+
T : dict
|
|
60
|
+
Time series sample length of dataset(s).
|
|
61
|
+
dummy_parents : dictionary or None
|
|
62
|
+
Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
|
|
63
|
+
the dependence of the system nodes on the dummy nodes.
|
|
64
|
+
observed_context_parents : dictionary or None
|
|
65
|
+
Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
|
|
66
|
+
the dependence of the system nodes on the observed context nodes.
|
|
67
|
+
dummy_ci_test : conditional independence test object
|
|
68
|
+
Conditional independence test used to test dependence between system nodes and dummy nodes.
|
|
69
|
+
Currently, ParCorr is used with one-hot encoded dummies.
|
|
70
|
+
mode : "system_search" or "context_search" or "dummy_search" (default: "system_search")
|
|
71
|
+
time_context_nodes : list
|
|
72
|
+
List with entries from {0, ..., N-1} where N is the number of nodes.
|
|
73
|
+
This is the list of the temporal context nodes which are assumed to be constant over the different datasets.
|
|
74
|
+
space_context_nodes : list
|
|
75
|
+
List with entries from {0, ..., N-1} where N is the number of nodes.
|
|
76
|
+
This is the list of the spatial context nodes which are assumed to be constant over time.
|
|
77
|
+
time_dummy : int or None (default: None)
|
|
78
|
+
Node corresponding to the temporal dummy variable.
|
|
79
|
+
space_dummy : int or None (default: None)
|
|
80
|
+
Node corresponding to the spatial dummy variable.
|
|
81
|
+
system_nodes : list
|
|
82
|
+
List with entries from {0, ..., N-1} where N is the number of nodes.
|
|
83
|
+
This is the list of the system nodes.
|
|
84
|
+
"""
|
|
85
|
+
|
|
86
|
+
    def __init__(self, node_classification, **kwargs):
        """Initialize JPCMCIplus.

        Parameters
        ----------
        node_classification : dictionary
            Classification of nodes into system, context, or dummy nodes.
            Keys are node indices {0, ..., N-1}; values are one of
            "system", "time_context", "space_context", "time_dummy", or "space_dummy".
        **kwargs :
            Forwarded unchanged to PCMCI.__init__ (dataframe, cond_ind_test, verbosity, ...).
        """
        # Init base class (sets self.N, self.T, cond_ind_test, verbosity, ...)
        PCMCI.__init__(self, **kwargs)

        # Partition the node indices by their declared role.
        self.system_nodes = self.group_nodes(node_classification, "system")
        self.time_context_nodes = self.group_nodes(node_classification, "time_context")
        self.space_context_nodes = self.group_nodes(node_classification, "space_context")
        self.time_dummy = self.group_nodes(node_classification, "time_dummy")
        self.space_dummy = self.group_nodes(node_classification, "space_dummy")

        # Filled in by run_jpcmciplus (steps 1 and 2); empty until then.
        self.dummy_parents = {i: [] for i in range(self.N)}
        self.observed_context_parents = {i: [] for i in range(self.N)}
        # Search mode consumed by the discovery phases; one of
        # "system_search", "context_search", "dummy_search".
        self.mode = "system_search"
|
|
99
|
+
|
|
100
|
+
def group_nodes(self, node_types, node_type):
|
|
101
|
+
nodes = range(self.N)
|
|
102
|
+
return [node for node in nodes if node_types[node] == node_type]
|
|
103
|
+
|
|
104
|
+
    def run_jpcmciplus(self,
                       contemp_collider_rule='majority',
                       link_assumptions=None,
                       tau_min=0,
                       tau_max=1,
                       pc_alpha=0.01,
                       conflict_resolution=True,
                       reset_lagged_links=False,
                       max_conds_dim=None,
                       max_combinations=1,
                       max_conds_py=None,
                       max_conds_px=None,
                       max_conds_px_lagged=None,
                       fdr_method='none'):
        """Runs JPCMCIplus time-lagged and contemporaneous causal discovery for time series from multiple contexts.

        Method described in: W. Günther, U. Ninad, J. Runge, Causal discovery
        for time series from multiple datasets with latent contexts. UAI
        2023

        Notes
        -----
        The JPCMCIplus causal discovery method is described in [ii], where
        also analytical and numerical results are presented. JPCMCIplus can identify the joint causal graph
        over multiple datasets containing time series data from different contexts under the standard assumptions
        of Causal Sufficiency, Faithfulness and the Markov condition, as well as some background knowledge assumptions.
        JPCMCIplus estimates time-lagged and contemporaneous causal links from context to system
        variables and in between system variables by a four-step procedure:

        1. **Discovery of supersets of the lagged parents of the system and observed temporal context nodes** by
        running the :math:`PC_1` lagged phase on this subset of nodes to obtain :math:`\\hat{\\mathcal{B}}^-_t(X_t^j)`.

        2. Next, the **MCI test is run on pairs of system and context nodes conditional on subsets of system
        and context**, i.e. perform MCI tests for pairs :math:`((C^j_{t-\\tau}, X^i_t))_{\\tau > 0}`,
        :math:`(C_t^j, X_t^i)`, :math:`(X_t^i, C_t^j)` for all :math:`i,j`,

        .. math:: C_{t-\\tau}^i \\perp X_t^j | \\mathbf{S}, \\hat{\\mathcal{B}}^-_t(X_t^j)
                  \\setminus \\{ C_{t-\\tau}^i \\}, \\hat{\\mathcal{B}}^-_{t-\\tau}(C_{t-\\tau}^i)

        with :math:`\\mathbf{S}` being a subset of the contemporaneous adjacencies :math:`\\mathcal{A}_t(X_t^j)` and
        :math:`\\hat{\\mathcal{B}}^-_t(X_t^j)` are the lagged adjacencies from step one. If :math:`C` is a
        spatial context variable, we only have to test the contemporaneous pairs
        :math:`(C_t^j, X_t^i)`, :math:`(X_t^i, C_t^j)` for all :math:`i,j`.
        If :math:`C_t^j` and :math:`X_t^i` are conditionally independent, all lagged links between :math:`C_t^j` and
        :math:`X^j_{t-\\tau}` are also removed for all :math:`\\tau`.

        3. **Perform MCI tests on all system-dummy pairs conditional on the superset of lagged links, the discovered
        contemporaneous context adjacencies, as well as on subsets of contemporaneous system links**, i.e. test
        for :math:`(D, X_t^i)`, :math:`(X_t^i, D)` for all :math:`i`, i.e.

        .. math:: D \\perp X_t^j | \\mathbf{S}, \\hat{\\mathcal{B}}^C_t(X_t^j),

        where :math:`\\mathbf{S} \\subset \\mathcal{A}_t(X_t^i)` and :math:`\\hat{\\mathcal{B}}^C_t(X_t^j)`
        are the lagged and contextual adjacencies found in the previous step.
        If :math:`D` and :math:`X_t^j` are found to be conditionally independent, links between :math:`D` and
        :math:`X^j_{t-\\tau}` are removed for all :math:`\\tau`.
        By assumption the context node is the parent in all system-context links.

        4. Finally, we **perform MCI tests on all system pairs conditional on discovered lagged, context and dummy
        adjacencies, as well as on subsets of contemporaneous system links** and **orientation phase**. In more detail,
        we perform MCI test for pairs :math:`((X^j_{t-\\tau}, X_t^i))_{\\tau > 0}`, :math:`(X_t^i, X_t^j)` for all
        :math:`i, j`, i.e.

        .. math:: X^i_{t-\\tau} \\perp X_t^j | \\mathbf{S}, \\hat{\\mathcal{B}}^{CD}_t(X_t^j)
                  \\setminus \\{ X_{t-\\tau}^i \\},\\hat{\\mathcal{B}}^{CD}_t(X_{t-\\tau}^i)

        where :math:`\\mathbf{S} \\subset \\mathcal{A}_t(X_t^i)` and :math:`\\hat{\\mathcal{B}}^{CD}_t(X_t^j)`
        are the lagged, contextual, and dummy adjacencies found in the previous steps.
        Finally, all remaining edges (without expert knowledge) are oriented using the PCMCI+ orientation phase while
        making use of all triples involving one context or dummy variable and two system variables as in the non-time
        series case.

        JPCMCIplus can be flexibly combined with any kind of conditional
        independence test statistic adapted to the kind of data (continuous
        or discrete) and its assumed dependency types. These are available in
        ``tigramite.independence_tests``.
        See PCMCIplus for a description of the parameters of JPCMCIplus. Also, guidance on best practices for
        setting these parameters is given there.

        Parameters
        ----------
        link_assumptions : dict
            Dictionary of form {j:{(i, -tau): link_type, ...}, ...} specifying
            assumptions about links. This initializes the graph with entries
            graph[i,j,tau] = link_type. For example, graph[i,j,0] = '-->'
            implies that a directed link from i to j at lag 0 must exist.
            Valid link types are 'o-o', '-->', '<--'. In addition, the middle
            mark can be '?' instead of '-'. Then '-?>' implies that this link
            may not exist, but if it exists, its orientation is '-->'. Link
            assumptions need to be consistent, i.e., graph[i,j,0] = '-->'
            requires graph[j,i,0] = '<--' and acyclicity must hold. If a link
            does not appear in the dictionary, it is assumed absent. That is,
            if link_assumptions is not None, then all links have to be specified
            or the links are assumed absent.
        tau_min : int, optional (default: 0)
            Minimum time lag to test.
        tau_max : int, optional (default: 1)
            Maximum time lag. Must be larger or equal to tau_min.
        pc_alpha : float or list of floats, default: 0.01
            Significance level in algorithm. If a list or None is passed, the
            pc_alpha level is optimized for every graph across the given
            pc_alpha values ([0.001, 0.005, 0.01, 0.025, 0.05] for None) using
            the score computed in cond_ind_test.get_model_selection_criterion().
        contemp_collider_rule : {'majority', 'conservative', 'none'}
            Rule for collider phase to use. See the paper for details. Only
            'majority' and 'conservative' lead to an order-independent
            algorithm.
        conflict_resolution : bool, optional (default: True)
            Whether to mark conflicts in orientation rules. Only for True
            this leads to an order-independent algorithm.
        reset_lagged_links : bool, optional (default: False)
            Restricts the detection of lagged causal links in Step 2 to the
            significant adjacencies found in the PC1 algorithm in Step 1. For
            True, *all* lagged links are considered again, which improves
            detection power for lagged links, but also leads to larger
            runtimes.
        max_conds_dim : int, optional (default: None)
            Maximum number of conditions to test. If None is passed, this number
            is unrestricted.
        max_combinations : int, optional (default: 1)
            Maximum number of combinations of conditions of current cardinality
            to test. Defaults to 1 for PC_1 algorithm. For original PC algorithm
            a larger number, such as 10, can be used.
        max_conds_py : int, optional (default: None)
            Maximum number of lagged conditions of Y to use in MCI tests. If
            None is passed, this number is unrestricted.
        max_conds_px : int, optional (default: None)
            Maximum number of lagged conditions of X to use in MCI tests. If
            None is passed, this number is unrestricted.
        max_conds_px_lagged : int, optional (default: None)
            Maximum number of lagged conditions of X when X is lagged in MCI
            tests. If None is passed, this number is equal to max_conds_px.
        fdr_method : str, optional (default: 'none')
            Correction method, default is Benjamini-Hochberg False Discovery
            Rate method.

        Returns
        -------
        graph : array of shape [N, N, tau_max+1]
            Resulting causal graph, see description above for interpretation.
        val_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of test statistic values regarding adjacencies.
        p_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of p-values regarding adjacencies.
        sepsets : dictionary
            Separating sets. See paper for details.
        ambiguous_triples : list
            List of ambiguous triples, only relevant for 'majority' and
            'conservative' rules, see paper for details.
        """
        observed_context_nodes = self.time_context_nodes + self.space_context_nodes

        # Initialize / clean link_assumptions: user-supplied assumptions are
        # deep-copied so the caller's dict is never mutated; otherwise a fully
        # unrestricted assumption dict is generated.
        if link_assumptions is not None:
            _link_assumptions = deepcopy(link_assumptions)
        else:
            _link_assumptions = self._set_link_assumptions(link_assumptions, tau_min, tau_max,
                                                           remove_contemp=False)

        # Encode background knowledge: context/dummy nodes are exogenous parents
        # of system nodes, and forbidden link patterns are removed.
        _link_assumptions = self.assume_exogenous_context(_link_assumptions, observed_context_nodes)
        _link_assumptions = self.clean_link_assumptions(_link_assumptions, tau_max)

        # Check if pc_alpha is chosen to optimize over a list
        if pc_alpha is None or isinstance(pc_alpha, (list, tuple, np.ndarray)):
            # Call optimizer wrapper around run_pcmciplus()
            return self._optimize_pcmciplus_alpha(
                link_assumptions=link_assumptions,
                tau_min=tau_min,
                tau_max=tau_max,
                pc_alpha=pc_alpha,
                contemp_collider_rule=contemp_collider_rule,
                conflict_resolution=conflict_resolution,
                reset_lagged_links=reset_lagged_links,
                max_conds_dim=max_conds_dim,
                max_combinations=max_combinations,
                max_conds_py=max_conds_py,
                max_conds_px=max_conds_px,
                max_conds_px_lagged=max_conds_px_lagged,
                fdr_method=fdr_method)

        elif pc_alpha < 0. or pc_alpha > 1:
            raise ValueError("Choose 0 <= pc_alpha <= 1")

        # Check the limits on tau
        self._check_tau_limits(tau_min, tau_max)

        # Steps 1 and 2: lagged-parent supersets and context-system links.
        context_results = self.discover_lagged_context_system_links(
            _link_assumptions,
            tau_min=tau_min,
            tau_max=tau_max,
            pc_alpha=pc_alpha,
            reset_lagged_links=reset_lagged_links,
            max_conds_dim=max_conds_dim,
            max_combinations=max_combinations,
            max_conds_py=max_conds_py,
            max_conds_px=max_conds_px,
            max_conds_px_lagged=max_conds_px_lagged,
            fdr_method=fdr_method
        )
        ctxt_res = deepcopy(context_results)
        # Store the discovered context parents on the instance for later steps.
        self.observed_context_parents = deepcopy(context_results['parents'])
        self.all_lagged_parents = deepcopy(
            context_results['lagged_parents'])
        # Remove (temporal) context nodes from the lagged-parent sets; they are
        # handled separately via observed_context_parents.
        self.all_lagged_parents = {i: [el for el in self.all_lagged_parents[i] if el[0] not in self.time_context_nodes]
                                   for i in
                                   range(self.N)}

        if len(self.time_dummy) > 0 or len(self.space_dummy) > 0:
            # Step 3: dummy-system links (only needed if dummies are present).
            dummy_system_results = self.discover_dummy_system_links(
                _link_assumptions,
                ctxt_res,
                self.all_lagged_parents,
                tau_min=tau_min,
                tau_max=tau_max,
                pc_alpha=pc_alpha,
                reset_lagged_links=reset_lagged_links,
                max_conds_dim=max_conds_dim,
                max_conds_py=max_conds_py,
                max_conds_px=max_conds_px,
                max_conds_px_lagged=max_conds_px_lagged,
                fdr_method=fdr_method
            )
            # Store the dummy parents on the instance for later steps.
            self.dummy_parents = dummy_system_results['parents']
        else:
            # No dummies: step-2 results double as the step-3 results.
            dummy_system_results = deepcopy(context_results)

        # Step 4: system-system search.
        self.mode = "system_search"

        # Union (order-preserving, de-duplicated via dict.fromkeys) of lagged,
        # context, and dummy parents per system node.
        lagged_context_dummy_parents = {
            i: list(
                dict.fromkeys(self.all_lagged_parents[i] + self.observed_context_parents[i] + self.dummy_parents[i]))
            for i in self.system_nodes}
        # We only care about the parents of system nodes.
        lagged_context_dummy_parents.update(
            {i: [] for i in observed_context_nodes + self.time_dummy + self.space_dummy})

        # Snapshot before the system search, used below to restore the
        # context/dummy test statistics that the system phase overwrites.
        dummy_system_results_copy = deepcopy(dummy_system_results)

        system_skeleton_results = self.discover_system_system_links(link_assumptions=_link_assumptions,
                                                                    lagged_context_dummy_parents=lagged_context_dummy_parents,
                                                                    tau_min=tau_min,
                                                                    tau_max=tau_max,
                                                                    pc_alpha=pc_alpha,
                                                                    reset_lagged_links=reset_lagged_links,
                                                                    max_conds_dim=max_conds_dim,
                                                                    max_conds_py=max_conds_py,
                                                                    max_conds_px=max_conds_px,
                                                                    max_conds_px_lagged=max_conds_px_lagged,
                                                                    fdr_method=fdr_method)

        # Orientation phase: collider orientation followed by the PCMCI+ rules.
        colliders_step_results = self._pcmciplus_collider_phase(
            system_skeleton_results['graph'], system_skeleton_results['sepsets'],
            lagged_context_dummy_parents, pc_alpha,
            tau_min, tau_max, max_conds_py, max_conds_px, max_conds_px_lagged,
            conflict_resolution, contemp_collider_rule)

        final_graph = self._pcmciplus_rule_orientation_phase(colliders_step_results['graph'],
                                                             colliders_step_results['ambiguous_triples'],
                                                             conflict_resolution)

        # Add context-system and dummy-system values and p-values back in
        # (lost because of link_assumptions during the system phase).
        for c in observed_context_nodes + self.time_dummy + self.space_dummy:
            for j in range(self.N):
                for lag in range(tau_max + 1):
                    system_skeleton_results['val_matrix'][c, j, lag] = dummy_system_results_copy['val_matrix'][
                        c, j, lag]
                    system_skeleton_results['val_matrix'][j, c, lag] = dummy_system_results_copy['val_matrix'][
                        j, c, lag]

                    system_skeleton_results['p_matrix'][c, j, lag] = dummy_system_results_copy['p_matrix'][c, j, lag]
                    system_skeleton_results['p_matrix'][j, c, lag] = dummy_system_results_copy['p_matrix'][j, c, lag]

        # No confidence interval estimation here (conf_matrix stays None).
        return_dict = {'graph': final_graph, 'p_matrix': system_skeleton_results['p_matrix'],
                       'val_matrix': system_skeleton_results['val_matrix'],
                       'sepsets': colliders_step_results['sepsets'],
                       'ambiguous_triples': colliders_step_results['ambiguous_triples'],
                       'conf_matrix': None}

        # Print the results
        if self.verbosity > 0:
            self.print_results(return_dict, alpha_level=pc_alpha)

        # Cache the results on the instance before returning.
        self.results = return_dict

        return return_dict
|
|
412
|
+
|
|
413
|
+
def assume_exogenous_context(self, link_assumptions, observed_context_nodes):
|
|
414
|
+
"""Helper function to amend the link_assumptions to ensure that all context-system links are oriented
|
|
415
|
+
such that the context variable is the parent."""
|
|
416
|
+
for j in link_assumptions:
|
|
417
|
+
if j in self.system_nodes:
|
|
418
|
+
for link in link_assumptions[j]:
|
|
419
|
+
i, lag = link
|
|
420
|
+
if i in observed_context_nodes + self.time_dummy + self.space_dummy: # is context var
|
|
421
|
+
link_type = link_assumptions[j][link]
|
|
422
|
+
link_assumptions[j][link] = '-' + link_type[1] + '>'
|
|
423
|
+
return link_assumptions
|
|
424
|
+
|
|
425
|
+
def clean_link_assumptions(self, link_assumptions, tau_max):
|
|
426
|
+
"""Helper function to amend the link_assumptions in the following ways
|
|
427
|
+
* remove any links where dummy is the child
|
|
428
|
+
* remove any lagged links to dummy, and space_context (not to observed time context)
|
|
429
|
+
* and system - context links where context is the child
|
|
430
|
+
* and any links between spatial and temporal context
|
|
431
|
+
"""
|
|
432
|
+
for node in self.time_dummy + self.space_dummy:
|
|
433
|
+
link_assumptions[node] = {}
|
|
434
|
+
|
|
435
|
+
for j in self.system_nodes + self.time_context_nodes + self.space_context_nodes:
|
|
436
|
+
for lag in range(1, tau_max + 1):
|
|
437
|
+
for c in self.time_dummy + self.space_dummy + self.space_context_nodes:
|
|
438
|
+
if (c, -lag) in link_assumptions[j]: link_assumptions[j].pop((c, -lag), None)
|
|
439
|
+
for c in self.space_context_nodes + self.time_context_nodes:
|
|
440
|
+
for j in self.system_nodes:
|
|
441
|
+
for lag in range(tau_max + 1):
|
|
442
|
+
if (j, -lag) in link_assumptions[c]: link_assumptions[c].pop((j, -lag), None)
|
|
443
|
+
if (c, 0) in link_assumptions[c]: link_assumptions[c].pop((c, 0), None) # remove self-links
|
|
444
|
+
|
|
445
|
+
for c in self.space_context_nodes:
|
|
446
|
+
for k in self.time_context_nodes:
|
|
447
|
+
for lag in range(tau_max + 1):
|
|
448
|
+
if (k, -lag) in link_assumptions[c]: link_assumptions[c].pop((k, -lag), None)
|
|
449
|
+
for c in self.time_context_nodes:
|
|
450
|
+
for k in self.space_context_nodes:
|
|
451
|
+
if (k, 0) in link_assumptions[c]: link_assumptions[c].pop((k, 0), None)
|
|
452
|
+
|
|
453
|
+
return link_assumptions
|
|
454
|
+
|
|
455
|
+
def remove_dummy_link_assumptions(self, link_assumptions):
|
|
456
|
+
"""Helper function to remove any links to dummy from link_assumptions."""
|
|
457
|
+
link_assumptions_wo_dummy = deepcopy(link_assumptions)
|
|
458
|
+
for j in self.system_nodes + self.time_context_nodes + self.space_context_nodes:
|
|
459
|
+
for dummy_node in self.time_dummy + self.space_dummy:
|
|
460
|
+
if (dummy_node, 0) in link_assumptions_wo_dummy[j]:
|
|
461
|
+
link_assumptions_wo_dummy[j].pop((dummy_node, 0), None)
|
|
462
|
+
return link_assumptions_wo_dummy
|
|
463
|
+
|
|
464
|
+
def add_found_context_link_assumptions(self, link_assumptions, tau_max):
|
|
465
|
+
"""Helper function to add discovered links between system and observed context nodes to link_assumptions."""
|
|
466
|
+
link_assumptions_dummy = deepcopy(link_assumptions)
|
|
467
|
+
|
|
468
|
+
for c in self.space_context_nodes + self.time_context_nodes:
|
|
469
|
+
link_assumptions_dummy[c] = {}
|
|
470
|
+
for j in self.system_nodes + self.time_context_nodes + self.space_context_nodes:
|
|
471
|
+
for c in self.space_context_nodes + self.time_context_nodes:
|
|
472
|
+
for lag in range(tau_max + 1):
|
|
473
|
+
if (c, -lag) in link_assumptions_dummy[j]:
|
|
474
|
+
link_assumptions_dummy[j].pop((c, -lag), None)
|
|
475
|
+
link_assumptions_dummy[j].update({parent: '-->' for parent in self.observed_context_parents[j]})
|
|
476
|
+
|
|
477
|
+
return link_assumptions_dummy
|
|
478
|
+
|
|
479
|
+
def clean_system_link_assumptions(self, link_assumptions, tau_max):
|
|
480
|
+
"""Helper function to remove any links to dummy and observed context nodes from link_assumptions.
|
|
481
|
+
Add discovered links to contextual parents (from steps 1 and 2) to the link_assumptions.
|
|
482
|
+
"""
|
|
483
|
+
dummy_vars = self.time_dummy + self.space_dummy
|
|
484
|
+
observed_context_nodes = self.time_context_nodes + self.space_context_nodes
|
|
485
|
+
system_links = deepcopy(link_assumptions)
|
|
486
|
+
|
|
487
|
+
for j in self.system_nodes:
|
|
488
|
+
for C in dummy_vars + observed_context_nodes:
|
|
489
|
+
for lag in range(tau_max + 1):
|
|
490
|
+
if (C, -lag) in system_links[j]:
|
|
491
|
+
system_links[j].pop((C, -lag), None)
|
|
492
|
+
|
|
493
|
+
for j in system_links:
|
|
494
|
+
system_links[j].update(
|
|
495
|
+
{parent: '-->' for parent in self.observed_context_parents[j] + self.dummy_parents[j]})
|
|
496
|
+
|
|
497
|
+
for C in observed_context_nodes + dummy_vars:
|
|
498
|
+
# we are not interested in links between context variables (thus system_links[C] = {})
|
|
499
|
+
system_links[C] = {}
|
|
500
|
+
return system_links
|
|
501
|
+
|
|
502
|
+
    def discover_lagged_context_system_links(self, link_assumptions,
                                             tau_min=0,
                                             tau_max=1, pc_alpha=0.01,
                                             reset_lagged_links=False,
                                             max_conds_dim=None,
                                             max_combinations=1,
                                             max_conds_py=None,
                                             max_conds_px=None,
                                             max_conds_px_lagged=None,
                                             fdr_method='none'):
        """
        Step 1 of JPCMCIplus, i.e. discovery of links between observed context nodes and system nodes through an
        application of the skeleton phase of PCMCIplus to this subset of nodes (observed context nodes and system
        nodes).
        See run_jpcmciplus for a description of the parameters.

        Note: sets self.mode to "context_search" as a side effect and reads
        self.p_matrix / self.val_matrix populated by run_pc_stable.

        Returns
        -------
        graph : array of shape [N, N, tau_max+1]
            Resulting causal graph, see description above for interpretation.
        val_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of test statistic values regarding adjacencies.
        p_matrix : array of shape [N, N, tau_max+1]
            Estimated matrix of p-values regarding adjacencies.
        parents : dictionary
            Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
            the estimated context parents of the system nodes.
        lagged_parents : dictionary
            Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
            the conditioning-parents estimated with PC algorithm.
        """

        # Initializing: one (initially empty) context-parent list per node.
        context_parents = {i: [] for i in range(self.N)}

        # Find links between expressive context, and between expressive context
        # and system nodes; here, we exclude any links to dummy.
        _link_assumptions_wo_dummy = self.remove_dummy_link_assumptions(link_assumptions)
        _int_link_assumptions = self._set_link_assumptions(_link_assumptions_wo_dummy, tau_min, tau_max)

        # Step 1: Get a superset of lagged parents from run_pc_stable
        if self.verbosity > 0:
            print("\n##\n## J-PCMCI+ Step 1: Selecting lagged conditioning sets\n##")

        lagged_parents = self.run_pc_stable(link_assumptions=link_assumptions,
                                            tau_min=tau_min,
                                            tau_max=tau_max,
                                            pc_alpha=pc_alpha,
                                            max_conds_dim=max_conds_dim,
                                            max_combinations=max_combinations)

        # Switch the search mode consumed by downstream phases.
        self.mode = "context_search"

        # p/val matrices were filled in by run_pc_stable above; reuse them as
        # the starting point of the skeleton phase.
        p_matrix = self.p_matrix
        val_matrix = self.val_matrix

        # Run PCMCI+ skeleton phase on subset of links to discover
        # context-system links.
        if self.verbosity > 0:
            print("\n##\n## J-PCMCI+ Step 2: Discovering context-system links\n##")
            if link_assumptions is not None:
                print("\nWith link_assumptions = %s" % str(_int_link_assumptions))

        skeleton_results = self._pcmciplus_mci_skeleton_phase(
            lagged_parents, _int_link_assumptions, pc_alpha,
            tau_min, tau_max, max_conds_dim, None,
            max_conds_py, max_conds_px, max_conds_px_lagged,
            reset_lagged_links, fdr_method,
            p_matrix, val_matrix
        )

        skeleton_graph = skeleton_results['graph']

        # Any surviving adjacency from a context node c into node j (at any
        # lag) is recorded as a context parent of j; the four graph marks
        # cover the possible undirected/directed, certain/uncertain variants.
        for j in self.system_nodes + self.time_context_nodes + self.space_context_nodes:
            for c in self.space_context_nodes + self.time_context_nodes:
                for k in range(tau_max + 1):
                    if skeleton_graph[c, j, k] == 'o?o' or skeleton_graph[c, j, k] == '-?>' or skeleton_graph[
                        c, j, k] == 'o-o' or skeleton_graph[c, j, k] == '-->':
                        context_parents[j].append((c, -k))

        return_dict = {'graph': skeleton_results['graph'], 'p_matrix': skeleton_results['p_matrix'],
                       'val_matrix': skeleton_results['val_matrix'],
                       'parents': context_parents, 'lagged_parents': lagged_parents}

        # Print the results
        if self.verbosity > 0:
            self.print_results(return_dict, alpha_level=pc_alpha)

        return return_dict
|
|
590
|
+
|
|
591
|
+
def discover_dummy_system_links(self, link_assumptions,
                                context_system_results,
                                lagged_parents,
                                tau_min=0,
                                tau_max=1,
                                pc_alpha=0.01,
                                reset_lagged_links=False,
                                max_conds_dim=None,
                                max_conds_py=None,
                                max_conds_px=None,
                                max_conds_px_lagged=None,
                                fdr_method='none'):
    """
    Discovery of links between (time and space) dummy nodes and system nodes,
    printed as J-PCMCI+ Step 3, through an application of the skeleton phase of
    PCMCIplus to this subset of nodes (dummy nodes and system nodes), while
    conditioning on the lagged and observed-context parents found previously.
    See run_jpcmciplus for a description of the remaining parameters.

    Parameters
    ----------
    context_system_results : dictionary
        Output of discover_lagged_and_context_system_links, i.e. lagged and context parents together with the
        corresponding estimated test statistic values regarding adjacencies.
    lagged_parents : dictionary
        Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing the conditioning-parents
        estimated with PC algorithm.

    Returns
    -------
    graph : array of shape [N, N, tau_max+1]
        Resulting causal graph, see description above for interpretation.
    val_matrix : array of shape [N, N, tau_max+1]
        Estimated matrix of test statistic values regarding adjacencies.
    p_matrix : array of shape [N, N, tau_max+1]
        Estimated matrix of p-values regarding adjacencies.
    parents : dictionary
        Dictionary of form {0:[(0, -1), (3, -2), ...], 1:[], ...} containing
        the estimated dummy parents of the system nodes.
    """
    # Merge context parents (previous step) with the lagged parents per node;
    # dict.fromkeys removes duplicates while preserving insertion order.
    lagged_context_parents = {i: list(dict.fromkeys(context_system_results['parents'][i] + lagged_parents[i])) for
                              i in range(self.N)}
    dummy_parents = {i: [] for i in range(self.N)}
    p_matrix = context_system_results['p_matrix']

    # setup link assumptions without the observed context nodes
    _link_assumptions_dummy = self.add_found_context_link_assumptions(link_assumptions, tau_max)
    _int_link_assumptions = self._set_link_assumptions(_link_assumptions_dummy, tau_min, tau_max)

    # _remaining_pairs and _run_pcalg_test branch on this mode flag to restrict
    # the skeleton search to dummy-system pairs.
    self.mode = "dummy_search"
    if self.verbosity > 0:
        print("\n##\n## J-PCMCI+ Step 3: Discovering dummy-system links\n##")
        if _link_assumptions_dummy is not None:
            print("\nWith link_assumptions = %s" % str(_int_link_assumptions))

    skeleton_results_dummy = self._pcmciplus_mci_skeleton_phase(
        lagged_context_parents, _int_link_assumptions, pc_alpha,
        tau_min, tau_max, max_conds_dim, None,
        max_conds_py, max_conds_px, max_conds_px_lagged,
        reset_lagged_links, fdr_method,
        self.p_matrix, self.val_matrix
    )

    skeleton_graph_dummy = skeleton_results_dummy['graph']

    for j in self.system_nodes:
        for k in range(tau_max + 1):
            # Any surviving edge mark of these four types at (dummy_node, j, k)
            # is recorded as a dummy parent of system node j.
            for dummy_node in self.time_dummy + self.space_dummy:
                if skeleton_graph_dummy[dummy_node, j, k] == 'o?o' or \
                        skeleton_graph_dummy[dummy_node, j, k] == '-?>' or \
                        skeleton_graph_dummy[dummy_node, j, k] == 'o-o' or \
                        skeleton_graph_dummy[dummy_node, j, k] == '-->':
                    # NOTE(review): this stores lag +k while the context step
                    # stores (c, -k); presumably dummy links are contemporaneous
                    # (k == 0) so the sign never matters — confirm if lagged
                    # dummy links can occur.
                    dummy_parents[j].append((dummy_node, k))
            # Copy the context-node entries estimated in the previous step back
            # into the dummy-phase result matrices (presumably so that the
            # returned matrices keep the step-1/2 estimates for context links).
            for context_node in self.time_context_nodes + self.space_context_nodes:
                skeleton_results_dummy['val_matrix'][context_node, j, k] = context_system_results['val_matrix'][
                    context_node, j, k]
                skeleton_results_dummy['val_matrix'][j, context_node, k] = context_system_results['val_matrix'][
                    j, context_node, k]

                skeleton_results_dummy['p_matrix'][context_node, j, k] = p_matrix[context_node, j, k]
                skeleton_results_dummy['p_matrix'][j, context_node, k] = p_matrix[j, context_node, k]

    return_dict = {'graph': skeleton_results_dummy['graph'], 'p_matrix': skeleton_results_dummy['p_matrix'],
                   'val_matrix': skeleton_results_dummy['val_matrix'], 'parents': dummy_parents}

    # Print the results
    if self.verbosity > 0:
        self.print_results(return_dict, alpha_level=pc_alpha)
    return return_dict
|
|
679
|
+
|
|
680
|
+
def discover_system_system_links(self, link_assumptions,
                                 lagged_context_dummy_parents,
                                 tau_min=0,
                                 tau_max=1,
                                 pc_alpha=0.01,
                                 reset_lagged_links=False,
                                 max_conds_dim=None,
                                 max_conds_py=None,
                                 max_conds_px=None,
                                 max_conds_px_lagged=None,
                                 fdr_method='none'
                                 ):
    """
    Discovery of links among the system nodes (printed as J-PCMCI+ Step 4),
    given the knowledge about their context and dummy parents, via the
    PCMCIplus skeleton phase applied to the system-node subset.
    See run_jpcmciplus for a description of the other parameters.

    Parameters
    ----------
    lagged_context_dummy_parents : dictionary
        Dictionary containing lagged and (dummy and observed) context parents of the system nodes estimated during
        the previous J-PCMCI+ steps.

    Returns
    -------
    dictionary
        Skeleton-phase results containing 'graph', 'val_matrix' and
        'p_matrix' entries, each an array of shape [N, N, tau_max+1].
    """
    # _remaining_pairs / _run_pcalg_test dispatch on this flag.
    self.mode = "system_search"

    # Restrict the link assumptions to system-system links and translate
    # them into the internal representation.
    system_only_links = self.clean_system_link_assumptions(link_assumptions, tau_max)
    _int_link_assumptions = self._set_link_assumptions(system_only_links, tau_min, tau_max)

    if self.verbosity > 0:
        print("\n##\n## J-PCMCI+ Step 4: Discovering system-system links \n##")
        if system_only_links is not None:
            print("\nWith link_assumptions = %s" % str(_int_link_assumptions))

    # Run the skeleton phase, passing in the p-/val-matrices accumulated
    # during the earlier steps, and hand its result dict straight back.
    return self._pcmciplus_mci_skeleton_phase(
        lagged_context_dummy_parents, _int_link_assumptions, pc_alpha,
        tau_min, tau_max, max_conds_dim, None,
        max_conds_py, max_conds_px, max_conds_px_lagged,
        reset_lagged_links, fdr_method,
        self.p_matrix, self.val_matrix)
|
|
739
|
+
|
|
740
|
+
def _remaining_pairs(self, graph, adjt, tau_min, tau_max, p):
    """Return the (i, j, abstau) triples that still need to be tested.

    Depending on the J-PCMCI+ phase (``self.mode``), the candidate source
    nodes ``i`` are restricted: during ``"context_search"`` to the time and
    space context nodes, during ``"dummy_search"`` to the dummy nodes. In
    any other mode the parent-class implementation is used unchanged.

    The two search modes previously duplicated the same loop; they are
    consolidated here and differ only in the allowed source-node set.

    Parameters
    ----------
    graph : array of shape [N, N, tau_max+1]
        Current graph; an empty string marks an absent link.
    adjt : dictionary
        Adjacencies adjt[j] as lists of (var, -lag) tuples.
    tau_min : int
        Minimum time lag to consider.
    tau_max : int
        Maximum time lag to consider.
    p : int
        Required number of adjacencies of j other than i itself
        (conditioning-set size of the current PC iteration).

    Returns
    -------
    pairs : list of (i, j, abstau) tuples
        Pairs (with absolute lag) that remain to be tested, in the same
        order as the original nested-loop enumeration.
    """
    if self.mode == "context_search":
        # Only context-context and context-system pairs are of interest.
        allowed_sources = set(self.time_context_nodes + self.space_context_nodes)
    elif self.mode == "dummy_search":
        # Only dummy-system pairs are of interest.
        allowed_sources = set(self.time_dummy + self.space_dummy)
    else:
        return super()._remaining_pairs(graph, adjt, tau_min, tau_max, p)

    N = graph.shape[0]
    pairs = []
    for (i, j) in itertools.product(range(N), range(N)):
        # O(1) membership check hoisted above the lag loop.
        if i not in allowed_sources:
            continue
        for abstau in range(tau_min, tau_max + 1):
            # Keep only existing links whose target j has at least p other
            # adjacencies (excluding i at this lag) to condition on.
            if (graph[i, j, abstau] != ""
                    and sum(1 for a in adjt[j] if a != (i, -abstau)) >= p):
                pairs.append((i, j, abstau))
    return pairs
|
|
774
|
+
|
|
775
|
+
def _run_pcalg_test(self, graph, i, abstau, j, S, lagged_parents, max_conds_py,
                    max_conds_px, max_conds_px_lagged, tau_max, alpha_or_thres=None):
    """MCI conditional independence tests within PCMCIplus or PC algorithm.

    Depending on the J-PCMCI+ phase the conditioning set is augmented: during
    the discovery of dummy-system links we additionally condition on the
    observed-context parents found earlier; during the discovery of
    system-system links on both the dummy and observed-context parents. The
    two augmented branches previously duplicated the dedup-and-delegate code;
    they are consolidated into a single delegation below.

    Parameters
    ----------
    graph : array
        ...
    i : int
        Variable index.
    abstau : int
        Time lag (absolute value).
    j : int
        Variable index.
    S : list
        List of contemporaneous conditions.
    lagged_parents : dictionary of lists
        Dictionary of lagged parents for each node.
    max_conds_py : int
        Max number of lagged parents for node j.
    max_conds_px : int
        Max number of lagged parents for lagged node i.
    max_conds_px_lagged : int
        Maximum number of lagged conditions of X when X is lagged in MCI
        tests. If None is passed, this number is equal to max_conds_px.
    tau_max : int
        Maximum time lag.

    Returns
    -------
    val : float
        Test statistic value.
    pval : float
        Test statistic p-value.
    Z : list
        List of conditions.
    """
    if self.mode == 'dummy_search':
        # Condition on the observed-context parents found in the context step.
        extra_conds = self.observed_context_parents[j]
    elif self.mode == 'system_search':
        # Condition on the dummy and observed-context parents found earlier.
        extra_conds = self.dummy_parents[j] + self.observed_context_parents[j]
    else:
        extra_conds = None

    if extra_conds is not None:
        # dict.fromkeys removes overlaps while preserving order (as the
        # original per-branch code did).
        S = list(dict.fromkeys(list(S) + extra_conds))
    return super()._run_pcalg_test(graph, i, abstau, j, S, lagged_parents, max_conds_py,
                                   max_conds_px, max_conds_px_lagged, tau_max, alpha_or_thres)
|
|
830
|
+
|
|
831
|
+
if __name__ == '__main__':
    # Demo: run J-PCMCI+ on synthetic data generated from a toy context model.
    from numpy.random import SeedSequence

    from tigramite.toymodels.context_model import ContextModel
    from tigramite.independence_tests.parcorr_mult import ParCorrMult
    import tigramite.data_processing as pp

    # Set seeds for reproducibility: one stream for the dynamics, one for the
    # context model.
    ss = SeedSequence(12345)
    model_seed, context_seed = ss.spawn(2)

    random_state = np.random.default_rng(model_seed)

    # Choose the time series length and number of spatial contexts (datasets).
    T = 100
    nb_domains = 50
    tau_max = 2

    # Specify the model: linear ground-truth links as (parent, coeff, func).
    def lin(x): return x

    links = {0: [((0, -1), 0.3, lin), ((3, -1), 0.7, lin), ((4, 0), 0.9, lin)],
             1: [((1, -1), 0.4, lin), ((3, -1), 0.8, lin)],
             2: [((2, -1), 0.3, lin), ((1, 0), -0.5, lin), ((4, 0), 0.5, lin), ((5, 0), 0.6, lin)],
             3: [],
             4: [],
             5: []
             }

    # Specify which node is a context node via node_type
    # (can be "system", "time_context", or "space_context").
    node_classification = {
        0: "system",
        1: "system",
        2: "system",
        3: "time_context",
        4: "time_context",
        5: "space_context"
    }

    # Specify dynamical noise term distributions, here unit variance Gaussians.
    noises = [random_state.standard_normal for j in range(6)]

    contextmodel = ContextModel(links=links, node_classification=node_classification,
                                noises=noises,
                                seed=context_seed)

    data_ens, nonstationary = contextmodel.generate_data(nb_domains, T)
    assert not nonstationary

    system_indices = [0, 1, 2]
    # Decide which context variables should be latent, and which are observed.
    observed_indices_time = [4]
    latent_indices_time = [3]

    observed_indices_space = [5]
    latent_indices_space = []

    # All system variables are also observed, thus we get the following observed data.
    observed_indices = system_indices + observed_indices_time + observed_indices_space
    data_observed = {key: data_ens[key][:, observed_indices] for key in data_ens}

    # Add one-hot-encoding of time-steps and dataset index to the observational
    # data. These are the values of the time and space dummy variables.
    dummy_data_time = np.identity(T)

    data_dict = {}
    for i in range(nb_domains):
        dummy_data_space = np.zeros((T, nb_domains))
        dummy_data_space[:, i] = 1.
        data_dict[i] = np.hstack((data_observed[i], dummy_data_time, dummy_data_space))

    # Define vector-valued variables including dummy variables as well as
    # observed (system and context) variables.
    nb_observed_context_nodes = len(observed_indices_time) + len(observed_indices_space)
    N = len(system_indices)
    process_vars = system_indices
    observed_temporal_context_nodes = list(range(N, N + len(observed_indices_time)))
    observed_spatial_context_nodes = list(range(N + len(observed_indices_time),
                                                N + len(observed_indices_time) + len(observed_indices_space)))
    time_dummy_index = N + nb_observed_context_nodes
    space_dummy_index = N + nb_observed_context_nodes + 1
    time_dummy = list(range(time_dummy_index, time_dummy_index + T))
    space_dummy = list(range(time_dummy_index + T, time_dummy_index + T + nb_domains))

    vector_vars = {i: [(i, 0)] for i in process_vars + observed_temporal_context_nodes + observed_spatial_context_nodes}
    vector_vars[time_dummy_index] = [(i, 0) for i in time_dummy]
    vector_vars[space_dummy_index] = [(i, 0) for i in space_dummy]

    # Name all the variables and initialize the dataframe object.
    # Be careful to use analysis_mode = 'multiple'.
    sys_var_names = ['X_' + str(i) for i in process_vars]
    context_var_names = ['t-C_' + str(i) for i in observed_indices_time] + ['s-C_' + str(i) for i in observed_indices_space]
    var_names = sys_var_names + context_var_names + ['t-dummy', 's-dummy']

    dataframe = pp.DataFrame(
        data=data_dict,
        vector_vars=vector_vars,
        analysis_mode='multiple',
        var_names=var_names
    )

    # Classify all the nodes into system, context, or dummy.
    node_classification_jpcmci = {i: node_classification[var] for i, var in enumerate(observed_indices)}
    node_classification_jpcmci.update({time_dummy_index: "time_dummy", space_dummy_index: "space_dummy"})

    # Create a J-PCMCI+ object, passing the dataframe and (conditional)
    # independence test objects, as well as the observed temporal and spatial
    # context nodes and the indices of the dummies.
    # NOTE: use a distinct name for the instance — the original demo rebound
    # the class name JPCMCIplus to the instance, shadowing the class.
    jpcmci = JPCMCIplus(dataframe=dataframe,
                        cond_ind_test=ParCorrMult(significance='analytic'),
                        node_classification=node_classification_jpcmci,
                        verbosity=1)

    # Define the analysis parameters.
    pc_alpha = 0.01

    # Run J-PCMCI+
    results = jpcmci.run_jpcmciplus(tau_min=0,
                                    tau_max=tau_max,
                                    pc_alpha=pc_alpha)
|