tigramite-fast 5.2.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tigramite/__init__.py +0 -0
- tigramite/causal_effects.py +1525 -0
- tigramite/causal_mediation.py +1592 -0
- tigramite/data_processing.py +1574 -0
- tigramite/graphs.py +1509 -0
- tigramite/independence_tests/LBFGS.py +1114 -0
- tigramite/independence_tests/__init__.py +0 -0
- tigramite/independence_tests/cmiknn.py +661 -0
- tigramite/independence_tests/cmiknn_mixed.py +1397 -0
- tigramite/independence_tests/cmisymb.py +286 -0
- tigramite/independence_tests/gpdc.py +664 -0
- tigramite/independence_tests/gpdc_torch.py +820 -0
- tigramite/independence_tests/gsquared.py +190 -0
- tigramite/independence_tests/independence_tests_base.py +1310 -0
- tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
- tigramite/independence_tests/pairwise_CI.py +383 -0
- tigramite/independence_tests/parcorr.py +369 -0
- tigramite/independence_tests/parcorr_mult.py +485 -0
- tigramite/independence_tests/parcorr_wls.py +451 -0
- tigramite/independence_tests/regressionCI.py +403 -0
- tigramite/independence_tests/robust_parcorr.py +403 -0
- tigramite/jpcmciplus.py +966 -0
- tigramite/lpcmci.py +3649 -0
- tigramite/models.py +2257 -0
- tigramite/pcmci.py +3935 -0
- tigramite/pcmci_base.py +1218 -0
- tigramite/plotting.py +4735 -0
- tigramite/rpcmci.py +467 -0
- tigramite/toymodels/__init__.py +0 -0
- tigramite/toymodels/context_model.py +261 -0
- tigramite/toymodels/non_additive.py +1231 -0
- tigramite/toymodels/structural_causal_processes.py +1201 -0
- tigramite/toymodels/surrogate_generator.py +319 -0
- tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
- tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
- tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
- tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
- tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1201 @@
|
|
|
1
|
+
"""Tigramite toymodels."""
|
|
2
|
+
|
|
3
|
+
# Author: Jakob Runge <jakob@jakob-runge.com>
|
|
4
|
+
#
|
|
5
|
+
# License: GNU General Public License v3.0
|
|
6
|
+
from __future__ import print_function
|
|
7
|
+
from collections import defaultdict, OrderedDict
|
|
8
|
+
import sys
|
|
9
|
+
import warnings
|
|
10
|
+
import copy
|
|
11
|
+
import math
|
|
12
|
+
import numpy as np
|
|
13
|
+
import scipy.sparse
|
|
14
|
+
import scipy.sparse.linalg
|
|
15
|
+
import itertools
|
|
16
|
+
|
|
17
|
+
def _generate_noise(covar_matrix, time=1000, use_inverse=False):
|
|
18
|
+
"""
|
|
19
|
+
Generate a multivariate normal distribution using correlated innovations.
|
|
20
|
+
|
|
21
|
+
Parameters
|
|
22
|
+
----------
|
|
23
|
+
covar_matrix : array
|
|
24
|
+
Covariance matrix of the random variables
|
|
25
|
+
time : int
|
|
26
|
+
Sample size
|
|
27
|
+
use_inverse : bool, optional
|
|
28
|
+
Negate the off-diagonal elements and invert the covariance matrix
|
|
29
|
+
before use
|
|
30
|
+
|
|
31
|
+
return_eigenvectors
|
|
32
|
+
-------
|
|
33
|
+
noise : array
|
|
34
|
+
Random noise generated according to covar_matrix
|
|
35
|
+
"""
|
|
36
|
+
# Pull out the number of nodes from the shape of the covar_matrix
|
|
37
|
+
n_nodes = covar_matrix.shape[0]
|
|
38
|
+
# Make a deep copy for use in the inverse case
|
|
39
|
+
this_covar = covar_matrix
|
|
40
|
+
# Take the negative inverse if needed
|
|
41
|
+
if use_inverse:
|
|
42
|
+
this_covar = copy.deepcopy(covar_matrix)
|
|
43
|
+
this_covar *= -1
|
|
44
|
+
this_covar[np.diag_indices_from(this_covar)] *= -1
|
|
45
|
+
this_covar = np.linalg.inv(this_covar)
|
|
46
|
+
# Return the noise distribution
|
|
47
|
+
return np.random.multivariate_normal(mean=np.zeros(n_nodes),
|
|
48
|
+
cov=this_covar,
|
|
49
|
+
size=time)
|
|
50
|
+
|
|
51
|
+
def _check_stability(graph):
|
|
52
|
+
"""
|
|
53
|
+
Raises an AssertionError if the input graph corresponds to a non-stationary
|
|
54
|
+
process.
|
|
55
|
+
|
|
56
|
+
Parameters
|
|
57
|
+
----------
|
|
58
|
+
graph : array
|
|
59
|
+
Lagged connectivity matrices. Shape is (n_nodes, n_nodes, max_delay+1)
|
|
60
|
+
"""
|
|
61
|
+
# Get the shape from the input graph
|
|
62
|
+
n_nodes, _, period = graph.shape
|
|
63
|
+
# Set the top section as the horizontally stacked matrix of
|
|
64
|
+
# shape (n_nodes, n_nodes * period)
|
|
65
|
+
stability_matrix = \
|
|
66
|
+
scipy.sparse.hstack([scipy.sparse.lil_matrix(graph[:, :, t_slice])
|
|
67
|
+
for t_slice in range(period)])
|
|
68
|
+
# Extend an identity matrix of shape
|
|
69
|
+
# (n_nodes * (period - 1), n_nodes * (period - 1)) to shape
|
|
70
|
+
# (n_nodes * (period - 1), n_nodes * period) and stack the top section on
|
|
71
|
+
# top to make the stability matrix of shape
|
|
72
|
+
# (n_nodes * period, n_nodes * period)
|
|
73
|
+
stability_matrix = \
|
|
74
|
+
scipy.sparse.vstack([stability_matrix,
|
|
75
|
+
scipy.sparse.eye(n_nodes * (period - 1),
|
|
76
|
+
n_nodes * period)])
|
|
77
|
+
# Check the number of dimensions to see if we can afford to use a dense
|
|
78
|
+
# matrix
|
|
79
|
+
n_eigs = stability_matrix.shape[0]
|
|
80
|
+
if n_eigs <= 25:
|
|
81
|
+
# If it is relatively low in dimensionality, use a dense array
|
|
82
|
+
stability_matrix = stability_matrix.todense()
|
|
83
|
+
eigen_values, _ = scipy.linalg.eig(stability_matrix)
|
|
84
|
+
else:
|
|
85
|
+
# If it is a large dimensionality, convert to a compressed row sorted
|
|
86
|
+
# matrix, as it may be easier for the linear algebra package
|
|
87
|
+
stability_matrix = stability_matrix.tocsr()
|
|
88
|
+
# Get the eigen values of the stability matrix
|
|
89
|
+
eigen_values = scipy.sparse.linalg.eigs(stability_matrix,
|
|
90
|
+
k=(n_eigs - 2),
|
|
91
|
+
return_eigenvectors=False)
|
|
92
|
+
# Ensure they all have less than one magnitude
|
|
93
|
+
assert np.all(np.abs(eigen_values) < 1.), \
|
|
94
|
+
"Values given by time lagged connectivity matrix corresponds to a "+\
|
|
95
|
+
" non-stationary process!"
|
|
96
|
+
|
|
97
|
+
def _check_initial_values(initial_values, shape):
|
|
98
|
+
"""
|
|
99
|
+
Raises a AssertionError if the input initial values:
|
|
100
|
+
* Are not a numpy array OR
|
|
101
|
+
* Do not have the shape (n_nodes, max_delay+1)
|
|
102
|
+
|
|
103
|
+
Parameters
|
|
104
|
+
----------
|
|
105
|
+
graph : array
|
|
106
|
+
Lagged connectivity matrices. Shape is (n_nodes, n_nodes, max_delay+1)
|
|
107
|
+
"""
|
|
108
|
+
# Ensure it is a numpy array
|
|
109
|
+
assert isinstance(initial_values, np.ndarray),\
|
|
110
|
+
"User must provide initial_values as a numpy.ndarray"
|
|
111
|
+
# Check the shape is correct
|
|
112
|
+
assert initial_values.shape == shape,\
|
|
113
|
+
"Initial values must be of shape (n_nodes, max_delay+1)"+\
|
|
114
|
+
"\n current shape : " + str(initial_values.shape)+\
|
|
115
|
+
"\n desired shape : " + str(shape)
|
|
116
|
+
|
|
117
|
+
def _var_network(graph,
                 add_noise=True,
                 inno_cov=None,
                 invert_inno=False,
                 T=100,
                 initial_values=None):
    """
    Returns a vector-autoregressive process with correlated innovations.

    Useful for testing.

    Example:
        graph=numpy.array([[[0.2,0.,0.],[0.5,0.,0.]],
                           [[0.,0.1,0. ],[0.3,0.,0.]]])

    represents a process

    X_1(t) = 0.2 X_1(t-1) + 0.5 X_2(t-1) + eps_1(t)
    X_2(t) = 0.3 X_2(t-1) + 0.1 X_1(t-2) + eps_2(t)

    with inv_inno_cov being the negative (except for diagonal) inverse
    covariance matrix of (eps_1(t), eps_2(t)) OR inno_cov being
    the covariance. Initial values can also be provided.


    Parameters
    ----------
    graph : array
        Lagged connectivity matrices. Shape is (n_nodes, n_nodes, max_delay+1)
    add_noise : bool, optional (default: True)
        Flag to add random noise or not
    inno_cov : array, optional (default: None)
        Covariance matrix of innovations.
    invert_inno : bool, optional (default: False)
        Flag to negate off-diagonal elements of inno_cov and invert it before
        using it as the covariance matrix of innovations
    T : int, optional (default: 100)
        Sample size.

    initial_values : array, optional (default: None)
        Initial values for each node. Shape is (n_nodes, max_delay+1), i.e. must
        be of shape (graph.shape[1], graph.shape[2]).

    Returns
    -------
    X : array
        Array of realization, shape (T, n_nodes).
    """

    n_nodes, _, period = graph.shape

    time = T
    # Test stability: raises AssertionError for non-stationary graphs
    _check_stability(graph)

    # Initialize the data with standard-normal draws; the first `period`
    # columns double as the process start-up values.
    data = np.random.randn(n_nodes, time)
    # Load the initial values
    if initial_values is not None:
        # Check the shape of the initial values
        _check_initial_values(initial_values, data[:, :period].shape)
        # Input the initial values
        data[:, :period] = initial_values

    # Check if we are adding noise
    noise = None
    if add_noise:
        # Use inno_cov if it was provided (shape (time, n_nodes))
        if inno_cov is not None:
            noise = _generate_noise(inno_cov,
                                    time=time,
                                    use_inverse=invert_inno)
        # Otherwise just use uncorrelated random noise
        else:
            noise = np.random.randn(time, n_nodes)

    for a_time in range(period, time):
        # Reverse the last `period` columns in time so that slice index k
        # holds the values at lag k+1, matching graph[:, :, k] which stores
        # the lag-(k+1) coefficients; then repeat the block once per target
        # node so it can be multiplied elementwise with graph.
        data_past = np.repeat(
            data[:, a_time-period:a_time][:, ::-1].reshape(1, n_nodes, period),
            n_nodes, axis=0)
        # Sum over lags (axis=2) and over parent nodes (axis=1)
        data[:, a_time] = (data_past*graph).sum(axis=2).sum(axis=1)
        if add_noise:
            data[:, a_time] += noise[a_time]

    # Return with samples along the first axis: shape (T, n_nodes)
    return data.transpose()
|
|
202
|
+
|
|
203
|
+
def _iter_coeffs(parents_neighbors_coeffs):
|
|
204
|
+
"""
|
|
205
|
+
Iterator through the current parents_neighbors_coeffs structure. Mainly to
|
|
206
|
+
save repeated code and make it easier to change this structure.
|
|
207
|
+
|
|
208
|
+
Parameters
|
|
209
|
+
----------
|
|
210
|
+
parents_neighbors_coeffs : dict
|
|
211
|
+
Dictionary of format:
|
|
212
|
+
{..., j:[((var1, lag1), coef1), ((var2, lag2), coef2), ...], ...} for
|
|
213
|
+
all variables where vars must be in [0..N-1] and lags <= 0 with number
|
|
214
|
+
of variables N.
|
|
215
|
+
|
|
216
|
+
Yields
|
|
217
|
+
-------
|
|
218
|
+
(node_id, parent_id, time_lag, coeff) : tuple
|
|
219
|
+
Tuple defining the relationship between nodes across time
|
|
220
|
+
"""
|
|
221
|
+
# Iterate through all defined nodes
|
|
222
|
+
for node_id in list(parents_neighbors_coeffs):
|
|
223
|
+
# Iterate over parent nodes and unpack node and coeff
|
|
224
|
+
for (parent_id, time_lag), coeff in parents_neighbors_coeffs[node_id]:
|
|
225
|
+
# Yield the entry
|
|
226
|
+
yield node_id, parent_id, time_lag, coeff
|
|
227
|
+
|
|
228
|
+
def _check_parent_neighbor(parents_neighbors_coeffs):
|
|
229
|
+
"""
|
|
230
|
+
Checks to insure input parent-neighbor connectivity input is sane. This
|
|
231
|
+
means that:
|
|
232
|
+
* all time lags are non-positive
|
|
233
|
+
* all parent nodes are included as nodes themselves
|
|
234
|
+
* all node indexing is contiguous
|
|
235
|
+
* all node indexing starts from zero
|
|
236
|
+
Raises a ValueError if any one of these conditions are not met.
|
|
237
|
+
|
|
238
|
+
Parameters
|
|
239
|
+
----------
|
|
240
|
+
parents_neighbors_coeffs : dict
|
|
241
|
+
Dictionary of format:
|
|
242
|
+
{..., j:[((var1, lag1), coef1), ((var2, lag2), coef2), ...], ...} for
|
|
243
|
+
all variables where vars must be in [0..N-1] and lags <= 0 with number
|
|
244
|
+
of variables N.
|
|
245
|
+
"""
|
|
246
|
+
# Initialize some lists for checking later
|
|
247
|
+
all_nodes = set()
|
|
248
|
+
all_parents = set()
|
|
249
|
+
# Iterate through variables
|
|
250
|
+
for j in list(parents_neighbors_coeffs):
|
|
251
|
+
# Cache all node ids to ensure they are contiguous
|
|
252
|
+
all_nodes.add(j)
|
|
253
|
+
# Iterate through all nodes
|
|
254
|
+
for j, i, tau, _ in _iter_coeffs(parents_neighbors_coeffs):
|
|
255
|
+
# Check all time lags are equal to or less than zero
|
|
256
|
+
if tau > 0:
|
|
257
|
+
raise ValueError("Lag between parent {} and node {}".format(i, j)+\
|
|
258
|
+
" is {} > 0, must be <= 0!".format(tau))
|
|
259
|
+
# Cache all parent ids to ensure they are mentioned as node ids
|
|
260
|
+
all_parents.add(i)
|
|
261
|
+
# Check that all nodes are contiguous from zero
|
|
262
|
+
all_nodes_list = sorted(list(all_nodes))
|
|
263
|
+
if all_nodes_list != list(range(len(all_nodes_list))):
|
|
264
|
+
raise ValueError("Node IDs in input dictionary must be contiguous"+\
|
|
265
|
+
" and start from zero!\n"+\
|
|
266
|
+
" Found IDs : [" +\
|
|
267
|
+
",".join(map(str, all_nodes_list))+ "]")
|
|
268
|
+
# Check that all parent nodes are mentioned as a node ID
|
|
269
|
+
if not all_parents.issubset(all_nodes):
|
|
270
|
+
missing_nodes = sorted(list(all_parents - all_nodes))
|
|
271
|
+
all_parents_list = sorted(list(all_parents))
|
|
272
|
+
raise ValueError("Parent IDs in input dictionary must also be in set"+\
|
|
273
|
+
" of node IDs."+\
|
|
274
|
+
"\n Parent IDs "+" ".join(map(str, all_parents_list))+\
|
|
275
|
+
"\n Node IDs "+" ".join(map(str, all_nodes_list)) +\
|
|
276
|
+
"\n Missing IDs " + " ".join(map(str, missing_nodes)))
|
|
277
|
+
|
|
278
|
+
def _check_symmetric_relations(a_matrix):
|
|
279
|
+
"""
|
|
280
|
+
Check if the argument matrix is symmetric. Raise a value error with details
|
|
281
|
+
about the offending elements if it is not. This is useful for checking the
|
|
282
|
+
instantaneously linked nodes have the same link strength.
|
|
283
|
+
|
|
284
|
+
Parameters
|
|
285
|
+
----------
|
|
286
|
+
a_matrix : 2D numpy array
|
|
287
|
+
Relationships between nodes at tau = 0. Indexed such that first index is
|
|
288
|
+
node and second is parent, i.e. node j with parent i has strength
|
|
289
|
+
a_matrix[j,i]
|
|
290
|
+
"""
|
|
291
|
+
# Check it is symmetric
|
|
292
|
+
if not np.allclose(a_matrix, a_matrix.T, rtol=1e-10, atol=1e-10):
|
|
293
|
+
# Store the disagreement elements
|
|
294
|
+
bad_elems = ~np.isclose(a_matrix, a_matrix.T, rtol=1e-10, atol=1e-10)
|
|
295
|
+
bad_idxs = np.argwhere(bad_elems)
|
|
296
|
+
error_message = ""
|
|
297
|
+
for node, parent in bad_idxs:
|
|
298
|
+
# Check that we haven't already printed about this pair
|
|
299
|
+
if bad_elems[node, parent]:
|
|
300
|
+
error_message += \
|
|
301
|
+
"Parent {:d} of node {:d}".format(parent, node)+\
|
|
302
|
+
" has coefficient {:f}.\n".format(a_matrix[node, parent])+\
|
|
303
|
+
"Parent {:d} of node {:d}".format(node, parent)+\
|
|
304
|
+
" has coefficient {:f}.\n".format(a_matrix[parent, node])
|
|
305
|
+
# Check if we already printed about this one
|
|
306
|
+
bad_elems[node, parent] = False
|
|
307
|
+
bad_elems[parent, node] = False
|
|
308
|
+
raise ValueError("Relationships between nodes at tau=0 are not"+\
|
|
309
|
+
" symmetric!\n"+error_message)
|
|
310
|
+
|
|
311
|
+
def _find_max_time_lag_and_node_id(parents_neighbors_coeffs):
|
|
312
|
+
"""
|
|
313
|
+
Function to find the maximum time lag in the parent-neighbors-coefficients
|
|
314
|
+
object, as well as the largest node ID
|
|
315
|
+
|
|
316
|
+
Parameters
|
|
317
|
+
----------
|
|
318
|
+
parents_neighbors_coeffs : dict
|
|
319
|
+
Dictionary of format:
|
|
320
|
+
{..., j:[((var1, lag1), coef1), ((var2, lag2), coef2), ...], ...} for
|
|
321
|
+
all variables where vars must be in [0..N-1] and lags <= 0 with number
|
|
322
|
+
of variables N.
|
|
323
|
+
|
|
324
|
+
Returns
|
|
325
|
+
-------
|
|
326
|
+
(max_time_lag, max_node_id) : tuple
|
|
327
|
+
Tuple of the maximum time lag and maximum node ID
|
|
328
|
+
"""
|
|
329
|
+
# Default maximum lag and node ID
|
|
330
|
+
max_time_lag = 0
|
|
331
|
+
max_node_id = len(parents_neighbors_coeffs.keys()) - 1
|
|
332
|
+
# Iterate through the keys in parents_neighbors_coeffs
|
|
333
|
+
for j, _, tau, _ in _iter_coeffs(parents_neighbors_coeffs):
|
|
334
|
+
# Find max lag time
|
|
335
|
+
max_time_lag = max(max_time_lag, abs(tau))
|
|
336
|
+
# Find the max node ID
|
|
337
|
+
# max_node_id = max(max_node_id, j)
|
|
338
|
+
# Return these values
|
|
339
|
+
return max_time_lag, max_node_id
|
|
340
|
+
|
|
341
|
+
def _get_true_parent_neighbor_dict(parents_neighbors_coeffs):
|
|
342
|
+
"""
|
|
343
|
+
Function to return the dictionary of true parent neighbor causal
|
|
344
|
+
connections in time.
|
|
345
|
+
|
|
346
|
+
Parameters
|
|
347
|
+
----------
|
|
348
|
+
parents_neighbors_coeffs : dict
|
|
349
|
+
Dictionary of format:
|
|
350
|
+
{..., j:[((var1, lag1), coef1), ((var2, lag2), coef2), ...], ...} for
|
|
351
|
+
all variables where vars must be in [0..N-1] and lags <= 0 with number
|
|
352
|
+
of variables N.
|
|
353
|
+
|
|
354
|
+
Returns
|
|
355
|
+
-------
|
|
356
|
+
true_parent_neighbor : dict
|
|
357
|
+
Dictionary of lists of tuples. The dictionary is keyed by node ID, the
|
|
358
|
+
list stores the tuple values (parent_node_id, time_lag)
|
|
359
|
+
"""
|
|
360
|
+
# Initialize the returned dictionary of lists
|
|
361
|
+
true_parents_neighbors = defaultdict(list)
|
|
362
|
+
for j in parents_neighbors_coeffs:
|
|
363
|
+
for link_props in parents_neighbors_coeffs[j]:
|
|
364
|
+
i, tau = link_props[0]
|
|
365
|
+
coeff = link_props[1]
|
|
366
|
+
# Add parent node id and lag if non-zero coeff
|
|
367
|
+
if coeff != 0.:
|
|
368
|
+
true_parents_neighbors[j].append((i, tau))
|
|
369
|
+
# Return the true relations
|
|
370
|
+
return true_parents_neighbors
|
|
371
|
+
|
|
372
|
+
def _get_covariance_matrix(parents_neighbors_coeffs):
|
|
373
|
+
"""
|
|
374
|
+
Determines the covariance matrix for correlated innovations
|
|
375
|
+
|
|
376
|
+
Parameters
|
|
377
|
+
----------
|
|
378
|
+
parents_neighbors_coeffs : dict
|
|
379
|
+
Dictionary of format:
|
|
380
|
+
{..., j:[((var1, lag1), coef1), ((var2, lag2), coef2), ...], ...} for
|
|
381
|
+
all variables where vars must be in [0..N-1] and lags <= 0 with number
|
|
382
|
+
of variables N.
|
|
383
|
+
|
|
384
|
+
Returns
|
|
385
|
+
-------
|
|
386
|
+
covar_matrix : numpy array
|
|
387
|
+
Covariance matrix implied by the parents_neighbors_coeffs. Used to
|
|
388
|
+
generate correlated innovations.
|
|
389
|
+
"""
|
|
390
|
+
# Get the total number of nodes
|
|
391
|
+
_, max_node_id = \
|
|
392
|
+
_find_max_time_lag_and_node_id(parents_neighbors_coeffs)
|
|
393
|
+
n_nodes = max_node_id + 1
|
|
394
|
+
# Initialize the covariance matrix
|
|
395
|
+
covar_matrix = np.identity(n_nodes)
|
|
396
|
+
# Iterate through all the node connections
|
|
397
|
+
for j, i, tau, coeff in _iter_coeffs(parents_neighbors_coeffs):
|
|
398
|
+
# Add to covar_matrix if node connection is instantaneous
|
|
399
|
+
if tau == 0:
|
|
400
|
+
covar_matrix[j, i] = coeff
|
|
401
|
+
return covar_matrix
|
|
402
|
+
|
|
403
|
+
def _get_lag_connect_matrix(parents_neighbors_coeffs):
|
|
404
|
+
"""
|
|
405
|
+
Generates the lagged connectivity matrix from a parent-neighbor
|
|
406
|
+
connectivity dictionary. Used to generate the input for _var_network
|
|
407
|
+
|
|
408
|
+
Parameters
|
|
409
|
+
----------
|
|
410
|
+
parents_neighbors_coeffs : dict
|
|
411
|
+
Dictionary of format:
|
|
412
|
+
{..., j:[((var1, lag1), coef1), ((var2, lag2), coef2), ...], ...} for
|
|
413
|
+
all variables where vars must be in [0..N-1] and lags <= 0 with number
|
|
414
|
+
of variables N.
|
|
415
|
+
|
|
416
|
+
Returns
|
|
417
|
+
-------
|
|
418
|
+
connect_matrix : numpy array
|
|
419
|
+
Lagged connectivity matrix. Shape is (n_nodes, n_nodes, max_delay+1)
|
|
420
|
+
"""
|
|
421
|
+
# Get the total number of nodes and time lag
|
|
422
|
+
max_time_lag, max_node_id = \
|
|
423
|
+
_find_max_time_lag_and_node_id(parents_neighbors_coeffs)
|
|
424
|
+
n_nodes = max_node_id + 1
|
|
425
|
+
n_times = max_time_lag + 1
|
|
426
|
+
# Initialize full time graph
|
|
427
|
+
connect_matrix = np.zeros((n_nodes, n_nodes, n_times))
|
|
428
|
+
for j, i, tau, coeff in _iter_coeffs(parents_neighbors_coeffs):
|
|
429
|
+
# If there is a non-zero time lag, add the connection to the matrix
|
|
430
|
+
if tau != 0:
|
|
431
|
+
connect_matrix[j, i, -(tau+1)] = coeff
|
|
432
|
+
# Return the connectivity matrix
|
|
433
|
+
return connect_matrix
|
|
434
|
+
|
|
435
|
+
def var_process(parents_neighbors_coeffs, T=1000, use='inv_inno_cov',
                verbosity=0, initial_values=None):
    """Returns a vector-autoregressive process with correlated innovations.

    Wrapper around var_network with possibly more user-friendly input options.

    Parameters
    ----------
    parents_neighbors_coeffs : dict
        Dictionary of format: {..., j:[((var1, lag1), coef1), ((var2, lag2),
        coef2), ...], ...} for all variables where vars must be in [0..N-1]
        and lags <= 0 with number of variables N. If lag=0, a nonzero value
        in the covariance matrix (or its inverse) is implied. These should be
        the same for (i, j) and (j, i).
    use : str, optional (default: 'inv_inno_cov')
        Specifier, either 'inno_cov' or 'inv_inno_cov'.
        Any other specifier will result in non-correlated noise.
        For debugging, 'no_noise' can also be specified, in which case random
        noise will be disabled.
    T : int, optional (default: 1000)
        Sample size.
    verbosity : int, optional (default: 0)
        Level of verbosity.
    initial_values : array, optional (default: None)
        Initial values for each node. Shape must be (N, max_delay+1)

    Returns
    -------
    data : array-like
        Data generated from this process
    true_parent_neighbor : dict
        Dictionary of lists of tuples. The dictionary is keyed by node ID, the
        list stores the tuple values (parent_node_id, time_lag)
    """
    # Check the input parents_neighbors_coeffs dictionary for sanity
    _check_parent_neighbor(parents_neighbors_coeffs)
    # Generate the true parent neighbors graph
    true_parents_neighbors = \
        _get_true_parent_neighbor_dict(parents_neighbors_coeffs)
    # Covariance matrix of the correlated innovations (tau = 0 links)
    innos = _get_covariance_matrix(parents_neighbors_coeffs)
    # Lagged connectivity matrix for _var_network (tau < 0 links)
    connect_matrix = _get_lag_connect_matrix(parents_neighbors_coeffs)
    # Default values as per 'inno_cov'
    add_noise = True
    invert_inno = False
    # Use the correlated innovations
    if use == 'inno_cov':
        if verbosity > 0:
            print("\nInnovation Cov =\n%s" % str(innos))
    # Use the inverted correlated innovations
    elif use == 'inv_inno_cov':
        invert_inno = True
        if verbosity > 0:
            print("\nInverse Innovation Cov =\n%s" % str(innos))
    # Do not use any noise (debugging mode)
    elif use == 'no_noise':
        add_noise = False
        if verbosity > 0:
            # Fixed: this branch previously printed "Inverse Innovation Cov",
            # a copy-paste error from the branch above.
            print("\nNo noise added")
    # Any other specifier: fall back to uncorrelated noise
    else:
        innos = None
    # Ensure the innovation matrix is symmetric if it is used
    if (innos is not None) and add_noise:
        _check_symmetric_relations(innos)
    # Generate the data using _var_network
    data = _var_network(graph=connect_matrix,
                        add_noise=add_noise,
                        inno_cov=innos,
                        invert_inno=invert_inno,
                        T=T,
                        initial_values=initial_values)
    # Return the data and the ground-truth parent structure
    return data, true_parents_neighbors
|
|
510
|
+
|
|
511
|
+
class _Graph():
|
|
512
|
+
r"""Helper class to handle graph properties.
|
|
513
|
+
|
|
514
|
+
Parameters
|
|
515
|
+
----------
|
|
516
|
+
vertices : list
|
|
517
|
+
List of nodes.
|
|
518
|
+
"""
|
|
519
|
+
def __init__(self,vertices):
|
|
520
|
+
self.graph = defaultdict(list)
|
|
521
|
+
self.V = vertices
|
|
522
|
+
|
|
523
|
+
def addEdge(self,u,v):
|
|
524
|
+
"""Adding edge to graph."""
|
|
525
|
+
self.graph[u].append(v)
|
|
526
|
+
|
|
527
|
+
def isCyclicUtil(self, v, visited, recStack):
|
|
528
|
+
"""Utility function to return whether graph is cyclic."""
|
|
529
|
+
# Mark current node as visited and
|
|
530
|
+
# adds to recursion stack
|
|
531
|
+
visited[v] = True
|
|
532
|
+
recStack[v] = True
|
|
533
|
+
|
|
534
|
+
# Recur for all neighbours
|
|
535
|
+
# if any neighbour is visited and in
|
|
536
|
+
# recStack then graph is cyclic
|
|
537
|
+
for neighbour in self.graph[v]:
|
|
538
|
+
if visited[neighbour] == False:
|
|
539
|
+
if self.isCyclicUtil(neighbour, visited, recStack) == True:
|
|
540
|
+
return True
|
|
541
|
+
elif recStack[neighbour] == True:
|
|
542
|
+
return True
|
|
543
|
+
|
|
544
|
+
# The node needs to be poped from
|
|
545
|
+
# recursion stack before function ends
|
|
546
|
+
recStack[v] = False
|
|
547
|
+
return False
|
|
548
|
+
|
|
549
|
+
def isCyclic(self):
|
|
550
|
+
"""Returns whether graph is cyclic."""
|
|
551
|
+
visited = [False] * self.V
|
|
552
|
+
recStack = [False] * self.V
|
|
553
|
+
for node in range(self.V):
|
|
554
|
+
if visited[node] == False:
|
|
555
|
+
if self.isCyclicUtil(node,visited,recStack) == True:
|
|
556
|
+
return True
|
|
557
|
+
return False
|
|
558
|
+
|
|
559
|
+
def topologicalSortUtil(self,v,visited,stack):
|
|
560
|
+
"""A recursive function used by topologicalSort ."""
|
|
561
|
+
# Mark the current node as visited.
|
|
562
|
+
visited[v] = True
|
|
563
|
+
|
|
564
|
+
# Recur for all the vertices adjacent to this vertex
|
|
565
|
+
for i in self.graph[v]:
|
|
566
|
+
if visited[i] == False:
|
|
567
|
+
self.topologicalSortUtil(i,visited,stack)
|
|
568
|
+
|
|
569
|
+
# Push current vertex to stack which stores result
|
|
570
|
+
stack.insert(0,v)
|
|
571
|
+
|
|
572
|
+
def topologicalSort(self):
|
|
573
|
+
"""A sorting function. """
|
|
574
|
+
# Mark all the vertices as not visited
|
|
575
|
+
visited = [False]*self.V
|
|
576
|
+
stack =[]
|
|
577
|
+
|
|
578
|
+
# Call the recursive helper function to store Topological
|
|
579
|
+
# Sort starting from all vertices one by one
|
|
580
|
+
for i in range(self.V):
|
|
581
|
+
if visited[i] == False:
|
|
582
|
+
self.topologicalSortUtil(i, visited,stack)
|
|
583
|
+
|
|
584
|
+
return stack
|
|
585
|
+
|
|
586
|
+
def structural_causal_process_ensemble(realizations=10, ensemble_seed=None, **kwargs):
    """Returns an ensemble of time series generated from a structural causal process.

    This adds an ensemble dimension to the output of structural_causal_process.

    See docstring of structural_causal_process for details.

    Parameters
    ----------
    realizations : int, optional (default: 10)
        Number of ensemble members to generate. Must be >= 1.
    ensemble_seed : int, optional (default: None)
        Random seed for entire ensemble. Each member m is generated with
        seed ``realizations * ensemble_seed + m`` so members get distinct
        but reproducible seeds; if None, members are unseeded.
    ** kwargs :
        Arguments of structural_causal_process.

    Returns
    -------
    data : array-like
        Data generated from this process, shape (M, T, N).
    nonvalid : bool
        Indicates whether data has NaNs or infinities.

    """

    # NOTE(review): if realizations == 0 the loop never runs and the final
    # return raises UnboundLocalError on data_ensemble — confirm callers
    # always pass realizations >= 1.
    nonvalid = False
    for m in range(realizations):

        # Set ensemble seed: derive a distinct, reproducible seed per member
        if ensemble_seed is None:
            seed_here = None
        else:
            seed_here = realizations * ensemble_seed + m

        # Get data
        data, nonvalid_here = structural_causal_process(seed=seed_here, **kwargs)

        # Update non-validity: any invalid member marks the whole ensemble
        if nonvalid_here:
            nonvalid = True

        # Allocate the ensemble array once the member shape (T, N) is known
        if m == 0:
            data_ensemble = np.zeros((realizations,) + data.shape, dtype='float32')

        data_ensemble[m] = data

    return data_ensemble, nonvalid
|
|
631
|
+
|
|
632
|
+
def structural_causal_process(links, T, noises=None,
|
|
633
|
+
intervention=None, intervention_type='hard',
|
|
634
|
+
transient_fraction=0.2,
|
|
635
|
+
seed=None):
|
|
636
|
+
"""Returns a time series generated from a structural causal process.
|
|
637
|
+
|
|
638
|
+
Allows lagged and contemporaneous dependencies and includes the option
|
|
639
|
+
to have intervened variables or particular samples.
|
|
640
|
+
|
|
641
|
+
The interventional data is in particular useful for generating ground
|
|
642
|
+
truth for the CausalEffects class.
|
|
643
|
+
|
|
644
|
+
In more detail, the method implements a generalized additive noise model process of the form
|
|
645
|
+
|
|
646
|
+
.. math:: X^j_t = \\eta^j_t + \\sum_{X^i_{t-\\tau}\\in \\mathcal{P}(X^j_t)}
|
|
647
|
+
c^i_{\\tau} f^i_{\\tau}(X^i_{t-\\tau})
|
|
648
|
+
|
|
649
|
+
Links have the format ``{0:[((i, -tau), coeff, func),...], 1:[...],
|
|
650
|
+
...}`` where ``func`` can be an arbitrary (nonlinear) function provided
|
|
651
|
+
as a python callable with one argument and coeff is the multiplication
|
|
652
|
+
factor. The noise distributions of :math:`\\eta^j` can be specified in
|
|
653
|
+
``noises``.
|
|
654
|
+
|
|
655
|
+
Through the parameters ``intervention`` and ``intervention_type`` the model
|
|
656
|
+
can also be generated with intervened variables.
|
|
657
|
+
|
|
658
|
+
Parameters
|
|
659
|
+
----------
|
|
660
|
+
links : dict
|
|
661
|
+
Dictionary of format: {0:[((i, -tau), coeff, func),...], 1:[...],
|
|
662
|
+
...} for all variables where i must be in [0..N-1] and tau >= 0 with
|
|
663
|
+
number of variables N. coeff must be a float and func a python
|
|
664
|
+
callable of one argument.
|
|
665
|
+
T : int
|
|
666
|
+
Sample size.
|
|
667
|
+
noises : list of callables or array, optional (default: 'np.random.randn')
|
|
668
|
+
Random distribution function that is called with noises[j](T). If an array,
|
|
669
|
+
it must be of shape ((transient_fraction + 1)*T, N).
|
|
670
|
+
intervention : dict
|
|
671
|
+
Dictionary of format: {1:np.array, ...} containing only keys of intervened
|
|
672
|
+
variables with the value being the array of length T with interventional values.
|
|
673
|
+
Set values to np.nan to leave specific time points of a variable un-intervened.
|
|
674
|
+
intervention_type : str or dict
|
|
675
|
+
Dictionary of format: {1:'hard', 3:'soft', ...} to specify whether intervention is
|
|
676
|
+
hard (set value) or soft (add value) for variable j. If str, all interventions have
|
|
677
|
+
the same type.
|
|
678
|
+
transient_fraction : float
|
|
679
|
+
Added percentage of T used as a transient. In total a realization of length
|
|
680
|
+
(transient_fraction + 1)*T will be generated, but then transient_fraction*T will be
|
|
681
|
+
cut off.
|
|
682
|
+
seed : int, optional (default: None)
|
|
683
|
+
Random seed.
|
|
684
|
+
|
|
685
|
+
Returns
|
|
686
|
+
-------
|
|
687
|
+
data : array-like
|
|
688
|
+
Data generated from this process, shape (T, N).
|
|
689
|
+
nonvalid : bool
|
|
690
|
+
Indicates whether data has NaNs or infinities.
|
|
691
|
+
|
|
692
|
+
"""
|
|
693
|
+
random_state = np.random.RandomState(seed)
|
|
694
|
+
|
|
695
|
+
N = len(links.keys())
|
|
696
|
+
if noises is None:
|
|
697
|
+
noises = [random_state.randn for j in range(N)]
|
|
698
|
+
|
|
699
|
+
if N != max(links.keys())+1:
|
|
700
|
+
raise ValueError("links keys must match N.")
|
|
701
|
+
|
|
702
|
+
if isinstance(noises, np.ndarray):
|
|
703
|
+
if noises.shape != (T + int(math.floor(transient_fraction*T)), N):
|
|
704
|
+
raise ValueError("noises.shape must match ((transient_fraction + 1)*T, N).")
|
|
705
|
+
else:
|
|
706
|
+
if N != len(noises):
|
|
707
|
+
raise ValueError("noises keys must match N.")
|
|
708
|
+
|
|
709
|
+
# Check parameters
|
|
710
|
+
max_lag = 0
|
|
711
|
+
contemp_dag = _Graph(N)
|
|
712
|
+
for j in range(N):
|
|
713
|
+
for link_props in links[j]:
|
|
714
|
+
var, lag = link_props[0]
|
|
715
|
+
coeff = link_props[1]
|
|
716
|
+
func = link_props[2]
|
|
717
|
+
if lag == 0: contemp = True
|
|
718
|
+
if var not in range(N):
|
|
719
|
+
raise ValueError("var must be in 0..{}.".format(N-1))
|
|
720
|
+
if 'float' not in str(type(coeff)):
|
|
721
|
+
raise ValueError("coeff must be float.")
|
|
722
|
+
if lag > 0 or type(lag) != int:
|
|
723
|
+
raise ValueError("lag must be non-positive int.")
|
|
724
|
+
max_lag = max(max_lag, abs(lag))
|
|
725
|
+
|
|
726
|
+
# Create contemp DAG
|
|
727
|
+
if var != j and lag == 0:
|
|
728
|
+
contemp_dag.addEdge(var, j)
|
|
729
|
+
|
|
730
|
+
if contemp_dag.isCyclic() == 1:
|
|
731
|
+
raise ValueError("Contemporaneous links must not contain cycle.")
|
|
732
|
+
|
|
733
|
+
causal_order = contemp_dag.topologicalSort()
|
|
734
|
+
|
|
735
|
+
if intervention is not None:
|
|
736
|
+
if intervention_type is None:
|
|
737
|
+
intervention_type = {j:'hard' for j in intervention}
|
|
738
|
+
elif isinstance(intervention_type, str):
|
|
739
|
+
intervention_type = {j:intervention_type for j in intervention}
|
|
740
|
+
for j in intervention.keys():
|
|
741
|
+
if len(np.atleast_1d(intervention[j])) != T:
|
|
742
|
+
raise ValueError("intervention array for j=%s must be of length T = %d" %(j, T))
|
|
743
|
+
if j not in intervention_type.keys():
|
|
744
|
+
raise ValueError("intervention_type dictionary must contain entry for %s" %(j))
|
|
745
|
+
|
|
746
|
+
transient = int(math.floor(transient_fraction*T))
|
|
747
|
+
|
|
748
|
+
data = np.zeros((T+transient, N), dtype='float32')
|
|
749
|
+
for j in range(N):
|
|
750
|
+
if isinstance(noises, np.ndarray):
|
|
751
|
+
data[:, j] = noises[:, j]
|
|
752
|
+
else:
|
|
753
|
+
data[:, j] = noises[j](T+transient)
|
|
754
|
+
|
|
755
|
+
for t in range(max_lag, T+transient):
|
|
756
|
+
for j in causal_order:
|
|
757
|
+
|
|
758
|
+
if (intervention is not None and j in intervention and t >= transient
|
|
759
|
+
and np.isnan(intervention[j][t - transient]) == False):
|
|
760
|
+
if intervention_type[j] == 'hard':
|
|
761
|
+
data[t, j] = intervention[j][t - transient]
|
|
762
|
+
# Move to next j and skip link_props-loop from parents below
|
|
763
|
+
continue
|
|
764
|
+
else:
|
|
765
|
+
data[t, j] += intervention[j][t - transient]
|
|
766
|
+
|
|
767
|
+
# This loop is only entered if intervention_type != 'hard'
|
|
768
|
+
for link_props in links[j]:
|
|
769
|
+
var, lag = link_props[0]
|
|
770
|
+
coeff = link_props[1]
|
|
771
|
+
func = link_props[2]
|
|
772
|
+
data[t, j] += coeff * func(data[t + lag, var])
|
|
773
|
+
|
|
774
|
+
data = data[transient:]
|
|
775
|
+
|
|
776
|
+
nonvalid = (np.any(np.isnan(data)) or np.any(np.isinf(data)))
|
|
777
|
+
|
|
778
|
+
return data, nonvalid
|
|
779
|
+
|
|
780
|
+
def _get_minmax_lag(links):
|
|
781
|
+
"""Helper function to retrieve tau_min and tau_max from links.
|
|
782
|
+
"""
|
|
783
|
+
|
|
784
|
+
N = len(links)
|
|
785
|
+
|
|
786
|
+
# Get maximum time lag
|
|
787
|
+
min_lag = np.inf
|
|
788
|
+
max_lag = 0
|
|
789
|
+
for j in range(N):
|
|
790
|
+
for link_props in links[j]:
|
|
791
|
+
if len(link_props) > 2:
|
|
792
|
+
var, lag = link_props[0]
|
|
793
|
+
coeff = link_props[1]
|
|
794
|
+
# func = link_props[2]
|
|
795
|
+
if not isinstance(coeff, float) or coeff != 0.:
|
|
796
|
+
min_lag = min(min_lag, abs(lag))
|
|
797
|
+
max_lag = max(max_lag, abs(lag))
|
|
798
|
+
else:
|
|
799
|
+
var, lag = link_props
|
|
800
|
+
min_lag = min(min_lag, abs(lag))
|
|
801
|
+
max_lag = max(max_lag, abs(lag))
|
|
802
|
+
|
|
803
|
+
return min_lag, max_lag
|
|
804
|
+
|
|
805
|
+
def _get_parents(links, exclude_contemp=False):
|
|
806
|
+
"""Helper function to parents from links
|
|
807
|
+
"""
|
|
808
|
+
|
|
809
|
+
N = len(links)
|
|
810
|
+
|
|
811
|
+
# Get maximum time lag
|
|
812
|
+
parents = {}
|
|
813
|
+
for j in range(N):
|
|
814
|
+
parents[j] = []
|
|
815
|
+
for link_props in links[j]:
|
|
816
|
+
var, lag = link_props[0]
|
|
817
|
+
coeff = link_props[1]
|
|
818
|
+
# func = link_props[2]
|
|
819
|
+
if coeff != 0.:
|
|
820
|
+
if not (exclude_contemp and lag == 0):
|
|
821
|
+
parents[j].append((var, lag))
|
|
822
|
+
|
|
823
|
+
return parents
|
|
824
|
+
|
|
825
|
+
def _get_children(parents):
|
|
826
|
+
"""Helper function to children from parents
|
|
827
|
+
"""
|
|
828
|
+
|
|
829
|
+
N = len(parents)
|
|
830
|
+
children = dict([(j, []) for j in range(N)])
|
|
831
|
+
|
|
832
|
+
for j in range(N):
|
|
833
|
+
for par in parents[j]:
|
|
834
|
+
i, tau = par
|
|
835
|
+
children[i].append((j, abs(tau)))
|
|
836
|
+
|
|
837
|
+
return children
|
|
838
|
+
|
|
839
|
+
def links_to_graph(links, tau_max=None):
    """Convert a dictionary of links into the string graph array format.

    Parameters
    ----------
    links : dict
        Dictionary of form {0:[((0, -1), coeff, func), ...], 1:[...], ...}.
        Also format {0:[(0, -1), ...], 1:[...], ...} is allowed.
    tau_max : int or None
        Maximum lag. If None, the maximum lag in links is used.

    Returns
    -------
    graph : array of shape (N, N, tau_max+1)
        Matrix format of graph with "-->" for true links (and the mirrored
        "<--" entry for contemporaneous links) and "" else.

    Raises
    ------
    ValueError
        If tau_max is smaller than the maximum lag found in links.
    """
    N = len(links)

    # Largest lag present in links determines the minimum admissible tau_max
    _, max_lag = _get_minmax_lag(links)

    if tau_max is None:
        tau_max = max_lag
    elif max_lag > tau_max:
        raise ValueError("tau_max is smaller than maximum lag = %d "
                         "found in links, use tau_max=None or larger "
                         "value" % max_lag)

    graph = np.zeros((N, N, tau_max + 1), dtype='<U3')
    for j, entries in links.items():
        for entry in entries:
            if len(entry) > 2:
                # Full format ((var, lag), coeff, func): skip zero coefficients
                (var, lag), coeff = entry[0], entry[1]
                if coeff == 0.:
                    continue
            else:
                # Short format (var, lag)
                var, lag = entry
            graph[var, j, abs(lag)] = "-->"
            # Contemporaneous links get the mirrored tail mark as well
            if lag == 0:
                graph[j, var, 0] = "<--"

    return graph
|
|
886
|
+
|
|
887
|
+
def dag_to_links(dag):
    """Convert a string-format DAG graph array into a parents dictionary.

    Parameters
    ----------
    dag : array of shape (N, N, tau_max+1)
        Matrix format of graph in string format. Must be a DAG, i.e. may
        only contain the entries "-->", "<--", or "".

    Returns
    -------
    parents : dict
        Dictionary of form {0:[(0, -1), ...], 1:[...], ...}.

    Raises
    ------
    ValueError
        If the graph contains any edge mark other than "-->", "<--", "".
    """
    parents = {node: [] for node in range(dag.shape[0])}

    # Any other edge mark (e.g. "o-o", "<->") means the graph is not a DAG
    if not np.isin(dag, ["-->", "<--", ""]).all():
        raise ValueError("graph must be a DAG.")

    # Each head mark at (i, j, tau) is a link from variable i at lag -tau to j
    for i, j, tau in zip(*np.where(dag == '-->')):
        parents[j].append((i, -tau))

    return parents
|
|
912
|
+
|
|
913
|
+
def generate_structural_causal_process(
        N=2,
        L=1,
        dependency_funcs=['linear'],
        dependency_coeffs=[-0.5, 0.5],
        auto_coeffs=[0.5, 0.7],
        contemp_fraction=0.,
        max_lag=1,
        noise_dists=['gaussian'],
        noise_means=[0.],
        noise_sigmas=[0.5, 2.],
        noise_seed=None,
        seed=None):
    """Randomly generates a structural causal process based on input characteristics.

    The process has the form

    .. math:: X^j_t = \\eta^j_t + a^j X^j_{t-1} + \\sum_{X^i_{t-\\tau}\\in pa(X^j_t)}
              c^i_{\\tau} f^i_{\\tau}(X^i_{t-\\tau})

    where ``j = 1, ..., N``. Here the properties of :math:`\\eta^j_t` are
    randomly drawn from the noise parameters (see below), :math:`pa
    (X^j_t)` are the causal parents drawn randomly such that in total ``L``
    links occur out of which ``contemp_fraction`` are contemporaneous and
    their time lags are drawn from ``[0 or 1..max_lag]``, the
    coefficients :math:`c^i_{\\tau}` are drawn from
    ``dependency_coeffs``, :math:`a^j` are drawn from ``auto_coeffs``,
    and :math:`f^i_{\\tau}` are drawn from ``dependency_funcs``.

    The returned dictionary links has the format
    ``{0:[((i, -tau), coeff, func),...], 1:[...], ...}``
    where ``func`` can be an arbitrary (nonlinear) function provided
    as a python callable with one argument and coeff is the multiplication
    factor. The noise distributions of :math:`\\eta^j` are returned in
    ``noises``, see specifics below.

    The process might be non-stationary. In case of asymptotically linear
    dependency functions and no contemporaneous links this can be checked with
    ``check_stationarity(...)``. Otherwise check by generating a large sample
    and test for np.inf.

    Parameters
    ----------
    N : int
        Number of variables.
    L : int
        Number of cross-links between two different variables.
    dependency_funcs : list
        List of callables or strings 'linear' or 'nonlinear' for a linear and a specific nonlinear function
        that is asymptotically linear.
    dependency_coeffs : list
        List of floats from which the coupling coefficients are randomly drawn.
    auto_coeffs : list
        List of floats from which the lag-1 autodependencies are randomly drawn.
    contemp_fraction : float [0., 1]
        Fraction of the L links that are contemporaneous (lag zero).
    max_lag : int
        Maximum lag from which the time lags of links are drawn.
    noise_dists : list
        List of noise functions. Either in
        {'gaussian', 'weibull', 'uniform'} or user-specified, in which case
        it must be parametrized just by the size parameter. E.g. def beta
        (T): return np.random.beta(a=1, b=0.5, T)
    noise_means : list
        Noise mean. Only used for noise in {'gaussian', 'weibull', 'uniform'}.
    noise_sigmas : list
        Noise standard deviation. Only used for noise in {'gaussian', 'weibull', 'uniform'}.
    seed : int
        Random seed to draw the above random functions from.
    noise_seed : int
        Random seed for noise function random generator.

    Returns
    -------
    links : dict
        Dictionary of form {0:[((0, -1), coeff, func), ...], 1:[...], ...}.
    noises : list
        List of N noise functions to call by noise(T) where T is the time series length.
    """
    # NOTE(review): the list default arguments are shared across calls, but
    # they are only read (choice/iteration), never mutated, so this is benign.

    # Init random states: one stream for the process structure, a separate
    # one for the noise functions, so both can be reproduced independently
    random_state = np.random.RandomState(seed)
    random_state_noise = np.random.RandomState(noise_seed)

    # Built-in dependency functions selectable via the strings
    # 'linear' / 'nonlinear'; the nonlinear one is asymptotically linear
    def linear(x): return x
    def nonlinear(x): return (x + 5. * x**2 * np.exp(-x**2 / 20.))

    # Without lags every link must be contemporaneous
    if max_lag == 0:
        contemp_fraction = 1.

    if contemp_fraction > 0.:
        # At most one contemporaneous link per unordered variable pair
        # (so the contemporaneous part stays acyclic)
        ordered_pairs = list(itertools.combinations(range(N), 2))
        max_poss_links = min(L, len(ordered_pairs))
        L_contemp = int(contemp_fraction*max_poss_links)
        L_lagged = max_poss_links - L_contemp
    else:
        L_lagged = L
        L_contemp = 0

    # Random causal order of the variables
    causal_order = list(random_state.permutation(N))

    # Init link dict
    links = dict([(i, []) for i in range(N)])

    # Generate auto-dependencies at lag 1
    if max_lag > 0:
        for i in causal_order:
            a = random_state.choice(auto_coeffs)
            if a != 0.:
                links[i].append(((int(i), -1), float(a), linear))

    # Non-cyclic contemp random pairs of links such that
    # index of cause < index of effect (w.r.t. causal_order)
    # Take up to (!) L_contemp links
    ordered_pairs = list(itertools.combinations(range(N), 2))
    random_state.shuffle(ordered_pairs)
    contemp_links = [(causal_order[pair[0]], causal_order[pair[1]])
                     for pair in ordered_pairs[:L_contemp]]

    # Possibly cyclic lagged random pairs of links
    # where we remove already chosen contemp links
    # Take up to (!) L_lagged links
    unordered_pairs = list(itertools.permutations(range(N), 2))
    unordered_pairs = list(set(unordered_pairs) - set(ordered_pairs[:L_contemp]))
    random_state.shuffle(unordered_pairs)
    lagged_links = [(causal_order[pair[0]], causal_order[pair[1]])
                    for pair in unordered_pairs[:L_lagged]]

    chosen_links = lagged_links + contemp_links

    # Populate links: draw lag, coefficient and dependency function per link
    for (i, j) in chosen_links:

        # Choose lag: 0 for contemporaneous links, else uniform in 1..max_lag
        if (i, j) in contemp_links:
            tau = 0
        else:
            tau = int(random_state.randint(1, max_lag+1))

        # Choose dependency; a zero coefficient drops the link entirely
        c = float(random_state.choice(dependency_coeffs))
        if c != 0:
            func = random_state.choice(dependency_funcs)
            if func == 'linear':
                func = linear
            elif func == 'nonlinear':
                func = nonlinear

            links[j].append(((int(i), -tau), c, func))

    # Now generate noise functions
    # Either choose among pre-defined noise types or supply your own
    class NoiseModel:
        # Wraps a noise distribution so that each method, called as
        # noise(T), returns T samples with the configured mean and sigma
        def __init__(self, mean=0., sigma=1.):
            self.mean = mean
            self.sigma = sigma
        def gaussian(self, T):
            # Get zero-mean unit variance gaussian distribution,
            # then shift/scale to (mean, sigma)
            return self.mean + self.sigma*random_state_noise.randn(T)
        def weibull(self, T):
            # Get zero-mean sigma variance weibull distribution by
            # standardizing with the analytic Weibull(a=2) mean/variance
            a = 2
            mean = scipy.special.gamma(1./a + 1)
            variance = scipy.special.gamma(2./a + 1) - scipy.special.gamma(1./a + 1)**2
            return self.mean + self.sigma*(random_state_noise.weibull(a=a, size=T) - mean)/np.sqrt(variance)
        def uniform(self, T):
            # Get zero-mean sigma variance uniform distribution by
            # standardizing with the Uniform(0,1) mean/variance
            mean = 0.5
            variance = 1./12.
            return self.mean + self.sigma*(random_state_noise.uniform(size=T) - mean)/np.sqrt(variance)

    noises = []
    for j in links:
        noise_dist = random_state.choice(noise_dists)
        noise_mean = random_state.choice(noise_means)
        noise_sigma = random_state.choice(noise_sigmas)

        if noise_dist in ['gaussian', 'weibull', 'uniform']:
            # Bind the chosen distribution method of a configured NoiseModel
            noise = getattr(NoiseModel(mean = noise_mean, sigma = noise_sigma), noise_dist)
        else:
            # User-supplied callable parametrized by the sample size T
            noise = noise_dist

        noises.append(noise)

    return links, noises
|
|
1099
|
+
|
|
1100
|
+
def check_stationarity(links):
    """Returns stationarity according to a unit root test.

    Assumes an at least asymptotically linear vector autoregressive process
    without contemporaneous links. The coefficients are assembled into the
    companion matrix of the VAR(max_lag) process; the process is stationary
    iff all its eigenvalues lie strictly inside the unit circle.

    Parameters
    ---------
    links : dict
        Dictionary of form {0:[((0, -1), coeff, func), ...], 1:[...], ...}.

    Returns
    -------
    stationary : bool
        True if VAR process is stationary.
    """
    N = len(links)

    # Largest absolute lag determines the order of the companion form
    max_lag = 0
    for entries in links.values():
        for link_props in entries:
            _, lag = link_props[0]
            max_lag = max(max_lag, abs(lag))

    # graph[j, i, tau-1] holds the coefficient of the link from variable i
    # at lag tau into variable j; contemporaneous links are ignored
    graph = np.zeros((N, N, max_lag))
    couplings = []
    for j in range(N):
        for link_props in links[j]:
            var, lag = link_props[0]
            coeff = link_props[1]
            if lag != 0:
                graph[j, var, abs(lag) - 1] = coeff
            couplings.append(link_props[2])

    # Build the (N*max_lag x N*max_lag) companion ("stability") matrix:
    # coefficient blocks in the first block row, identities on the subdiagonal
    dim = N * max_lag
    stabmat = np.zeros((dim, dim))
    for index, col in enumerate(range(0, dim, N)):
        stabmat[:N, col:col + N] = graph[:, :, index]
        if index < max_lag - 1:
            stabmat[col + N:col + 2*N, col:col + N] = np.identity(N)

    # Stationary iff the spectral radius of the companion matrix is < 1
    eigenvalues = np.linalg.eig(stabmat)[0]
    return bool(np.all(np.abs(eigenvalues) < 1.))
|
|
1163
|
+
|
|
1164
|
+
class _Logger(object):
|
|
1165
|
+
"""Class to append print output to a string which can be saved"""
|
|
1166
|
+
def __init__(self):
|
|
1167
|
+
self.terminal = sys.stdout
|
|
1168
|
+
self.log = "" # open("log.dat", "a")
|
|
1169
|
+
|
|
1170
|
+
def write(self, message):
|
|
1171
|
+
self.terminal.write(message)
|
|
1172
|
+
self.log += message # .write(message)
|
|
1173
|
+
|
|
1174
|
+
|
|
1175
|
+
if __name__ == '__main__':
    # Demo: draw a random structural causal process and generate a small
    # ensemble of realizations from it.

    def lin_f(x): return x
    def nonlin_f(x): return (x + 5. * x**2 * np.exp(-x**2 / 20.))

    demo_links, demo_noises = generate_structural_causal_process(seed=1, noise_seed=1)

    data, nonstat = structural_causal_process_ensemble(
        realizations=2, ensemble_seed=0, links=demo_links, T=2, noises=demo_noises)

    print(data)
    print(data.shape)