timeawarepc 1.2.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- timeawarepc/__init__.py +6 -0
- timeawarepc/find_cfc.py +38 -0
- timeawarepc/gc.py +31 -0
- timeawarepc/pcalg.py +1821 -0
- timeawarepc/pcalg_helpers.py +55 -0
- timeawarepc/simulate_data.py +82 -0
- timeawarepc/tpc.py +155 -0
- timeawarepc/tpc_helpers.py +129 -0
- timeawarepc/tutorial.py +60 -0
- timeawarepc-1.2.0.dist-info/METADATA +101 -0
- timeawarepc-1.2.0.dist-info/RECORD +14 -0
- timeawarepc-1.2.0.dist-info/WHEEL +6 -0
- timeawarepc-1.2.0.dist-info/licenses/LICENSE +21 -0
- timeawarepc-1.2.0.dist-info/top_level.txt +1 -0
timeawarepc/pcalg.py
ADDED
|
@@ -0,0 +1,1821 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
|
|
4
|
+
"""A graph generator based on the PC algorithm [Kalisch2007].
|
|
5
|
+
|
|
6
|
+
[Kalisch2007] Markus Kalisch and Peter Bühlmann. Estimating
|
|
7
|
+
high-dimensional directed acyclic graphs with the pc-algorithm. In The
|
|
8
|
+
Journal of Machine Learning Research, Vol. 8, pp. 613-636, 2007.
|
|
9
|
+
|
|
10
|
+
License: BSD
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import print_function
|
|
14
|
+
|
|
15
|
+
from itertools import combinations, permutations
|
|
16
|
+
import logging
|
|
17
|
+
import numpy as np
|
|
18
|
+
import networkx as nx
|
|
19
|
+
|
|
20
|
+
_logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
def _create_complete_graph(node_ids):
    """Build the fully connected undirected graph over the given nodes.

    Args:
        node_ids: the node identifiers; every unordered pair of them
            becomes an edge.

    Returns:
        A networkx.Graph in which all distinct nodes are adjacent.
    """
    complete = nx.Graph()
    complete.add_nodes_from(node_ids)
    # One bulk call instead of adding the pairs one by one.
    complete.add_edges_from(combinations(node_ids, 2))
    return complete
|
|
36
|
+
|
|
37
|
+
def estimate_skeleton(indep_test_func, data_matrix, alpha, **kwargs):
    """Estimate a skeleton graph from the statistical information.

    Starting from a complete graph (or from 'init_graph'), edges i-j are
    removed whenever the conditional independence test reports a p-value
    above alpha for some conditioning subset of size l drawn from the
    current neighbors of i (excluding j).  l starts at 0 and grows by one
    per pass until no node has enough neighbors left, or until
    'max_reach' is exceeded.

    Args:
        indep_test_func: the function for a conditional independency
            test.  It is called as
            indep_test_func(data_matrix, i, j, set_of_k, **kwargs)
            and must return a p-value.
        data_matrix: data (as a numpy array); one column per node.
        alpha: the significance level.
        kwargs:
            'max_reach': maximum value of l (see the code).  The
                value depends on the underlying distribution.
            'method': if 'stable' given, use stable-PC algorithm
                (see [Colombo2014]).
            'init_graph': initial structure of skeleton graph
                (as a networkx.Graph).  If not specified,
                a complete graph is used.  The given graph is modified
                in place and returned.
            other parameters may be passed depending on the
                indep_test_func()s.  All kwargs are forwarded to it.

    Returns:
        g: a skeleton graph (as a networkx.Graph).
        sep_set: a separation set (as an 2D-array of set()).  Entries for
            pairs that are already non-adjacent in 'init_graph' are None.

    Raises:
        ValueError: if 'init_graph' is not a networkx.Graph or its node
            count differs from the number of columns of data_matrix.

    [Colombo2014] Diego Colombo and Marloes H Maathuis. Order-independent
    constraint-based causal structure learning. In The Journal of Machine
    Learning Research, Vol. 15, pp. 3741-3782, 2014.
    """
    stable = kwargs.get('method') == "stable"
    # A missing or None 'max_reach' means "no limit".
    max_reach = kwargs.get('max_reach')

    node_size = data_matrix.shape[1]
    node_ids = range(node_size)
    sep_set = [[set() for _ in range(node_size)] for _ in range(node_size)]
    if 'init_graph' in kwargs:
        g = kwargs['init_graph']
        if not isinstance(g, nx.Graph):
            raise ValueError('init_graph must be a networkx.Graph')
        if g.number_of_nodes() != node_size:
            raise ValueError('init_graph not matching data_matrix shape')
        # Pairs excluded a priori carry no separating set.
        for (i, j) in combinations(node_ids, 2):
            if not g.has_edge(i, j):
                sep_set[i][j] = None
                sep_set[j][i] = None
    else:
        g = _create_complete_graph(node_ids)

    l = 0
    while True:
        cont = False
        # In stable mode removals are deferred to the end of the pass so
        # that the adjacency sets stay fixed while level l is processed.
        remove_edges = []
        for (i, j) in permutations(node_ids, 2):
            adj_i = list(g.neighbors(i))
            if j not in adj_i:
                continue
            adj_i.remove(j)
            if len(adj_i) < l:
                # Not enough neighbors to form a conditioning set of size l.
                continue
            _logger.debug('testing %s and %s', i, j)
            _logger.debug('neighbors of %s are %s', i, adj_i)
            for k in combinations(adj_i, l):
                _logger.debug('indep prob of %s and %s with subset %s',
                              i, j, k)
                p_val = indep_test_func(data_matrix, i, j, set(k),
                                        **kwargs)
                _logger.debug('p_val is %s', p_val)
                if p_val > alpha:
                    if g.has_edge(i, j):
                        _logger.debug('p: remove edge (%s, %s)', i, j)
                        if stable:
                            remove_edges.append((i, j))
                        else:
                            g.remove_edge(i, j)
                    sep_set[i][j] |= set(k)
                    sep_set[j][i] |= set(k)
                    break
            # At least one pair was large enough to test at this level,
            # so the next level may still have work to do.
            cont = True
        l += 1
        if stable:
            g.remove_edges_from(remove_edges)
        if not cont:
            break
        if max_reach is not None and l > max_reach:
            break

    return (g, sep_set)
|
|
125
|
+
|
|
126
|
+
def estimate_cpdag(skel_graph, sep_set):
    """Estimate a CPDAG from the skeleton graph and separation sets
    returned by the estimate_skeleton() function.

    An undirected edge i-j is represented by the two arcs i->j and j->i;
    orienting it as i->j means deleting the arc j->i.

    Args:
        skel_graph: A skeleton graph (an undirected networkx.Graph).
        sep_set: An 2D-array of separation set.
            The contents look like something like below.
                sep_set[i][j] = set([k, l, m])
            A None entry makes the pair (i, j) be skipped when
            orienting colliders.

    Returns:
        The estimated CPDAG as a networkx.DiGraph (edges that could not
        be oriented keep both arcs).
    """
    dag = skel_graph.to_directed()
    node_ids = skel_graph.nodes()

    # Collider step: for non-adjacent i, j with a common neighbor k that
    # is not in their separation set, orient i->k<-j.
    for (i, j) in combinations(node_ids, 2):
        adj_i = set(dag.successors(i))
        if j in adj_i:
            continue
        adj_j = set(dag.successors(j))
        if i in adj_j:
            continue
        if sep_set[i][j] is None:
            continue
        for k in adj_i & adj_j:
            if k not in sep_set[i][j]:
                if dag.has_edge(k, i):
                    _logger.debug('S: remove edge (%s, %s)', k, i)
                    dag.remove_edge(k, i)
                if dag.has_edge(k, j):
                    _logger.debug('S: remove edge (%s, %s)', k, j)
                    dag.remove_edge(k, j)

    def _has_both_edges(dag, i, j):
        return dag.has_edge(i, j) and dag.has_edge(j, i)

    def _has_any_edge(dag, i, j):
        return dag.has_edge(i, j) or dag.has_edge(j, i)

    # Apply the orientation rules to every ORDERED pair (i, j).  Each
    # rule can only turn i-j into i->j, so both (i, j) and (j, i) must be
    # visited; otherwise an edge could never be oriented toward the node
    # that comes first in the node order.
    #
    # The rules only ever delete arcs, so the edge count strictly drops
    # whenever a pass changes something; an unchanged count therefore
    # means a fixed point has been reached.
    edge_count = dag.number_of_edges()
    while True:
        for (i, j) in permutations(node_ids, 2):
            # Rule 1: Orient i-j into i->j whenever there is an arrow k->i
            # such that k and j are nonadjacent.
            #
            # Check if i-j.
            if _has_both_edges(dag, i, j):
                # Look all the predecessors of i.
                for k in dag.predecessors(i):
                    # Skip if there is an arrow i->k.
                    if dag.has_edge(i, k):
                        continue
                    # Skip if k and j are adjacent.
                    if _has_any_edge(dag, k, j):
                        continue
                    # Make i-j into i->j
                    _logger.debug('R1: remove edge (%s, %s)', j, i)
                    dag.remove_edge(j, i)
                    break

            # Rule 2: Orient i-j into i->j whenever there is a chain
            # i->k->j.
            #
            # Check if i-j.
            if _has_both_edges(dag, i, j):
                # Find nodes k where k is i->k.
                succs_i = {k for k in dag.successors(i)
                           if not dag.has_edge(k, i)}
                # Find nodes k where k->j.
                preds_j = {k for k in dag.predecessors(j)
                           if not dag.has_edge(j, k)}
                # Check if there is any node k where i->k->j.
                if succs_i & preds_j:
                    # Make i-j into i->j
                    _logger.debug('R2: remove edge (%s, %s)', j, i)
                    dag.remove_edge(j, i)

            # Rule 3: Orient i-j into i->j whenever there are two chains
            # i-k->j and i-l->j such that k and l are nonadjacent.
            #
            # Check if i-j.
            if _has_both_edges(dag, i, j):
                # Find nodes k where i-k.
                adj_i = {k for k in dag.successors(i)
                         if dag.has_edge(k, i)}
                # For all the pairs of nodes in adj_i,
                for (k, l) in combinations(adj_i, 2):
                    # Skip if k and l are adjacent.
                    if _has_any_edge(dag, k, l):
                        continue
                    # Skip if not k->j.
                    if dag.has_edge(j, k) or (not dag.has_edge(k, j)):
                        continue
                    # Skip if not l->j.
                    if dag.has_edge(j, l) or (not dag.has_edge(l, j)):
                        continue
                    # Make i-j into i->j.
                    _logger.debug('R3: remove edge (%s, %s)', j, i)
                    dag.remove_edge(j, i)
                    break

            # Rule 4: Orient i-j into i->j whenever there are two chains
            # i-k->l and k->l->j such that k and j are nonadjacent.
            #
            # However, this rule is not necessary when the PC-algorithm
            # is used to estimate a DAG.

        remaining = dag.number_of_edges()
        if remaining == edge_count:
            break
        edge_count = remaining

    return dag
|
|
254
|
+
# def pre_whiten(data,S=None):
|
|
255
|
+
# import numpy as np
|
|
256
|
+
# from scipy import stats, linalg
|
|
257
|
+
# import GPy
|
|
258
|
+
# import GPyOpt
|
|
259
|
+
# import seaborn as sns
|
|
260
|
+
# S=np.tile(np.arange(data.shape[0]),reps=[data.shape[1],1]).T
|
|
261
|
+
# sigma_f, l = 1.5, 2
|
|
262
|
+
# kernel = GPy.kern.RBF(1, sigma_f, l)
|
|
263
|
+
# p = data.shape[1]
|
|
264
|
+
# #model = GPy.models.GPRegression(X,y,kernel)
|
|
265
|
+
# #r = ndw_corr(A,B,S,data)
|
|
266
|
+
# #kernel = RBF(0.1, (10,10))
|
|
267
|
+
# #gp = gpr(kernel=kernel, n_restarts_optimizer=100, alpha = 0.04)
|
|
268
|
+
# #gp.fit(X,y)
|
|
269
|
+
# r=np.zeros(data.shape)
|
|
270
|
+
# for A in range(p):
|
|
271
|
+
# print((A,p))
|
|
272
|
+
# model_A = GPy.models.GPRegression(S[:,A].reshape((-1,1)),data[:,A].reshape((-1,1)),kernel)#
|
|
273
|
+
# model_A.optimize()#_restarts(num_restarts=20)
|
|
274
|
+
# r[:,A] = data[:,A] - model_A.predict(S[:,A].reshape((-1,1)))[0].reshape(data[:,A].shape)#model_A.predict(S[:,A].reshape(-1,1))
|
|
275
|
+
# return r
|
|
276
|
+
|
|
277
|
+
# def ci_test_gp(data,A,B,C,**kwargs):
|
|
278
|
+
# #use pre-whitened data in data.
|
|
279
|
+
# import numpy as np
|
|
280
|
+
# from scipy import stats, linalg
|
|
281
|
+
# import GPy
|
|
282
|
+
# import GPyOpt
|
|
283
|
+
# import seaborn as sns
|
|
284
|
+
# from HSIC import hsic_gam
|
|
285
|
+
# S=np.tile(np.arange(data.shape[0]),reps=[data.shape[1],1]).T
|
|
286
|
+
# sigma_f, l = 1.5, 2
|
|
287
|
+
# kernel = GPy.kern.RBF(1, sigma_f, l)
|
|
288
|
+
# p = data.shape[1]
|
|
289
|
+
# #model = GPy.models.GPRegression(X,y,kernel)
|
|
290
|
+
# C2 = np.zeros(p, dtype=np.bool)
|
|
291
|
+
# for i in range(p):
|
|
292
|
+
# if i in C:
|
|
293
|
+
# C2[i]=True
|
|
294
|
+
# #r = ndw_corr(A,B,S,data)
|
|
295
|
+
|
|
296
|
+
# # if whiten == True:
|
|
297
|
+
# # r = pre_whiten(data)
|
|
298
|
+
# # else:
|
|
299
|
+
# # r=data
|
|
300
|
+
# #print(r)
|
|
301
|
+
# # if r==1:
|
|
302
|
+
# # pval = 0
|
|
303
|
+
# # else:
|
|
304
|
+
# # z = 0.5 * np.log((1+r)/(1-r))
|
|
305
|
+
# # T = np.sqrt(data.shape[0]-len(S)-3)*np.abs(z)
|
|
306
|
+
# # pval = 2*(1 - stats.norm.cdf(T))
|
|
307
|
+
# if len(C) != 0:
|
|
308
|
+
# if len(C)>1:
|
|
309
|
+
# model_A = GPy.models.GPRegression(data[:,C2],data[:,A].reshape((-1,1)),kernel)#
|
|
310
|
+
# model_B = GPy.models.GPRegression(data[:,C2],data[:,B].reshape((-1,1)),kernel)
|
|
311
|
+
# if len(C)==1:
|
|
312
|
+
# model_A = GPy.models.GPRegression(data[:,C2].reshape((-1,1)),data[:,A].reshape((-1,1)),kernel)
|
|
313
|
+
# model_B = GPy.models.GPRegression(data[:,C2].reshape((-1,1)),data[:,B].reshape((-1,1)),kernel)
|
|
314
|
+
# model_A.optimize()#_restarts(num_restarts=20,verbose=False);
|
|
315
|
+
# model_B.optimize()
|
|
316
|
+
# rA = data[:,A] - model_A.predict(data[:,C2])[0].reshape(data[:,A].shape)#model_A.predict(S[:,A].reshape(-1,1))
|
|
317
|
+
# rB = data[:,B] - model_B.predict(data[:,C2])[0].reshape(data[:,B].shape)#model_A.predict(S[:,A].reshape(-1,1))
|
|
318
|
+
# else:
|
|
319
|
+
# rA = data[:,A]
|
|
320
|
+
# rB = data[:,B]
|
|
321
|
+
# pval = hsic_gam(rA.reshape((-1,1)),rB.reshape((-1,1)))
|
|
322
|
+
# return pval
|
|
323
|
+
|
|
324
|
+
def ci_test_gauss(data,A,B,S,**kwargs):
    """Conditional independence test based on the partial correlation.

    The partial correlation of columns A and B given the columns in S is
    Fisher z-transformed, scaled by sqrt(n - |S| - 3) with n the number of
    rows of data, and compared against the standard normal distribution.

    Args:
        data: 2-D numpy array, one column per variable.
        A, B: column indices of the two variables under test.
        S: collection of column indices to condition on.
        kwargs: accepted for interface compatibility with
            estimate_skeleton(); not used here.

    Returns:
        The two-sided p-value; 0 when the partial correlation is +/-1.
        The result is NaN when n - |S| - 3 is negative.
    """
    import numpy as np
    from scipy import stats
    r = partial_corr(A,B,S,data)
    if abs(r) >= 1:
        # Perfect (anti-)correlation: the z-transform diverges, so the
        # variables are reported as dependent without evaluating log(0).
        return 0.0
    z = 0.5 * np.log((1+r)/(1-r))
    T = np.sqrt(data.shape[0]-len(S)-3)*np.abs(z)
    # sf(T) == 1 - cdf(T) but keeps precision for large T, where the
    # subtraction would round to exactly 0.
    return 2*stats.norm.sf(T)
|
|
336
|
+
def ci_test_gauss_btp(data,A,B,S,**kwargs):
    """Bootstrap variant of the partial-correlation independence test.

    The statistic is |z|, the absolute Fisher z-transform of the partial
    correlation of columns A and B given S.  It is recomputed on
    stationary-bootstrap resamples of data (block length: median of the
    first column returned by arch's optimal_block_length), and the
    returned value is the fraction of resamples whose statistic exceeds
    twice the observed one.

    Args:
        data: 2-D numpy array, one column per variable.
        A, B: column indices of the two variables under test.
        S: collection of column indices to condition on.
        kwargs:
            'nbtp': number of bootstrap resamples (default 50).
            Other entries are ignored.

    Returns:
        The bootstrap p-value described above; 0 when the observed
        partial correlation is +/-1.
    """
    import numpy as np
    from arch import bootstrap

    def _abs_fisher_z(rho):
        # |z| with the divergent end points mapped to +inf explicitly, so
        # a perfectly correlated resample cannot divide by zero.
        if abs(rho) >= 1:
            return np.inf
        return np.abs(0.5 * np.log((1+rho)/(1-rho)))

    r = partial_corr(A,B,S,data)
    if abs(r) >= 1:
        return 0
    T = _abs_fisher_z(r)
    nbtp = kwargs.get('nbtp', 50)
    band = bootstrap.optimal_block_length(data)
    bs = bootstrap.StationaryBootstrap(np.median(band.iloc[:,0]),data)
    Tbtp = np.zeros(nbtp)
    for idx, data1 in enumerate(bs.bootstrap(nbtp)):
        # data1[0][0] is the resampled copy of the positional `data`
        # argument given to StationaryBootstrap above.
        Tbtp[idx] = _abs_fisher_z(partial_corr(A,B,S,data1[0][0]))
    pval = np.sum(Tbtp>2*T)/nbtp
    return pval
|
|
375
|
+
def hsic_condind(data,A,B,S,**kwargs):
    """Independence test delegating to hsic_CI from hsiccondTestIC.

    Args:
        data: 2-D numpy array, one column per variable.
        A, B: column indices of the two variables under test.
        S: collection of column indices to condition on; when empty the
            unconditional form hsic_CI(X, Y) is used.
        kwargs: accepted for interface compatibility with
            estimate_skeleton(); not used here.

    Returns:
        Whatever hsic_CI returns -- presumably a p-value, since
        estimate_skeleton() compares it with alpha; TODO confirm against
        the hsiccondTestIC module, which is not part of this file.
    """
    from hsiccondTestIC import hsic_CI
    X = data[:,A]
    Y = data[:,B]
    if len(S) == 0:
        return hsic_CI(X,Y)
    # Boolean column mask selecting the conditioning variables, in
    # ascending column order.
    idx = np.array([i in S for i in range(data.shape[1])], dtype=bool)
    return hsic_CI(X,Y,data[:,idx])
|
|
402
|
+
# if len(S) == 0:
|
|
403
|
+
# sig,pval,T=hsiccondTestIC(data[:,A],data[:,B])
|
|
404
|
+
# else:
|
|
405
|
+
# p = data.shape[1]
|
|
406
|
+
# idx = np.zeros(p, dtype=np.bool)
|
|
407
|
+
# for i in range(p):
|
|
408
|
+
# if i in S:
|
|
409
|
+
# idx[i]=True
|
|
410
|
+
# sig,pval,T=hsiccondTestIC(data[:,A],data[:,B],data[:,idx])
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def partial_corr(A,B,S,data):
    """Partial correlation of columns A and B of data given the columns S.

    Both columns are regressed (least squares, no intercept term) on the
    columns listed in S and the Pearson correlation of the two residual
    vectors is returned.  With an empty S this is the plain Pearson
    correlation of the two columns.

    Args:
        A, B: column indices of the two variables.
        S: collection of column indices to condition on; indices outside
            the column range are ignored.
        data: 2-D numpy array, one column per variable.

    Returns:
        The correlation coefficient reported by scipy.stats.pearsonr.
    """
    import numpy as np
    from scipy import stats, linalg
    p = data.shape[1]
    idx = np.array([i in S for i in range(p)], dtype=bool)
    targets = data[:, [A, B]]
    if idx.any():
        Z = data[:, idx]
        # One solve with a two-column right-hand side instead of two
        # separate solves against the same design matrix.
        beta = linalg.lstsq(Z, targets)[0]
        residuals = targets - Z.dot(beta)
    else:
        # Nothing to condition on: the residuals are the columns themselves.
        residuals = targets
    return stats.pearsonr(residuals[:, 0], residuals[:, 1])[0]
|
|
432
|
+
if __name__ == '__main__':
    import networkx as nx
    import numpy as np

    from gsq.ci_tests import ci_test_bin, ci_test_dis
    from gsq.gsq_testdata import bin_data, dis_data

    # To see the debug trace of the algorithm, attach a StreamHandler at
    # DEBUG level to _logger before running the demos.

    def _run_demo(test_func, matrix, node_count, expected_edges, **options):
        """Estimate a CPDAG from `matrix` and print how it compares with
        the graph given by `expected_edges`."""
        (skeleton, separators) = estimate_skeleton(indep_test_func=test_func,
                                                   data_matrix=matrix,
                                                   alpha=0.01,
                                                   **options)
        estimated = estimate_cpdag(skel_graph=skeleton, sep_set=separators)
        expected = nx.DiGraph()
        expected.add_nodes_from(range(node_count))
        expected.add_edges_from(expected_edges)
        print('Edges are:', estimated.edges(), end='')
        if nx.is_isomorphic(estimated, expected):
            print(' => GOOD')
        else:
            print(' => WRONG')
            print('True edges should be:', expected.edges())

    # Demo 1: binary data shipped with gsq.
    _run_demo(ci_test_bin,
              np.array(bin_data).reshape((5000, 5)),
              5,
              [(0, 1), (2, 3), (3, 2), (3, 1),
               (2, 4), (4, 2), (4, 1)])

    # Demo 2: discrete data shipped with gsq; `levels` is forwarded to
    # the independence test.
    _run_demo(ci_test_dis,
              np.array(dis_data).reshape((10000, 5)),
              5,
              [(0, 2), (1, 2), (1, 3), (4, 3)],
              levels=[3,2,3,4,2])

    # Demo 3: simulated Gaussian data 0 -> 2 <- 1, 2 -> 3, standardized
    # column-wise, tested with the HSIC-based test in stable mode.
    dm1 = np.random.normal(0,1,1000)
    dm2 = np.random.normal(0,1,1000)
    dm3 = dm1 + 0.5*dm2 + np.random.normal(0,1,1000)
    dm4 = dm3 + np.random.normal(0,1,1000)

    data = np.column_stack((dm1,dm2,dm3,dm4))
    data -= data.mean(axis=0)
    data /= data.std(axis=0)
    _run_demo(hsic_condind,
              data,
              4,
              [(0, 2), (1, 2), (2, 3)],
              method='stable')
|
|
502
|
+
#%%
|
|
503
|
+
# def cmiknn_indeptest(data,A,B,S,**kwargs):
|
|
504
|
+
# from tigramite.independence_tests import CMIknn
|
|
505
|
+
# cmi_knn = CMIknn(significance='shuffle_test', knn=0.1, shuffle_neighbors=5, transform='ranks')
|
|
506
|
+
# if len(S)==0:
|
|
507
|
+
# data_x = data[:,A]
|
|
508
|
+
# data_y = data[:,B]
|
|
509
|
+
# arr = np.row_stack((data_x,data_y))
|
|
510
|
+
# xyz = np.array([0,1])
|
|
511
|
+
# val = cmi_knn.get_dependence_measure(arr,xyz)
|
|
512
|
+
# p = cmi_knn.get_shuffle_significance(arr,xyz,val)
|
|
513
|
+
# else:
|
|
514
|
+
# data_x = data[:,A]
|
|
515
|
+
# data_y = data[:,B]
|
|
516
|
+
# print(S)
|
|
517
|
+
# data_z = data[:,list(S)].T
|
|
518
|
+
# arr = np.row_stack((data_x,data_y,data_z))
|
|
519
|
+
# xyz = np.array([0,1]+[2]*data_z.shape[0])
|
|
520
|
+
# val = cmi_knn.get_dependence_measure(arr,xyz)
|
|
521
|
+
# p = cmi_knn.get_shuffle_significance(arr,xyz,val)
|
|
522
|
+
# return p
|
|
523
|
+
# #%%
|
|
524
|
+
# dm1 = np.random.normal(0,1,1000)
|
|
525
|
+
# dm2 = np.random.normal(0,1,1000)
|
|
526
|
+
# dm3 = dm1 + 0.5*dm2 + np.random.normal(0,1,1000)
|
|
527
|
+
# dm4 = dm3 + np.random.normal(0,1,1000)
|
|
528
|
+
|
|
529
|
+
# dm=np.column_stack((dm1,dm2,dm3,dm4))
|
|
530
|
+
# (g, sep_set) = estimate_skeleton(indep_test_func=cmiknn_indeptest,
|
|
531
|
+
# data_matrix=dm,
|
|
532
|
+
# alpha=0.01,
|
|
533
|
+
# method='stable')
|
|
534
|
+
# g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
|
|
535
|
+
# g_answer = nx.DiGraph()
|
|
536
|
+
# g_answer.add_nodes_from([0, 1, 2, 3])
|
|
537
|
+
# g_answer.add_edges_from([(0, 2), (1, 2), (2, 3)])
|
|
538
|
+
# print('Edges are:', g.edges(), end='')
|
|
539
|
+
# if nx.is_isomorphic(g, g_answer):
|
|
540
|
+
# print(' => GOOD')
|
|
541
|
+
# else:
|
|
542
|
+
# print(' => WRONG')
|
|
543
|
+
# print('True edges should be:', g_answer.edges())
|
|
544
|
+
#%%
|
|
545
|
+
def causaleff_parcorr(g,data):
    """Matrix of partial correlations between every pair of graph nodes.

    Entry [x, y] is the partial correlation of columns x and y of data
    given all remaining nodes of g.  Node labels are used directly as
    row/column indices -- assumes they are the integers 0..n-1; TODO
    confirm against callers.

    Args:
        g: graph whose `nodes` define the variables.
        data: 2-D numpy array, one column per node.

    Returns:
        A (n, n) numpy array with zeros on the diagonal.
    """
    import numpy as np
    nodes = list(g.nodes)
    causaleff = np.zeros((len(nodes), len(nodes)))
    for (x, y) in permutations(nodes, 2):
        others = [node for node in nodes if node != x and node != y]
        causaleff[x,y] = partial_corr(x,y,others,data)
    return causaleff
|
|
556
|
+
def causaleff_ida_single(g,data,x,y):
    """Estimate the effect of node x on node y by regression adjustment.

    y is regressed (ordinary least squares with intercept, via
    sklearn.linear_model.LinearRegression) on x together with the
    predecessors of x in g, and the coefficient of x is returned.  Node
    labels are used directly as column indices of data.

    Args:
        g: directed graph (networkx.DiGraph).
        data: 2-D numpy array, one column per node.
        x: the cause node.
        y: the effect node.

    Returns:
        The regression coefficient of x, or 0 when x or y is not a node
        of g, when x == y, when x is not an ancestor of y, or when y is
        a predecessor of x.
    """
    nodes = list(g.nodes)
    if x not in nodes or y not in nodes or x == y:
        return 0
    if x not in nx.ancestors(g,y):
        # No directed path from x to y.
        return 0
    pa_x = list(g.predecessors(x))
    if y in pa_x:
        return 0
    regressors = pa_x if x in pa_x else pa_x + [x]
    # Imported only here so that the trivial cases above do not require
    # scikit-learn.
    from sklearn import linear_model
    lm_out = linear_model.LinearRegression().fit(data[:,regressors], data[:,y])
    return lm_out.coef_[regressors.index(x)]
|
|
580
|
+
# def causaleff_ida(g,data):
|
|
581
|
+
# from sklearn import linear_model
|
|
582
|
+
# import numpy as np
|
|
583
|
+
# #from gen_data_fns import sigmoid, relu
|
|
584
|
+
# #Edges = list(g.edges)
|
|
585
|
+
# Nodes = list(g.nodes)
|
|
586
|
+
# causaleff=np.zeros((len(Nodes),len(Nodes)))
|
|
587
|
+
# # if transformed == True and lag is None:
|
|
588
|
+
# # print("Please provide lag used in transformation")
|
|
589
|
+
# # elif transformed == True and lag is not None:
|
|
590
|
+
# # h=np.repeat(np.arange(0,len(Nodes)),lag)
|
|
591
|
+
# # if activation == 'centred-sigmoid':
|
|
592
|
+
# # activationfn = lambda x: sigmoid(x) - 0.5
|
|
593
|
+
# # elif activation == 'tanh':
|
|
594
|
+
# # activationfn = lambda x: np.tanh(x)
|
|
595
|
+
# # elif activation == 'linear':
|
|
596
|
+
# # activationfn = lambda x: x
|
|
597
|
+
# # elif activation == 'relu':
|
|
598
|
+
# # activationfn = lambda x: relu(x)
|
|
599
|
+
# for x in Nodes:
|
|
600
|
+
# for y in Nodes:
|
|
601
|
+
# if x!=y and x in list(nx.ancestors(g,y)):#list(g.predecessors(y)):#list(nx.ancestors(g,y)):
|
|
602
|
+
# #if (x,y) in Edges:
|
|
603
|
+
# #if y not in list(g.predecessors(x)):
|
|
604
|
+
# #if x in list(nx.ancestors(g,y)):
|
|
605
|
+
# lm = linear_model.LinearRegression()
|
|
606
|
+
# pa_x = list(g.predecessors(x))
|
|
607
|
+
# pa_y = list(g.predecessors(y))
|
|
608
|
+
# if x not in pa_x:
|
|
609
|
+
# regressors = pa_x + [x]
|
|
610
|
+
# else:
|
|
611
|
+
# regressors = pa_x
|
|
612
|
+
# if y in pa_x:
|
|
613
|
+
# causaleff[x,y] = 0
|
|
614
|
+
# else:
|
|
615
|
+
# # if y in pa_y:
|
|
616
|
+
# # pa_y = pa_y.pop(y)
|
|
617
|
+
# # if x not in pa_y:
|
|
618
|
+
# # regressors = pa_y + [x]
|
|
619
|
+
# # else:
|
|
620
|
+
# # regressors = pa_y
|
|
621
|
+
# # regressors=[x] + pa_x
|
|
622
|
+
# #if x in pa_x:
|
|
623
|
+
# # print("x in pa_x")
|
|
624
|
+
# X=data[:,regressors]#.reshape(-1,1)
|
|
625
|
+
# # if transformed == True and lag is not None:
|
|
626
|
+
# # for iter in range(X.shape[1]):
|
|
627
|
+
# # if h(regressors[iter]) != h(y):
|
|
628
|
+
# # X[iter] = activationfn(X[iter])
|
|
629
|
+
# Y=data[:,y]
|
|
630
|
+
# lm_out = lm.fit(X,Y)
|
|
631
|
+
# causaleff[x,y] = lm_out.coef_[regressors.index(x)]#lm_out.coef_[0]#lm_out.coef_[regressors.index(x)]#
|
|
632
|
+
# # if causaleff[x,y]>0:
|
|
633
|
+
# # causaleff[x,y] = np.log(causaleff[x,y]+1)
|
|
634
|
+
# # else:
|
|
635
|
+
# # causaleff[x,y] = -np.log(-causaleff[x,y]+1)
|
|
636
|
+
# return causaleff
|
|
637
|
+
def causaleff_ida(g,data):
    """Matrix of regression-adjusted effects between all pairs of nodes.

    For every ordered pair (x, y) with x != y, column y of data is
    regressed (least squares with intercept) on x together with the
    predecessors of x in g; entry [x, y] is the coefficient of x.  The
    entry stays 0 when y is itself a predecessor of x.  Node labels are
    used directly as row/column indices -- assumes they are the integers
    0..n-1; TODO confirm against callers.

    Args:
        g: directed graph providing `nodes` and `predecessors(node)`.
        data: 2-D array, one column per node.

    Returns:
        A (n, n) numpy array of coefficients with zeros on the diagonal.
    """
    import numpy as np
    nodes = list(g.nodes)
    data = np.asarray(data)
    causaleff = np.zeros((len(nodes), len(nodes)))
    intercept = np.ones((data.shape[0], 1))

    for x in nodes:
        pa_x = list(g.predecessors(x))
        regressors = pa_x if x in pa_x else pa_x + [x]
        targets = [y for y in nodes if y != x and y not in pa_x]
        if not targets:
            continue
        # The design matrix depends on x only, so it is built once and
        # solved against all target columns in a single call.
        design = np.hstack((intercept, data[:, regressors]))
        coefs = np.linalg.lstsq(design, data[:, targets], rcond=None)[0]
        # Row 0 is the intercept, hence the +1 offset.
        causaleff[x, targets] = coefs[regressors.index(x) + 1]
    return causaleff
|
|
668
|
+
def causaleff_ida_fin(g,data):
    """Matrix of regression-adjusted effects of each node on its descendants.

    For every pair (x, y) where x is an ancestor of y in g, the data
    column of y is regressed (least squares with intercept) on x together
    with the predecessors of x; the coefficient of x is stored at the
    matrix position of (x, y).  Entries stay 0 for all other pairs and
    when y is a predecessor of x.  Rows, columns and data columns are
    addressed by the position of each node in list(g.nodes), so node
    labels need not be integers.

    Args:
        g: directed graph (networkx.DiGraph).
        data: 2-D array whose columns follow the order of list(g.nodes).

    Returns:
        A (n, n) numpy array of coefficients.
    """
    import numpy as np
    nodes = list(g.nodes)
    # Position lookup replaces the repeated linear Nodes.index() scans.
    position = {node: k for k, node in enumerate(nodes)}
    data = np.asarray(data)
    causaleff = np.zeros((len(nodes), len(nodes)))
    intercept = np.ones((data.shape[0], 1))

    for x in nodes:
        pa_x = list(g.predecessors(x))
        # "x is an ancestor of y" is the same as "y is a descendant of x";
        # one traversal per x replaces one ancestor search per (x, y) pair.
        targets = [position[y] for y in nx.descendants(g, x)
                   if y != x and y not in pa_x]
        if not targets:
            continue
        regressors = [position[i] for i in pa_x]
        if x not in pa_x:
            regressors.append(position[x])
        design = np.hstack((intercept, data[:, regressors]))
        coefs = np.linalg.lstsq(design, data[:, targets], rcond=None)[0]
        # Row 0 is the intercept, hence the +1 offset.
        causaleff[position[x], targets] = coefs[regressors.index(position[x]) + 1]
    return causaleff
|
|
696
|
+
def causaleff_ida_pconly(g,data,transformed=True,lag=None):
    """Matrix of regression-adjusted effects along the direct edges of g.

    For every pair (x, y) where x is a predecessor of y, column y of data
    is regressed (ordinary least squares with intercept, via
    sklearn.linear_model.LinearRegression) on x together with the
    predecessors of x; entry [x, y] is the coefficient of x.  All other
    entries are 0.  Node labels are used directly as row/column indices
    -- assumes they are the integers 0..n-1; TODO confirm against callers.

    Args:
        g: directed graph providing `nodes` and `predecessors(node)`.
        data: 2-D numpy array, one column per node.
        transformed: unused; kept for interface compatibility.
        lag: unused; kept for interface compatibility.

    Returns:
        A (n, n) numpy array of coefficients.
    """
    import numpy as np
    nodes = list(g.nodes)
    causaleff = np.zeros((len(nodes), len(nodes)))

    # Same (x, y) visiting order as a nested loop over nodes, keeping
    # only the pairs joined by a direct edge x -> y.
    parents = {node: list(g.predecessors(node)) for node in nodes}
    direct_pairs = [(x, y) for x in nodes for y in nodes
                    if x != y and x in parents[y]]
    if not direct_pairs:
        return causaleff

    # Imported only when a regression is actually needed.
    from sklearn import linear_model
    for (x, y) in direct_pairs:
        regressors = [x] + parents[x]
        lm_out = linear_model.LinearRegression().fit(data[:,regressors], data[:,y])
        # x is always the first regressor.
        causaleff[x,y] = lm_out.coef_[0]
    return causaleff
|
|
749
|
+
# %%
|
|
750
|
+
|
|
751
|
+
# def pc_plot_out(lag,alpha,motif,n_ctrnn,n_samp,m,w,tau):
|
|
752
|
+
# import networkx as nx
|
|
753
|
+
# import matplotlib.pyplot as plt
|
|
754
|
+
# from gen_data_fns import create_dataset4, plot_matrix
|
|
755
|
+
# import numpy as np
|
|
756
|
+
# from pcalg import causaleff_ida
|
|
757
|
+
# dataset = create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T
|
|
758
|
+
# for i in range(n_samp - 1):
|
|
759
|
+
# dataset = np.vstack((dataset, create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T))
|
|
760
|
+
# dataset2=data_transformed(dataset, lag)
|
|
761
|
+
# (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
|
|
762
|
+
# data_matrix=dataset2,
|
|
763
|
+
# alpha=alpha,method='stable')
|
|
764
|
+
# g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
|
|
765
|
+
# causaleff = causaleff_ida(g,dataset2)
|
|
766
|
+
# g1, causaleff1 = return_finaledges(g,causaleff,lag,m)
|
|
767
|
+
# causaleff2 = causaleff1/np.max(np.abs(causaleff1))
|
|
768
|
+
# plot_matrix(causaleff2,motif,'pc_causaleff')
|
|
769
|
+
# fig, ax = plt.subplots(1,1,figsize=(10,10))
|
|
770
|
+
# nx.draw(g1, with_labels= True,ax=ax)
|
|
771
|
+
# fig.savefig('dag_'+motif+'.png',format = 'png', dpi =600)
|
|
772
|
+
# return g1,causaleff2
|
|
773
|
+
def pc_original_bootstrap_plot(alpha,motif,n_ctrnn,n_samp,m,w,tau,niter=50):
    """Bootstrap the plain PC algorithm on simulated data and plot the result.

    ``n_samp`` datasets are generated with ``create_dataset4`` and stacked
    row-wise.  In each of ``niter`` rounds a contiguous window of rows is
    taken at a random offset, a skeleton and a CPDAG are estimated with
    ``ci_test_gauss``, and ``causaleff_ida`` is evaluated on the same window.
    Edges found in at least 75% of the CPDAGs form the returned graph, and
    the returned effect matrix is the mean of the per-round effect matrices.
    Two matrices are handed to ``plot_matrix`` before returning.

    Args:
        alpha: significance level passed to ``estimate_skeleton``.
        motif: label forwarded to ``plot_matrix``.
        n_ctrnn, w, tau: forwarded unchanged to ``create_dataset4``.
        n_samp: number of simulated datasets that are stacked.
        m: forwarded to ``create_dataset4``; also the number of nodes of the
            returned graph.
        niter: number of bootstrap rounds.

    Returns:
        ``(g2, causaleff3)``: a ``networkx.DiGraph`` on nodes ``0..m-1`` and
        the averaged effect matrix.
    """
    import networkx as nx
    from gen_data_fns import create_dataset4, plot_matrix
    import numpy as np
    # NOTE(review): resolves against a top-level module named ``pcalg``;
    # confirm that is the intended module when this file lives in a package.
    from pcalg import causaleff_ida
    import random

    dataset = create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T
    for _ in range(n_samp - 1):
        dataset = np.vstack((dataset, create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T))

    n_rows = dataset.shape[0]
    # Use 10000-row windows, but never more rows than are available, so a
    # short dataset no longer makes random.randint raise on an empty range.
    window = min(10000, n_rows)

    graphs = []
    effects = []
    for _ in range(niter):
        start = random.randint(0, n_rows - window)
        sample = dataset[start:start + window, :]
        (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                         data_matrix=sample,
                                         alpha=alpha, method='stable')
        graphs.append(estimate_cpdag(skel_graph=g, sep_set=sep_set))
        # NOTE(review): the effect is computed on the skeleton ``g``, not on
        # the CPDAG stored above, unlike the other bootstrap helpers in this
        # file -- confirm this is intended.
        effects.append(causaleff_ida(g, sample))

    # Count, for every ordered pair, in how many rounds the edge was found.
    edgemat = np.zeros((m, m))
    for graph in graphs:
        found = set(graph.edges)  # built once per graph, not once per pair
        for i in range(m):
            for j in range(m):
                if (i, j) in found:
                    edgemat[i, j] = edgemat[i, j] + 1

    threshold = 75 * niter / 100.
    edgefinal = set()
    for i in range(m):
        for j in range(m):
            if edgemat[i, j] >= threshold:
                edgefinal.add((i, j))

    g2 = nx.DiGraph()
    g2.add_nodes_from(range(m))
    g2.add_edges_from(edgefinal)

    causaleff3 = np.zeros((m, m))
    for effect in effects:
        causaleff3 = causaleff3 + effect
    causaleff3 = causaleff3 / niter

    plot_matrix(causaleff3/np.max(np.abs(causaleff3)),motif,'pc_causaleff_orig')
    plot_matrix(0.5*(causaleff3!=0),motif,'pc_causalconn_orig')
    return g2, causaleff3
|
|
829
|
+
def pc_bootstrap(dataset2,lag,alpha,m,niter=50):
    """Bootstrap the PC pipeline over random row windows of ``dataset2``.

    Each round slices a contiguous window of rows, estimates a skeleton and
    CPDAG with ``ci_test_gauss``, computes effects with ``causaleff_ida`` and
    reduces both through ``return_finaledges``.  Edges present in at least
    25% of the rounds form the returned graph.  Each entry of the returned
    effect matrix is the mean of that entry over the rounds in which it was
    non-zero (0 if it was never non-zero).

    Args:
        dataset2: 2-D array whose rows are resampled.
        lag: forwarded to ``return_finaledges``.
        alpha: significance level passed to ``estimate_skeleton``.
        m: number of nodes; also forwarded to ``return_finaledges``.
        niter: number of bootstrap rounds.

    Returns:
        ``(g2, causaleff3)``: a ``networkx.DiGraph`` on nodes ``0..m-1`` and
        an ``(m, m)`` effect matrix.
    """
    import networkx as nx
    import numpy as np
    # NOTE(review): resolves against a top-level module named ``pcalg``;
    # confirm that is the intended module when this file lives in a package.
    from pcalg import causaleff_ida
    from tqdm import tqdm
    import random

    n_rows = dataset2.shape[0]
    # NOTE(review): the offset is drawn from [0, rows-100] although the
    # window is 500 rows long, so windows starting near the end are shorter
    # than 500 rows.  Kept to preserve the existing sampling scheme; the
    # max() only prevents a crash when there are fewer than 100 rows.
    last_start = max(0, n_rows - 100)

    graphs = []
    effects = []
    for _ in tqdm(range(niter)):
        start = random.randint(0, last_start)
        sample = dataset2[start:start + 500, :]
        (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                         data_matrix=sample,
                                         alpha=alpha, method='stable')
        g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
        causaleff = causaleff_ida(g, sample)
        graph, effect = return_finaledges(g, causaleff, lag, m)
        graphs.append(graph)
        effects.append(effect)

    # Count, for every ordered pair, in how many rounds the edge was found.
    edgemat = np.zeros((m, m))
    for graph in graphs:
        found = set(graph.edges)  # built once per graph, not once per pair
        for i in range(m):
            for j in range(m):
                if (i, j) in found:
                    edgemat[i, j] = edgemat[i, j] + 1

    threshold = 25 * niter / 100.
    edgefinal = set()
    for i in range(m):
        for j in range(m):
            if edgemat[i, j] >= threshold:
                edgefinal.add((i, j))

    g2 = nx.DiGraph()
    g2.add_nodes_from(range(m))
    g2.add_edges_from(edgefinal)

    # Average each entry over the rounds where it was non-zero.  Rounds are
    # accumulated one after another, i.e. in the same order as a scalar loop.
    total = np.zeros((m, m))
    count = np.zeros((m, m))
    for effect in effects:
        block = np.asarray(effect)[:m, :m]
        nonzero = block != 0
        total += np.where(nonzero, block, 0.0)
        count += nonzero
    causaleff3 = np.zeros((m, m))
    seen = count > 0
    causaleff3[seen] = total[seen] / count[seen]
    return g2, causaleff3
|
|
886
|
+
def pc_bootstrap_2(dataset,lag,alpha,m, n_ctrnn, n_samp,niter=50):
    """Bootstrap the PC pipeline using ``data_transformed_2`` for resampling.

    Each round calls ``data_transformed_2(dataset, lag, n_ctrnn, n_samp)`` to
    obtain the sample, estimates a skeleton and CPDAG with ``ci_test_gauss``,
    computes effects with ``causaleff_ida`` and reduces both through
    ``return_finaledges``.  Edges present in at least 25% of the rounds form
    the returned graph.  Each entry of the returned effect matrix is the
    mean of that entry over the rounds in which it was non-zero; entries
    that were never non-zero are 0 (previously they became NaN through a
    0/0 division).

    Args:
        dataset: raw data handed to ``data_transformed_2``.
        lag: forwarded to ``data_transformed_2`` and ``return_finaledges``.
        alpha: significance level passed to ``estimate_skeleton``.
        m: number of nodes; also forwarded to ``return_finaledges``.
        n_ctrnn, n_samp: forwarded unchanged to ``data_transformed_2``.
        niter: number of bootstrap rounds.

    Returns:
        ``(g2, causaleff3)``: a ``networkx.DiGraph`` on nodes ``0..m-1`` and
        an ``(m, m)`` effect matrix.
    """
    import networkx as nx
    import numpy as np
    # NOTE(review): resolves against a top-level module named ``pcalg``;
    # confirm that is the intended module when this file lives in a package.
    from pcalg import causaleff_ida
    from tqdm import tqdm

    graphs = []
    effects = []
    for _ in tqdm(range(niter)):
        sample = data_transformed_2(dataset, lag, n_ctrnn, n_samp)
        (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                         data_matrix=sample,
                                         alpha=alpha, method='stable')
        g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
        causaleff = causaleff_ida(g, sample)
        graph, effect = return_finaledges(g, causaleff, lag, m)
        graphs.append(graph)
        effects.append(effect)

    # Count, for every ordered pair, in how many rounds the edge was found.
    edgemat = np.zeros((m, m))
    for graph in graphs:
        found = set(graph.edges)  # built once per graph, not once per pair
        for i in range(m):
            for j in range(m):
                if (i, j) in found:
                    edgemat[i, j] = edgemat[i, j] + 1

    threshold = 25 * niter / 100.
    edgefinal = set()
    for i in range(m):
        for j in range(m):
            if edgemat[i, j] >= threshold:
                edgefinal.add((i, j))

    g2 = nx.DiGraph()
    g2.add_nodes_from(range(m))
    g2.add_edges_from(edgefinal)

    # Average each entry over the rounds where it was non-zero, guarding the
    # division so that never-observed entries stay 0 (same rule as
    # pc_bootstrap uses).
    total = np.zeros((m, m))
    count = np.zeros((m, m))
    for effect in effects:
        block = np.asarray(effect)[:m, :m]
        nonzero = block != 0
        total += np.where(nonzero, block, 0.0)
        count += nonzero
    causaleff3 = np.zeros((m, m))
    seen = count > 0
    causaleff3[seen] = total[seen] / count[seen]
    return g2, causaleff3
|
|
938
|
+
# def pc_bootstrap(dataset2,lag,alpha,m,niter=50):
|
|
939
|
+
# import networkx as nx
|
|
940
|
+
# import matplotlib.pyplot as plt
|
|
941
|
+
# from gen_data_fns import create_dataset4, plot_matrix
|
|
942
|
+
# import numpy as np
|
|
943
|
+
# from pcalg import causaleff_ida
|
|
944
|
+
# from tqdm import tqdm
|
|
945
|
+
# import random
|
|
946
|
+
# g1={}
|
|
947
|
+
# causaleff2={}
|
|
948
|
+
# for iter in tqdm(range(niter)):
|
|
949
|
+
# idx=random.randint(0,dataset2.shape[0]-100)
|
|
950
|
+
# #dataset3 = dataset2[random.sample(range(dataset2.shape[0]),1000),:]
|
|
951
|
+
# dataset3 = dataset2[idx:(idx+500),:]
|
|
952
|
+
# (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
|
|
953
|
+
# data_matrix=dataset3,
|
|
954
|
+
# alpha=alpha,method='stable')
|
|
955
|
+
# g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
|
|
956
|
+
# causaleff = causaleff_ida(g,dataset3)
|
|
957
|
+
# g1[iter], causaleff1 = return_finaledges(g,causaleff,lag,m)
|
|
958
|
+
# causaleff2[iter] = causaleff1#np.max(np.abs(causaleff1))
|
|
959
|
+
# edgemat=np.zeros((m,m))
|
|
960
|
+
# for i in range(m):
|
|
961
|
+
# for j in range(m):
|
|
962
|
+
# for iter in range(niter):
|
|
963
|
+
# if (i,j) in list(g1[iter].edges):
|
|
964
|
+
# edgemat[i,j] = edgemat[i,j]+1
|
|
965
|
+
# edgefinal = set([])
|
|
966
|
+
# for i in range(m):
|
|
967
|
+
# for j in range(m):
|
|
968
|
+
# if edgemat[i,j]>= (25*niter/100.):
|
|
969
|
+
# edgefinal = edgefinal | {(i,j)}
|
|
970
|
+
# g2=nx.DiGraph()
|
|
971
|
+
# g2.add_nodes_from(range(m))
|
|
972
|
+
# g2.add_edges_from(edgefinal)
|
|
973
|
+
# #nx.draw(g2,with_labels=True)
|
|
974
|
+
# causaleff3 = np.zeros((m,m))
|
|
975
|
+
# for i in range(m):
|
|
976
|
+
# for j in range(m):
|
|
977
|
+
# s=0
|
|
978
|
+
# for iter in range(niter):
|
|
979
|
+
# if causaleff2[iter][i,j]!=0:
|
|
980
|
+
# causaleff3[i,j] = causaleff3[i,j]+causaleff2[iter][i,j]
|
|
981
|
+
# s=s+1
|
|
982
|
+
# causaleff3[i,j] = causaleff3[i,j]/s
|
|
983
|
+
# # for i in list(g2.nodes):
|
|
984
|
+
# # for j in list(g2.nodes):
|
|
985
|
+
# # if i not in nx.ancestors(g2,j):
|
|
986
|
+
# # causaleff3[i,j] = 0
|
|
987
|
+
# return g2,causaleff3
|
|
988
|
+
def pc_plot_out(lag,alpha,motif,n_ctrnn,n_samp,m,w,tau,niter=50,isPlot=True):
    """Simulate data, run the bootstrapped PC pipeline and optionally plot.

    ``n_samp`` datasets from ``create_dataset4`` are stacked row-wise,
    passed through ``data_transformed`` and handed to ``pc_bootstrap``.

    Args:
        lag: forwarded to ``data_transformed`` and ``pc_bootstrap``.
        alpha: significance level forwarded to ``pc_bootstrap``.
        motif: label used for ``plot_matrix`` and for the saved file name
            ``'dag_' + motif + '.png'``.
        n_ctrnn, w, tau: forwarded unchanged to ``create_dataset4``.
        n_samp: number of simulated datasets that are stacked.
        m: forwarded to ``create_dataset4`` and ``pc_bootstrap``.
        niter: number of bootstrap rounds.  It is now actually forwarded;
            the call used to hard-code 50, which is still the default.
        isPlot: plots are produced only when this is exactly ``True``.

    Returns:
        ``(g2, causaleff2)`` exactly as returned by ``pc_bootstrap``.
    """
    import networkx as nx
    import matplotlib.pyplot as plt
    from gen_data_fns import create_dataset4, plot_matrix
    import numpy as np
    from pcalg import causaleff_ida, pc_bootstrap

    dataset = create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T
    for _ in range(n_samp - 1):
        dataset = np.vstack((dataset, create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T))
    dataset2 = data_transformed(dataset, lag)

    g2, causaleff2 = pc_bootstrap(dataset2, lag, alpha, m, niter=niter)

    if isPlot is True:
        plot_matrix(causaleff2,motif,'pc_causaleff')
        fig, ax = plt.subplots(1,1,figsize=(10,10))
        nx.draw(g2, with_labels= True,ax=ax)
        fig.savefig('dag_'+motif+'.png',format = 'png', dpi =600)
    return g2, causaleff2
|
|
1017
|
+
def pc_plot_out2(lag,alpha,motif,n_ctrnn,n_samp,m,w,tau,niter=50):
    """Bootstrapped PC graph refined by leaving out one variable at a time.

    A baseline graph ``G`` and effect matrix are obtained from
    ``pc_bootstrap`` (25 rounds) on the transformed simulated data.  Then,
    for every variable ``k``, the pipeline is re-run on a random row window
    of the data with column ``k`` removed.  For each edge
    ``(node1, node2)`` of that reduced graph, the edge ``k -> node2`` is
    added to ``G`` when all of the following hold: ``G`` has no edge
    ``k -> node2`` yet, ``node1 != node2``, ``node1`` is an ancestor of
    ``k`` in ``G``, and no node other than ``k`` and ``node2`` has an edge
    into ``node2``.  Finally the baseline effects are kept for
    (ancestor, descendant) pairs of ``G`` and the results are plotted.

    Args:
        lag: forwarded to ``data_transformed``, ``pc_bootstrap`` and
            ``return_finaledges``.
        alpha: significance level for the conditional-independence tests.
        motif: label used for ``plot_matrix`` and for the saved file name
            ``'dag_' + motif + '.png'``.
        n_ctrnn, w, tau: forwarded unchanged to ``create_dataset4``.
        n_samp: number of simulated datasets that are stacked.
        m: number of variables.
        niter: accepted for signature compatibility; this function does not
            read it (the baseline bootstrap always uses 25 rounds).

    Returns:
        ``(G, causaleff3)``: the refined ``networkx.DiGraph`` and the
        ``(m, m)`` effect matrix restricted to ancestor pairs.
    """
    import networkx as nx
    import matplotlib.pyplot as plt
    from gen_data_fns import create_dataset4, plot_matrix
    import numpy as np
    from pcalg import causaleff_ida, pc_bootstrap
    import random
    from tqdm import tqdm

    dataset = create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T
    for _ in range(n_samp - 1):
        dataset = np.vstack((dataset, create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T))
    dataset01 = data_transformed(dataset, lag)

    # The self-loop-free copy of the baseline graph that used to be built
    # here was never read, and building it called Graph.selfloop_edges(),
    # a method newer networkx releases no longer provide.
    G, causaleff0 = pc_bootstrap(dataset01, lag, alpha, m, niter=25)

    for k in tqdm(range(m)):
        reduced = data_transformed(np.delete(dataset, [k], axis=1), lag)
        # Same offset/window scheme as pc_bootstrap; max() only avoids a
        # crash when fewer than 100 rows are available.
        start = random.randint(0, max(0, reduced.shape[0] - 100))
        sample = reduced[start:start + 500, :]

        # Map column positions of the reduced data back to original labels.
        kept = [node for node in range(m) if node != k]
        relabels = dict(zip(range(m - 1), kept))

        (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                         data_matrix=sample,
                                         alpha=alpha, method='stable')
        g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
        causaleff = causaleff_ida(g, sample)
        ablated, _ = return_finaledges(g, causaleff, lag, m - 1)
        ablated = nx.relabel_nodes(ablated, relabels)

        for (node1, node2) in list(ablated.edges):
            if G.has_edge(k, node2):
                continue
            # G grows inside this loop, so ancestors are recomputed each time.
            if node1 == node2 or node1 not in nx.ancestors(G, k):
                continue
            has_other_parent = any(G.has_edge(z, node2)
                                   for z in range(m)
                                   if z != k and z != node2)
            if not has_other_parent:
                G.add_edge(k, node2)

    nx.draw(G,with_labels=True)

    causaleff3 = np.zeros((m, m))
    for j in list(G.nodes):
        # One ancestor query per target node instead of one per pair.
        for i in nx.ancestors(G, j):
            causaleff3[i, j] = causaleff0[i, j]

    plot_matrix(causaleff3,motif,'pc_causaleff')
    plot_matrix(0.5*(causaleff3!=0),motif,'pc_causalconn')
    fig, ax = plt.subplots(1,1,figsize=(10,10))
    nx.draw(G, with_labels= True,ax=ax)
    fig.savefig('dag_'+motif+'.png',format = 'png', dpi =600)
    return G, causaleff3
|
|
1153
|
+
#%%
|
|
1154
|
+
def pc_plot_out3(lag,alpha,motif,n_ctrnn,n_samp,m,w,tau,niter=50):
    """Simulate data, run ``pc_bootstrap_2`` and plot ancestor-pair effects.

    ``n_samp`` datasets from ``create_dataset4`` are stacked row-wise and
    handed (untransformed) to ``pc_bootstrap_2``.  The effects it returns
    are kept only for pairs ``(i, j)`` where ``i`` is an ancestor of ``j``
    in the returned graph; all other entries are 0.

    Args:
        lag, alpha: forwarded to ``pc_bootstrap_2``.
        motif: label used for ``plot_matrix`` and for the saved file name
            ``'dag_' + motif + '.png'``.
        n_ctrnn: forwarded to ``create_dataset4`` and ``pc_bootstrap_2``.
        n_samp: number of simulated datasets; also forwarded to
            ``pc_bootstrap_2``.
        m: number of variables.
        w, tau: forwarded unchanged to ``create_dataset4``.
        niter: number of bootstrap rounds forwarded to ``pc_bootstrap_2``.

    Returns:
        ``(G, causaleff3)``: the graph from ``pc_bootstrap_2`` and the
        ``(m, m)`` effect matrix restricted to ancestor pairs.
    """
    import networkx as nx
    import matplotlib.pyplot as plt
    from gen_data_fns import create_dataset4, plot_matrix
    import numpy as np

    dataset = create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T
    for _ in range(n_samp - 1):
        dataset = np.vstack((dataset, create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T))

    # The self-loop-free copy of the graph that used to be built here was
    # never read, and building it called Graph.selfloop_edges(), a method
    # newer networkx releases no longer provide.
    G, causaleff0 = pc_bootstrap_2(dataset,lag,alpha,m,n_ctrnn, n_samp,niter)

    causaleff3 = np.zeros((m, m))
    for j in list(G.nodes):
        # One ancestor query per target node instead of one per pair.
        for i in nx.ancestors(G, j):
            causaleff3[i, j] = causaleff0[i, j]

    plot_matrix(causaleff3/np.max(np.abs(causaleff3)),motif,'pc_causaleff')
    plot_matrix(0.5*(causaleff3!=0),motif,'pc_causalconn')
    fig, ax = plt.subplots(1,1,figsize=(10,10))
    nx.draw(G, with_labels= True,ax=ax)
    fig.savefig('dag_'+motif+'.png',format = 'png', dpi =600)
    return G, causaleff3
|
|
1213
|
+
#%%
|
|
1214
|
+
def ablation(lag,dataset,g01,G,k,alpha=None):
    """Re-run the PC pipeline without variable ``k`` and extend a copy of ``G``.

    Column ``k`` is removed from ``dataset``, the remainder is transformed
    with ``data_transformed`` and run through skeleton / CPDAG estimation,
    ``causaleff_ida`` and ``return_finaledges``.  For every edge
    ``(node1, node2)`` of the resulting graph (relabelled to the original
    node ids), the edge ``k -> node2`` is added to a copy of ``G`` when
    ``node1 != node2``, ``g01`` contains neither ``(node1, node2)`` nor
    ``(k, node2)``, and ``node1`` is an ancestor of ``k`` in ``g01``.

    Args:
        lag: forwarded to ``data_transformed`` and ``return_finaledges``.
        dataset: 2-D array with one column per variable.
        g01: reference graph used for the edge and ancestor tests.
        G: graph that is copied and extended; it is not modified.
        k: index of the variable that is left out.
        alpha: significance level for ``estimate_skeleton``.  The body used
            to read an ``alpha`` name that is not defined in this function;
            when omitted, a module-level ``alpha`` is still used if present.

    Returns:
        The extended copy of ``G``.

    Raises:
        ValueError: if ``alpha`` is neither passed nor defined at module
            level.
    """
    if alpha is None:
        # Legacy fallback for callers that relied on a module-level value.
        try:
            alpha = globals()["alpha"]
        except KeyError:
            raise ValueError("ablation() needs a significance level: pass alpha=...")

    m = dataset.shape[1]
    dataset3 = data_transformed(np.delete(dataset, [k], axis=1), lag)
    g2 = G.copy()

    # Map column positions of the reduced data back to original labels.
    kept = [node for node in range(m) if node != k]
    relabels = dict(zip(range(m - 1), kept))

    (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                     data_matrix=dataset3,
                                     alpha=alpha, method='stable')
    g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
    causaleff = causaleff_ida(g, dataset3)

    g1, _ = return_finaledges(g, causaleff, lag, m - 1)
    g1 = nx.relabel_nodes(g1, relabels)

    known = set(g01.edges)   # g01 is not modified here, so build this once
    ancestors_of_k = None    # computed lazily, only if an edge needs it
    for (node1, node2) in list(g1.edges):
        if node1 == node2 or (node1, node2) in known or (k, node2) in known:
            continue
        if ancestors_of_k is None:
            ancestors_of_k = nx.ancestors(g01, k)
        if node1 in ancestors_of_k:
            g2.add_edge(k, node2)
    return g2


def pc_plot_realdata(dataset,lag,alpha,motif,niter=10):
    """Run the bootstrapped PC pipeline with per-variable ablation and plot.

    A baseline graph and effect matrix come from ``pc_bootstrap_realdata``
    on the transformed data.  ``ablation`` is then evaluated for every
    variable in a pool of 4 worker processes and the resulting graphs are
    merged with ``networkx.compose_all``.  Two effect matrices are derived
    from the baseline effects: one restricted to (ancestor, descendant)
    pairs of the merged graph and one restricted to its direct edges.

    Args:
        dataset: 2-D array with one column per variable.
        lag: forwarded to ``data_transformed``, ``pc_bootstrap_realdata``
            and ``ablation``.
        alpha: significance level, forwarded to ``pc_bootstrap_realdata``
            and to ``ablation``.
        motif: label used for ``plot_matrix`` and for the saved file name
            ``'dag_' + motif + '.png'``.
        niter: number of bootstrap rounds for ``pc_bootstrap_realdata``.

    Returns:
        ``(G, causaleff3, causaleff4)``: the merged graph, the ancestor-pair
        effect matrix and the direct-edge effect matrix.
    """
    import networkx as nx
    import matplotlib.pyplot as plt
    from gen_data_fns import plot_matrix
    import numpy as np
    import multiprocessing as mp
    from functools import partial
    import time

    m = dataset.shape[1]
    dataset01 = data_transformed(dataset, lag)
    g0, causaleff0 = pc_bootstrap_realdata(dataset01,lag,alpha,m,niter)
    g01 = g0.copy()
    g01.remove_edges_from(list(nx.selfloop_edges(g01)))
    G = g0

    func = partial(ablation, lag, dataset, g01, G, alpha=alpha)
    pool = mp.Pool(4)
    try:
        t1 = time.time()
        g1 = pool.map(func, range(m))
        print(time.time()-t1)
    finally:
        # Release the worker processes even when a task raises.
        pool.close()
        pool.join()

    G = nx.compose_all(g1)
    nx.draw(G,with_labels=True)

    causaleff3 = np.zeros((m, m))
    causaleff4 = np.zeros((m, m))
    for j in list(G.nodes):
        for i in nx.ancestors(G, j):
            causaleff3[i, j] = causaleff0[i, j]
        for i in G.predecessors(j):
            causaleff4[i, j] = causaleff0[i, j]

    plot_matrix(causaleff3/np.max(np.abs(causaleff3)),motif,'pc_causaleff')
    plot_matrix(0.5*(causaleff3!=0),motif,'pc_causalconn')
    fig, ax = plt.subplots(1,1,figsize=(10,10))
    nx.draw(G, with_labels= True,ax=ax)
    fig.savefig('dag_'+motif+'.png',format = 'png', dpi =600)
    return G, causaleff3, causaleff4
|
|
1285
|
+
# #%%
|
|
1286
|
+
# def pc_plot_realdata(dataset,lag,alpha,motif,niter=10):
|
|
1287
|
+
# import networkx as nx
|
|
1288
|
+
# import matplotlib.pyplot as plt
|
|
1289
|
+
# from gen_data_fns import create_dataset4, plot_matrix
|
|
1290
|
+
# import numpy as np
|
|
1291
|
+
# from pcalg import causaleff_ida, pc_bootstrap
|
|
1292
|
+
# import random
|
|
1293
|
+
# from tqdm import tqdm
|
|
1294
|
+
# m=dataset.shape[1]
|
|
1295
|
+
# dataset01=data_transformed(dataset, lag)
|
|
1296
|
+
# g0, causaleff0 = pc_bootstrap_realdata(dataset01,lag,alpha,m,niter)
|
|
1297
|
+
# g01=g0.copy()
|
|
1298
|
+
# g01.remove_edges_from(g01.selfloop_edges())
|
|
1299
|
+
# G=g0
|
|
1300
|
+
# #nx.draw(G, with_labels= True,ax=ax)
|
|
1301
|
+
# g1={}
|
|
1302
|
+
# causaleff2={}
|
|
1303
|
+
# g2=[]
|
|
1304
|
+
# for k in tqdm(range(m)):
|
|
1305
|
+
# #idx=random.randint(0,dataset.shape[0]-1000)
|
|
1306
|
+
# #k=random.sample(range(m),1)[0]
|
|
1307
|
+
# #dataset2 = dataset[idx:(idx+5000),:]
|
|
1308
|
+
# dataset3 = data_transformed(np.delete(dataset, [k],axis=1),lag)
|
|
1309
|
+
# g_iter=G.copy()
|
|
1310
|
+
# labels = list(range(m))
|
|
1311
|
+
# labels.remove(k)
|
|
1312
|
+
# relabels = dict(zip(range(m-1),labels))
|
|
1313
|
+
# (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
|
|
1314
|
+
# data_matrix=dataset3,
|
|
1315
|
+
# alpha=alpha,method='stable')
|
|
1316
|
+
# g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
|
|
1317
|
+
# causaleff = causaleff_ida(g,dataset3)
|
|
1318
|
+
|
|
1319
|
+
# g1[iter], causaleff1 = return_finaledges(g,causaleff,lag,m-1)
|
|
1320
|
+
# g1[iter] = nx.relabel_nodes(g1[iter],relabels)
|
|
1321
|
+
# causaleff2[iter] = causaleff1/np.max(np.abs(causaleff1))
|
|
1322
|
+
|
|
1323
|
+
# for (node1,node2) in list(g1[iter].edges):
|
|
1324
|
+
# if node1 !=node2 and (node1,node2) not in list(g01.edges) and node1 in nx.ancestors(g01,k) and (k,node2) not in list(g01.edges):#(node1, k) in list(g01.edges) and (k,node2) not in list(g01.edges):
|
|
1325
|
+
# g_iter.add_edge(k,node2)
|
|
1326
|
+
# nx.draw(G,with_labels=True)
|
|
1327
|
+
# causaleff3 = np.zeros((m,m))
|
|
1328
|
+
# for i in list(G.nodes):
|
|
1329
|
+
# for j in list(G.nodes):
|
|
1330
|
+
# if i in nx.ancestors(G,j):
|
|
1331
|
+
# causaleff3[i,j] = causaleff0[i,j]#causaleff3[i,j]+causaleff2[iter][i,j]
|
|
1332
|
+
# causaleff4 = np.zeros((m,m))
|
|
1333
|
+
# for i in list(G.nodes):
|
|
1334
|
+
# for j in list(G.nodes):
|
|
1335
|
+
# if i in G.predecessors(j):
|
|
1336
|
+
# causaleff4[i,j] = causaleff0[i,j]
|
|
1337
|
+
# plot_matrix(causaleff3/np.max(np.abs(causaleff3)),motif,'pc_causaleff')
|
|
1338
|
+
# plot_matrix(0.5*(causaleff3!=0),motif,'pc_causalconn')
|
|
1339
|
+
# fig, ax = plt.subplots(1,1,figsize=(10,10))
|
|
1340
|
+
# nx.draw(G, with_labels= True,ax=ax)
|
|
1341
|
+
# fig.savefig('dag_'+motif+'.png',format = 'png', dpi =600)
|
|
1342
|
+
# return G,causaleff3,causaleff4
|
|
1343
|
+
#%%
|
|
1344
|
+
#lag=2
|
|
1345
|
+
def pc_bootstrap_realdata(dataset2,lag,alpha,m,niter=10):
    """Bootstrap the PC pipeline over shifted row windows of ``dataset2``.

    Round ``r`` (``0 <= r < niter``) uses rows ``r`` up to, but excluding,
    ``rows - niter + r + 1``, so all windows have the same length and are
    shifted by one row each.  Every window goes through skeleton / CPDAG
    estimation with ``ci_test_gauss``, ``causaleff_ida`` and
    ``return_finaledges``.  Edges present in at least 25% of the rounds
    form the returned graph; the returned effect matrix is the mean of the
    per-round effect matrices.

    Args:
        dataset2: 2-D array whose rows are windowed.
        lag: forwarded to ``return_finaledges``.
        alpha: significance level passed to ``estimate_skeleton``.
        m: number of nodes; also forwarded to ``return_finaledges``.
        niter: number of bootstrap rounds.

    Returns:
        ``(g2, causaleff3)``: a ``networkx.DiGraph`` on nodes ``0..m-1`` and
        the averaged effect matrix.
    """
    import networkx as nx
    import numpy as np
    # NOTE(review): resolves against a top-level module named ``pcalg``;
    # confirm that is the intended module when this file lives in a package.
    from pcalg import causaleff_ida
    from tqdm import tqdm

    n_rows = dataset2.shape[0]
    graphs = []
    effects = []
    for start in tqdm(range(niter)):
        sample = dataset2[start:(n_rows - niter + start + 1), :]
        (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                         data_matrix=sample,
                                         alpha=alpha, method='stable')
        g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
        causaleff = causaleff_ida(g, sample)
        graph, effect = return_finaledges(g, causaleff, lag, m)
        graphs.append(graph)
        effects.append(effect)

    # Count, for every ordered pair, in how many rounds the edge was found.
    edgemat = np.zeros((m, m))
    for graph in graphs:
        found = set(graph.edges)  # built once per graph, not once per pair
        for i in range(m):
            for j in range(m):
                if (i, j) in found:
                    edgemat[i, j] = edgemat[i, j] + 1

    threshold = 25 * niter / 100.
    edgefinal = set()
    for i in range(m):
        for j in range(m):
            if edgemat[i, j] >= threshold:
                edgefinal.add((i, j))

    g2 = nx.DiGraph()
    g2.add_nodes_from(range(m))
    g2.add_edges_from(edgefinal)

    causaleff3 = np.zeros((m, m))
    for effect in effects:
        causaleff3 = causaleff3 + effect
    causaleff3 = causaleff3 / niter
    return g2, causaleff3
|
|
1390
|
+
def data_transformed(data, lag):
    """Turn a time series into rows of ``lag + 1`` consecutive samples.

    The rows of ``data`` are cut into blocks of ``2 * (lag + 1)`` rows; from
    each block only the first ``lag + 1`` rows are used.  Output column
    ``(lag + 1) * i + j`` holds variable ``i`` at offset ``j`` inside the
    block.

    Args:
        data: 2-D numpy array, one row per time step, one column per variable.
        lag: non-negative integer lag.

    Returns:
        Float array of shape ``(blocks, p * (lag + 1))`` with
        ``blocks = (n - lag) // (2 * (lag + 1))``.  When the series is too
        short for a single block the result has zero rows (this includes
        ``n < lag``, which used to fail with a negative-dimension error).
    """
    import numpy as np
    n = data.shape[0]
    p = data.shape[1]
    lag1 = lag + 1
    step = 2 * lag1
    blocks = max(0, (n - lag) // step)
    new_n = blocks * step
    data2 = np.zeros((blocks, p * lag1))
    for j in range(lag1):
        # Columns j, j + lag1, ... are "variable i at offset j" for all i.
        data2[:, j::lag1] = data[j:new_n:step, :]
    return data2
|
|
1402
|
+
def data_transformed_mod(data, lag, node):
    """Build rows of lagged samples with one lag-0 column for ``node``.

    The rows of ``data`` are cut into blocks of ``2 * (lag + 1)`` rows.
    Output column 0 holds variable ``node`` at offset 0 of each block;
    column ``lag * i + j`` (``1 <= j <= lag``) holds variable ``i`` at
    offset ``j`` of the block.

    Args:
        data: 2-D numpy array, one row per time step, one column per variable.
        lag: non-negative integer lag.
        node: column index of the variable placed in output column 0.

    Returns:
        Float array of shape ``(blocks, p * lag + 1)`` with
        ``blocks = (n - lag) // (2 * (lag + 1))``.  When the series is too
        short for a single block the result has zero rows (this includes
        ``n < lag``, which used to fail with a negative-dimension error).
    """
    import numpy as np
    n = data.shape[0]
    p = data.shape[1]
    lag1 = lag + 1
    step = 2 * lag1
    blocks = max(0, (n - lag) // step)
    new_n = blocks * step
    data2 = np.zeros((blocks, p * lag + 1))
    for j in range(1, lag1):
        # Columns j, j + lag, ... are "variable i at offset j" for all i.
        data2[:, j::lag] = data[j:new_n:step, :]
    data2[:, 0] = data[0:new_n:step, node]
    return data2
|
|
1416
|
+
|
|
1417
|
+
def data_transformed_overlapping(data, tau):
    """Stack ``tau`` consecutive rows of ``data`` into each output row.

    Output row ``t`` is the concatenation of rows ``t, t + 1, ...,
    t + tau - 1`` of ``data``, so consecutive output rows overlap.

    Args:
        data: 2-D numpy array, one row per time step.
        tau: window length in rows.

    Returns:
        * the string ``'lag should be >= 1'`` when ``tau < 1`` (kept for
          backward compatibility; no exception is raised),
        * ``data`` itself, not a copy, when ``tau == 1``,
        * otherwise a float array of shape ``(n - tau + 1, p * tau)``.
    """
    import numpy as np
    if tau < 1:
        return('lag should be >= 1')
    if tau == 1:
        return data
    n = data.shape[0]
    p = data.shape[1]
    new_n = n - tau + 1
    data2 = np.zeros((new_n, p * tau))
    for k in range(tau):
        # Block k of every output row is the input row k steps further on;
        # copying tau blocks replaces the former loop over every row.
        data2[:, k * p:(k + 1) * p] = data[k:k + new_n, :]
    return data2
|
|
1434
|
+
|
|
1435
|
+
def data_transformed_btstrp(data):
    """Draw stationary-bootstrap replicates of ``data`` and flatten each one.

    The block length is the median of the first column returned by
    ``arch.bootstrap.optimal_block_length(data)``.  ``2 * n * p`` replicates
    are drawn with a fixed ``RandomState(111)``, and every replicate is
    flattened into one output row.

    Args:
        data: 2-D array with ``n`` rows and ``p`` columns.

    Returns:
        Float array of shape ``(2 * n * p, n * p)``.
    """
    import numpy as np
    from arch import bootstrap
    from numpy.random import RandomState

    block_lengths = bootstrap.optimal_block_length(data)
    n_rows, n_cols = data.shape[0], data.shape[1]
    n_draws = 2 * n_rows * n_cols
    flattened = np.zeros((n_draws, n_rows * n_cols))
    sampler = bootstrap.StationaryBootstrap(np.median(block_lengths.iloc[:,0]),
                                            data,
                                            random_state=RandomState(111))
    for row, draw in enumerate(sampler.bootstrap(n_draws)):
        # draw[0][0] is the resampled copy of ``data`` for this replicate.
        flattened[row, :] = np.hstack(draw[0][0])
    return flattened
|
|
1449
|
+
# def data_transformed_v2(data, lag, window):
|
|
1450
|
+
# import numpy as np
|
|
1451
|
+
# n = data.shape[0]
|
|
1452
|
+
# m = data.shape[1]
|
|
1453
|
+
# lag1=lag+1
|
|
1454
|
+
# data1 = np.zeros((int(n/(2*window)),m*lag1))
|
|
1455
|
+
# for t1 in range(int(n/(2*window))):
|
|
1456
|
+
# t=2*window*t1
|
|
1457
|
+
# for p1 in range(m):
|
|
1458
|
+
# for l in range(lag1):
|
|
1459
|
+
# data1[t1,lag1*p1+l]=np.mean(data[(t+l):(t+l+window),p1])
|
|
1460
|
+
# return data1
|
|
1461
|
+
|
|
1462
|
+
def data_transformed1(data, lag):
    """Pair every variable with its value ``lag`` steps later.

    The rows of ``data`` are cut into blocks of ``2 * (lag + 1)`` rows.
    Output column ``2 * i`` holds variable ``i`` at offset 0 of each block
    and column ``2 * i + 1`` holds it at offset ``lag``.

    Args:
        data: 2-D numpy array, one row per time step, one column per variable.
        lag: non-negative integer lag.

    Returns:
        Float array of shape ``(blocks, 2 * p)`` with
        ``blocks = (n - lag) // (2 * (lag + 1))``.  When the series is too
        short for a single block the result has zero rows (this includes
        ``n < lag``, which used to fail with a negative-dimension error).
    """
    import numpy as np
    n = data.shape[0]
    p = data.shape[1]
    step = 2 * (lag + 1)
    blocks = max(0, (n - lag) // step)
    new_n = blocks * step
    data2 = np.zeros((blocks, 2 * p))
    data2[:, 0::2] = data[0:new_n:step, :]     # offset 0 of every block
    data2[:, 1::2] = data[lag:new_n:step, :]   # offset ``lag`` of every block
    return data2
|
|
1478
|
+
def data_transformed_nogaps(data, lag):
    """Fold ``data`` into back-to-back windows of ``lag+1`` samples.

    Args:
        data: 2-D array of shape ``(n, p)``.
        lag: number of lags; each window spans ``lag+1`` consecutive rows.

    Returns:
        A float array of shape ``(floor((n-lag)/(lag+1)), p*(lag+1))``.
        Column ``(lag+1)*i + j`` holds variable ``i`` at position ``j`` of
        each window.
    """
    import numpy as np

    n_rows, n_vars = data.shape[0], data.shape[1]
    width = lag + 1
    n_windows = int(np.floor((n_rows - lag) / width))
    # Keep only whole windows.
    trimmed = data[:n_windows * width, :]
    out = np.zeros((n_windows, n_vars * width))
    for col in range(n_vars * width):
        var, offset = divmod(col, width)
        out[:, col] = trimmed[offset::width, var]
    return out
|
|
1490
|
+
# def data_transformed(data, lag):
|
|
1491
|
+
# import numpy as np
|
|
1492
|
+
# n = data.shape[0]
|
|
1493
|
+
# p = data.shape[1]
|
|
1494
|
+
# lag1=lag+1
|
|
1495
|
+
# new_n = int(np.floor(n/(2*lag1))*(2*lag1))
|
|
1496
|
+
# data=data[:new_n,:]
|
|
1497
|
+
# data2=np.zeros((int(new_n/(2*lag1)),p*lag1))
|
|
1498
|
+
# for i in range(p):
|
|
1499
|
+
# for j in range(lag1):
|
|
1500
|
+
# data2[:,lag1*i+j]=data[j::(2*lag1),i]
|
|
1501
|
+
# return data2
|
|
1502
|
+
|
|
1503
|
+
# def data_transformed_old(data, lag):
|
|
1504
|
+
# import numpy as np
|
|
1505
|
+
# n = data.shape[0]
|
|
1506
|
+
# p = data.shape[1]
|
|
1507
|
+
# lag1=lag+1
|
|
1508
|
+
# new_n = int(np.floor(n/lag1)*lag1)
|
|
1509
|
+
# data=data[:new_n,:]
|
|
1510
|
+
# data2=np.zeros((int(new_n/lag1),p*lag1))
|
|
1511
|
+
# for i in range(p):
|
|
1512
|
+
# for j in range(lag1):
|
|
1513
|
+
# data2[:,lag1*i+j]=data[j::lag1,i]
|
|
1514
|
+
# return data2
|
|
1515
|
+
def data_transformed_fin(data,lag,maxdeg=1):
    """Sum a (polynomially expanded) series over sliding windows of ``lag``.

    The series is first replaced by ``sum_{d=1..maxdeg} data**d / d!``.
    Window sums over ``lag`` consecutive rows are then formed, and every
    ``2*lag``-th window is kept.

    Args:
        data: 2-D array of shape ``(n, p)``.
        lag: window length in rows; must be >= 1.
        maxdeg: highest power included in the expansion (1 keeps ``data``).

    Returns:
        A float array with ``p`` columns. Row ``k`` is the sum of the
        expanded rows ``2*lag*k + 1 .. 2*lag*k + lag``. The row count is
        ``int(n/(2*lag))``, grown when more windows than that are sampled.
        Rows beyond the sampled windows stay zero, as before.

    Raises:
        ValueError: if ``lag`` is smaller than 1.
    """
    # np.math was an alias of the stdlib module and is gone in NumPy 2.x.
    import math

    if lag < 1:
        raise ValueError('lag should be >= 1')
    n = data.shape[0]
    p = data.shape[1]
    series = data
    for degree in range(2, maxdeg + 1):
        series = series + (data ** degree) / math.factorial(degree)
    running = np.cumsum(series, axis=0)
    # window_sums[t] = series[t+1] + ... + series[t+lag]
    window_sums = running[lag:, ] - running[:-lag, ]
    # Same windows as i - 1 - lag for i in range(lag + 1, n, 2 * lag).
    sampled = window_sums[:max(n - lag - 1, 0):2 * lag, :]
    # The legacy buffer size can be one short of the number of sampled
    # windows (e.g. n=11, lag=3), which used to raise IndexError.
    n_out = max(int(n / (2 * lag)), sampled.shape[0])
    data3 = np.zeros((n_out, p))
    data3[:sampled.shape[0], :] = sampled
    return data3
|
|
1530
|
+
def data_transformed_2(data, lag,n_ctrnn,n_samp):
    """Extract ``lag+1`` consecutive samples per period at a random phase.

    A start offset ``r`` is drawn uniformly from ``0 .. n_ctrnn-lag-1``.
    For each variable ``j`` and each ``k`` in ``0..lag``, the rows
    ``r+k, r+k+n_ctrnn, r+k+2*n_ctrnn, ...`` of ``data`` form one column.

    Args:
        data: 2-D array of shape ``(n, p)``.
        lag: number of lags; ``lag+1`` columns are produced per variable.
        n_ctrnn: row stride between consecutive output samples (presumably
            the length of one simulated trial -- confirm with the caller).
        n_samp: number of output rows. Each strided slice of ``data`` must
            contain exactly this many rows, otherwise NumPy raises on
            assignment.

    Returns:
        A float array of shape ``(n_samp, (lag+1)*p)``. Column
        ``(lag+1)*j + k`` is variable ``j`` at offset ``k`` from the start.
    """
    import numpy as np
    import random as random

    r = random.randint(0, n_ctrnn - lag - 1)
    lag1 = lag + 1
    p = data.shape[1]
    y = np.zeros((n_samp, lag1 * p))
    for j in range(p):
        for offset in range(lag1):
            # The previous index, lag*(j-1)+offset, wrapped to negative
            # columns for j == 0 and made adjacent variables overwrite each
            # other. Each variable now owns its own lag1 columns.
            y[:, lag1 * j + offset] = data[r + offset::n_ctrnn, j]
    return y
|
|
1544
|
+
#causaleff = causaleff_ida(g,dataset2)
|
|
1545
|
+
#g1, causaleff1 = return_finaledges(g,causaleff,lag1,p)
|
|
1546
|
+
#nx.draw(g1,with_labels=True)
|
|
1547
|
+
#%%plot graph
|
|
1548
|
+
def return_relabels(lag,p):
    """Build display labels for the lag-expanded node ids.

    Args:
        lag: number of lags; every variable has ``lag+1`` nodes.
        p: number of variables.

    Returns:
        A dict mapping node id ``(lag+1)*i + j`` to the string ``"i_j"``.
    """
    width = lag + 1
    return {
        width * var + step: '_'.join((str(var), str(step)))
        for var in range(p)
        for step in range(width)
    }
|
|
1555
|
+
|
|
1556
|
+
def orient(g,lag,m):
    """Orient the edges of a lag-expanded graph forward in time.

    Node ``(lag+1)*i + j`` stands for variable ``i`` at lag index ``j``.
    Every edge of ``g`` between two such nodes, in either stored direction,
    is re-emitted pointing from the lower lag index to the higher one.
    Edges between nodes with the same lag index are emitted in both
    directions, because both orderings of the variable pair are visited.

    Args:
        g: a networkx graph whose nodes include ``0 .. (lag+1)*m - 1``.
        lag: number of lags.
        m: number of variables.

    Returns:
        A new ``networkx.DiGraph`` with the nodes of ``g`` and the oriented
        edges.
    """
    import networkx as nx
    import numpy as np

    n_lags = lag + 1
    labelmat = np.arange(0, n_lags * m).reshape((m, n_lags))
    known = g.edges
    # Grown in place: rebuilding the set with `edge | {...}` copied every
    # element on each insertion.
    edge = set()
    for i in range(m):
        for k in range(m):
            for j in range(n_lags):
                for l in range(j, n_lags):
                    u = labelmat[i, j]
                    v = labelmat[k, l]
                    if (u, v) in known or (v, u) in known:
                        edge.add((u, v))
    g1 = nx.DiGraph()
    g1.add_nodes_from(g.nodes)
    g1.add_edges_from(edge)
    return g1
|
|
1575
|
+
#%%
|
|
1576
|
+
def return_finaledges(g,causaleff,lag,m):
    """Collapse a lag-expanded graph into variable-level edges and effects.

    Node ``(lag+1)*i + j`` stands for variable ``i`` at lag index ``j``.
    Variable ``i`` is connected to variable ``k`` when ``g`` contains an
    edge from some node of ``i`` at lag index ``j`` to some node of ``k`` at
    lag index ``l >= j``.

    Args:
        g: a networkx graph over the lag-expanded nodes (used as given).
        causaleff: 2-D array indexed by lag-expanded node ids.
        lag: number of lags; every variable has ``lag+1`` nodes.
        m: number of variables.

    Returns:
        A tuple ``(edge_mat, causaleff1, causaleff2)`` of ``(m, m)`` arrays:
        ``edge_mat`` is the 0/1 integer adjacency matrix; ``causaleff1`` is
        the mean of ``causaleff`` over all contributing lagged edges;
        ``causaleff2`` is the same mean restricted to edges whose source is
        in ``nx.ancestors`` of the target. Pairs with no contributing edge
        stay 0.
    """
    import networkx as nx
    import numpy as np

    n_lags = lag + 1
    labelmat = np.arange(0, n_lags * m).reshape((m, n_lags))
    causaleff1 = np.zeros((m, m))
    causaleff2 = np.zeros((m, m))
    edge_mat = np.zeros((m, m), dtype=bool)
    known = g.edges
    # nx.ancestors walks the graph; compute it once per target node instead
    # of once per matching (source, target) pair.
    ancestors_of = {}
    for i in range(m):
        for k in range(m):
            acc = []
            ansacc = []
            for j in range(n_lags):
                for l in range(j, n_lags):
                    src = labelmat[i, j]
                    dst = labelmat[k, l]
                    if (src, dst) not in known:
                        continue
                    edge_mat[i, k] = True
                    effect = causaleff[src, dst]
                    acc.append(effect)
                    if dst not in ancestors_of:
                        ancestors_of[dst] = nx.ancestors(g, dst)
                    if src in ancestors_of[dst]:
                        ansacc.append(effect)
            if acc:
                causaleff1[i, k] = np.mean(acc)
            if ansacc:
                causaleff2[i, k] = np.mean(ansacc)
    return edge_mat.astype(int), causaleff1, causaleff2
|
|
1621
|
+
|
|
1622
|
+
def return_finaledges_v2(g,causaleff,lag,m):
    """Collapse a lag-expanded graph with ``lag`` nodes per variable.

    Node ``lag*i + j`` stands for variable ``i`` at lag index ``j``, so
    ``lag`` is used directly as the number of nodes per variable. Variable
    ``i`` is connected to variable ``k`` when ``g`` contains an edge from
    some node of ``i`` at lag index ``j`` to some node of ``k`` at lag index
    ``l >= j``.

    Args:
        g: a networkx graph over the lag-expanded nodes (used as given).
        causaleff: 2-D array indexed by lag-expanded node ids.
        lag: number of nodes per variable.
        m: number of variables.

    Returns:
        A tuple ``(edge_mat, causaleff1, causaleff2)`` of ``(m, m)`` arrays:
        ``edge_mat`` is the 0/1 integer adjacency matrix; ``causaleff1`` is
        the mean of ``causaleff`` over all contributing lagged edges;
        ``causaleff2`` is the same mean restricted to edges whose source is
        in ``nx.ancestors`` of the target. Pairs with no contributing edge
        stay 0.
    """
    import networkx as nx
    import numpy as np

    labelmat = np.arange(0, lag * m).reshape((m, lag))
    causaleff1 = np.zeros((m, m))
    causaleff2 = np.zeros((m, m))
    edge_mat = np.zeros((m, m), dtype=bool)
    known = g.edges
    # nx.ancestors walks the graph; compute it once per target node instead
    # of once per matching (source, target) pair.
    ancestors_of = {}
    for i in range(m):
        for k in range(m):
            acc = []
            ansacc = []
            for j in range(lag):
                for l in range(j, lag):
                    src = labelmat[i, j]
                    dst = labelmat[k, l]
                    if (src, dst) not in known:
                        continue
                    edge_mat[i, k] = True
                    effect = causaleff[src, dst]
                    acc.append(effect)
                    if dst not in ancestors_of:
                        ancestors_of[dst] = nx.ancestors(g, dst)
                    if src in ancestors_of[dst]:
                        ansacc.append(effect)
            if acc:
                causaleff1[i, k] = np.mean(acc)
            if ansacc:
                causaleff2[i, k] = np.mean(ansacc)
    return edge_mat.astype(int), causaleff1, causaleff2
|
|
1667
|
+
|
|
1668
|
+
# def return_finaledges(g,causaleff,lag,m):
|
|
1669
|
+
# import networkx as nx
|
|
1670
|
+
# import numpy as np
|
|
1671
|
+
# labels=np.arange(0,(lag+1)*m)
|
|
1672
|
+
|
|
1673
|
+
# edge=set([])
|
|
1674
|
+
# labelmat=labels.reshape((m,lag+1))
|
|
1675
|
+
# causaleff1 = np.zeros((m,m))
|
|
1676
|
+
# edge_mat = np.zeros((m,m))==1
|
|
1677
|
+
# for i in range(m):
|
|
1678
|
+
# for k in range(m):
|
|
1679
|
+
# acc=[]
|
|
1680
|
+
# for j in range(lag+1):
|
|
1681
|
+
# for l in range(lag+1):
|
|
1682
|
+
# if j<=l:# and j>=l-lag:
|
|
1683
|
+
# #if j==l-1:
|
|
1684
|
+
# if (labelmat[i,j],labelmat[k,l]) in g.edges or (labelmat[k,l],labelmat[i,j]) in g.edges:
|
|
1685
|
+
# #print((labelmat[i,j],labelmat[k,l]))
|
|
1686
|
+
# # edge = edge | {(i,k)}
|
|
1687
|
+
# edge_mat[i,k]=True
|
|
1688
|
+
# #acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
|
|
1689
|
+
# #for lagit in range(1,lag+1):
|
|
1690
|
+
# #if labelmat[i,j] in nx.ancestors(g,labelmat[k,l]):####CF at delay=lag
|
|
1691
|
+
# acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
|
|
1692
|
+
# if len(acc)>0:
|
|
1693
|
+
# #iacc=np.argmax(np.abs(acc))
|
|
1694
|
+
# causaleff1[i,k] = np.mean(acc)#acc[iacc]#np.mean(acc)
|
|
1695
|
+
# return edge_mat.astype(int), causaleff1
|
|
1696
|
+
|
|
1697
|
+
# def return_finaledges_v2(g,causaleff,lag,m):
|
|
1698
|
+
# import networkx as nx
|
|
1699
|
+
# import numpy as np
|
|
1700
|
+
# labels=np.arange(0,(lag+1)*m)
|
|
1701
|
+
|
|
1702
|
+
# edge=set([])
|
|
1703
|
+
# labelmat=labels.reshape((m,lag+1))
|
|
1704
|
+
# causaleff1 = np.zeros((m,m))
|
|
1705
|
+
# edge_mat = np.zeros((m,m))==1
|
|
1706
|
+
# for i in range(m):
|
|
1707
|
+
# for k in range(m):
|
|
1708
|
+
# acc=[]
|
|
1709
|
+
# for j in range(lag+1):
|
|
1710
|
+
# for l in range(lag+1):
|
|
1711
|
+
# if j<=l:# and j>=l-lag:
|
|
1712
|
+
# #if j==l-1:
|
|
1713
|
+
# if (labelmat[i,j],labelmat[k,l]) in g.edges or (labelmat[k,l],labelmat[i,j]) in g.edges:
|
|
1714
|
+
# #print((labelmat[i,j],labelmat[k,l]))
|
|
1715
|
+
# # edge = edge | {(i,k)}
|
|
1716
|
+
# edge_mat[i,k]=True
|
|
1717
|
+
# #acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
|
|
1718
|
+
# #for lagit in range(1,lag+1):
|
|
1719
|
+
# #if labelmat[i,j] in nx.ancestors(g,labelmat[k,l]):####CF at delay=lag
|
|
1720
|
+
# acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
|
|
1721
|
+
# if len(acc)>0:
|
|
1722
|
+
# #iacc=np.argmax(np.abs(acc))
|
|
1723
|
+
# causaleff1[i,k] = np.mean(acc)#acc[iacc]#np.mean(acc)
|
|
1724
|
+
# #g1=nx.DiGraph()
|
|
1725
|
+
# #g1.add_nodes_from(range(m))
|
|
1726
|
+
# #g1.add_edges_from(edge)
|
|
1727
|
+
# #g1.graph['edge'] = {'arrowsize': '0.6', 'splines': 'curved'}
|
|
1728
|
+
# #g1.graph['graph'] = {'scale': '3'}
|
|
1729
|
+
|
|
1730
|
+
# return edge_mat.astype(int), causaleff1
|
|
1731
|
+
|
|
1732
|
+
def return_finaledges_fin(g,causaleff,p):
    """Read off the edges pointing into node 0 and their effects.

    Args:
        g: graph-like object whose ``edges`` supports ``(u, v) in g.edges``.
        causaleff: 2-D array indexed by node ids.
        p: number of candidate source nodes; nodes ``1 .. p`` are checked.

    Returns:
        A tuple ``(edge_mat, causaleff1)`` of length-``p`` arrays.
        ``edge_mat[k]`` is 1 when the edge ``(k+1, 0)`` is in ``g`` and 0
        otherwise. ``causaleff1[k]`` is ``causaleff[k+1, 0]`` for those
        edges and 0 elsewhere.
    """
    # networkx is not imported here: only g.edges membership is needed.
    import numpy as np

    causaleff1 = np.zeros(p)
    edge_mat = np.zeros(p, dtype=bool)
    known = g.edges
    for k in range(p):
        if (k + 1, 0) in known:
            edge_mat[k] = True
            causaleff1[k] = causaleff[k + 1, 0]
    return edge_mat.astype(int), causaleff1
|
|
1743
|
+
|
|
1744
|
+
# def return_finaledges(g,causaleff,lag,m):
|
|
1745
|
+
# import networkx as nx
|
|
1746
|
+
# import numpy as np
|
|
1747
|
+
# labels=np.arange(0,(lag+1)*m)
|
|
1748
|
+
|
|
1749
|
+
# edge=set([])
|
|
1750
|
+
# labelmat=labels.reshape((m,lag+1))
|
|
1751
|
+
# causaleff1 = np.zeros((m,m))
|
|
1752
|
+
# for i in range(m):
|
|
1753
|
+
# for k in range(m):
|
|
1754
|
+
# acc=[]
|
|
1755
|
+
# for j in range(lag+1):
|
|
1756
|
+
# for l in range(lag+1):
|
|
1757
|
+
# if j<=l:
|
|
1758
|
+
# #if j==l-1:
|
|
1759
|
+
# if (labelmat[i,j],labelmat[k,l]) in g.edges:
|
|
1760
|
+
# #print((labelmat[i,j],labelmat[k,l]))
|
|
1761
|
+
# edge = edge | {(i,k)}
|
|
1762
|
+
# #acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
|
|
1763
|
+
# if j==l-lag and labelmat[i,j] in nx.ancestors(g,labelmat[k,l]):####CF at delay=lag
|
|
1764
|
+
# acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
|
|
1765
|
+
# if len(acc)>0:
|
|
1766
|
+
# causaleff1[i,k] = np.mean(acc)
|
|
1767
|
+
|
|
1768
|
+
# g1=nx.DiGraph()
|
|
1769
|
+
# g1.add_nodes_from(range(m))
|
|
1770
|
+
# g1.add_edges_from(edge)
|
|
1771
|
+
# #g1.graph['edge'] = {'arrowsize': '0.6', 'splines': 'curved'}
|
|
1772
|
+
# #g1.graph['graph'] = {'scale': '3'}
|
|
1773
|
+
# return g1, causaleff1
|
|
1774
|
+
|
|
1775
|
+
def find_lag(data):
    """Profile how far apart samples are as a function of the lag.

    Args:
        data: 2-D array of shape ``(n, p)``, one sample per row.

    Returns:
        A float array of length ``int(n/2)``. Entry ``lag`` (for
        ``1 <= lag < int(n/2)``) is the sum of the Euclidean distances
        between rows ``i`` and ``i+lag`` over ``i = 0 .. n-lag-1``, divided
        by ``n``. Entry 0 is left at 0.

    NOTE(review): the sum has ``n-lag`` terms but is divided by ``n``,
    exactly as the previous zero-padded mean did. Confirm whether ``n-lag``
    was intended before changing it.
    """
    n = data.shape[0]
    valuelag = np.zeros(int(n / 2))
    for lag in range(1, int(n / 2)):
        # Row-wise norms of all lag-separated differences in one call.
        gaps = np.linalg.norm(data[:n - lag, :] - data[lag:, :], axis=1)
        valuelag[lag] = gaps.sum() / n
    return valuelag
|
|
1786
|
+
# %%
|
|
1787
|
+
def causaleffin(G,data_trans,lag):
    """Estimate linear causal effects along the edges of ``G``.

    For every edge ``(x1, y1)`` of ``G`` with ``x1 != y1`` and every pair of
    lag indices ``i <= j``, the lagged nodes ``x = labelmat[x1, i]`` and
    ``y = labelmat[y1, j]`` are formed. Column ``y`` of ``data_trans`` is
    then regressed by least squares, with an intercept, on the columns of
    the predecessors of ``x`` plus ``x`` itself. The coefficient of ``x`` is
    stored as the effect of ``x`` on ``y``. The effect is 0 when ``y`` is a
    predecessor of ``x``.

    Args:
        G: graph exposing ``nodes``, ``edges`` and ``predecessors``; its
            node ids are used as row indices of the label table.
        data_trans: 2-D array whose columns are indexed by lagged node ids.
        lag: number of lags.

    Returns:
        An ``(m, m)`` float array of effects, ``m = len(G.nodes)``.

    NOTE(review): this function previously failed on every call because of
    ``np.zeros(m, m)`` and the undefined names ``g`` and ``data``. The
    lagged ids run up to ``(lag+1)*m - 1``, while the result is ``m x m``
    and predecessors are looked up in ``G``. The indices are therefore only
    consistent for ``lag == 0``. Confirm the intended graph and labelling
    before relying on ``lag > 0``.
    """
    import numpy as np

    Nodes = list(G.nodes)
    m = len(Nodes)
    causaleff = np.zeros((m, m))
    labelmat = np.arange(0, (lag + 1) * m).reshape((m, lag + 1))
    known = G.edges
    for x1 in Nodes:
        for y1 in Nodes:
            if x1 == y1 or (x1, y1) not in known:
                continue
            for i in range(lag + 1):
                for j in range(i, lag + 1):
                    x = labelmat[x1, i]
                    y = labelmat[y1, j]
                    pa_x = list(G.predecessors(x))
                    if y in pa_x:
                        causaleff[x, y] = 0
                        continue
                    regressors = pa_x if x in pa_x else pa_x + [x]
                    X = np.asarray(data_trans[:, regressors])
                    Y = np.asarray(data_trans[:, y])
                    # A leading column of ones gives the intercept, so the
                    # coefficient of regressor r sits at position r + 1.
                    X0 = np.hstack((np.ones((X.shape[0], 1)), X))
                    lm_out = np.linalg.lstsq(X0, Y, rcond=None)[0]
                    causaleff[x, y] = lm_out[regressors.index(x) + 1]
    return causaleff
|