timeawarepc 1.2.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
timeawarepc/pcalg.py ADDED
@@ -0,0 +1,1821 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+
4
+ """A graph generator based on the PC algorithm [Kalisch2007].
5
+
6
+ [Kalisch2007] Markus Kalisch and Peter Bühlmann. Estimating
7
+ high-dimensional directed acyclic graphs with the pc-algorithm. In The
8
+ Journal of Machine Learning Research, Vol. 8, pp. 613-636, 2007.
9
+
10
+ License: BSD
11
+ """
12
+
13
+ from __future__ import print_function
14
+
15
+ from itertools import combinations, permutations
16
+ import logging
17
+ import numpy as np
18
+ import networkx as nx
19
+
20
+ _logger = logging.getLogger(__name__)
21
+
22
def _create_complete_graph(node_ids):
    """Build the complete undirected graph over the given node ids.

    Args:
        node_ids: the node ids; the collection is traversed twice (once
            for the nodes, once for the node pairs).

    Returns:
        A networkx.Graph holding every node and one edge per unordered
        pair of nodes.
    """
    complete = nx.Graph()
    complete.add_nodes_from(node_ids)
    complete.add_edges_from(combinations(node_ids, 2))
    return complete
36
+
37
def estimate_skeleton(indep_test_func, data_matrix, alpha, **kwargs):
    """Estimate a skeleton graph from the statistical information.

    Args:
        indep_test_func: the function used for the conditional
            independence test.  It is called as
            indep_test_func(data_matrix, i, j, cond_set, **kwargs) and
            must return a p-value.
        data_matrix: data (as a numpy array); one column per node.
        alpha: the significance level.
        kwargs:
            'max_reach': maximum size of the conditioning set.  The
                value depends on the underlying distribution.
            'method': if 'stable' given, use stable-PC algorithm
                (see [Colombo2014]).
            'init_graph': initial structure of skeleton graph
                (as a networkx.Graph).  If not specified,
                a complete graph is used.  The graph is modified in
                place and returned.
            other parameters may be passed depending on the
                indep_test_func()s (all kwargs are forwarded to it).

    Returns:
        g: a skeleton graph (as a networkx.Graph).
        sep_set: a separation set (as a 2D-array of set()).  Entries for
            pairs that are not connected in 'init_graph' are None.

    Raises:
        ValueError: if 'init_graph' is not a networkx.Graph or its number
            of nodes differs from the number of data columns.

    [Colombo2014] Diego Colombo and Marloes H Maathuis. Order-independent
    constraint-based causal structure learning. In The Journal of Machine
    Learning Research, Vol. 15, pp. 3741-3782, 2014.
    """
    stable = kwargs.get('method') == 'stable'

    node_size = data_matrix.shape[1]
    node_ids = range(node_size)
    sep_set = [[set() for _ in node_ids] for _ in node_ids]
    if 'init_graph' in kwargs:
        g = kwargs['init_graph']
        if not isinstance(g, nx.Graph):
            raise ValueError('init_graph must be a networkx.Graph')
        if g.number_of_nodes() != node_size:
            raise ValueError('init_graph not matching data_matrix shape')
        # Pairs excluded by the initial graph are never tested; mark them.
        for (i, j) in combinations(node_ids, 2):
            if not g.has_edge(i, j):
                sep_set[i][j] = None
                sep_set[j][i] = None
    else:
        g = _create_complete_graph(node_ids)

    depth = 0  # size of the conditioning sets tried in this round
    while True:
        cont = False
        remove_edges = []
        for (i, j) in permutations(node_ids, 2):
            adj_i = list(g.neighbors(i))
            if j not in adj_i:
                continue
            adj_i.remove(j)
            if len(adj_i) < depth:
                # Not enough neighbours to form a conditioning set.
                continue
            _logger.debug('testing %s and %s', i, j)
            _logger.debug('neighbors of %s are %s', i, adj_i)
            for k in combinations(adj_i, depth):
                _logger.debug('indep prob of %s and %s with subset %s',
                              i, j, k)
                p_val = indep_test_func(data_matrix, i, j, set(k),
                                        **kwargs)
                _logger.debug('p_val is %s', p_val)
                if p_val > alpha:
                    if g.has_edge(i, j):
                        _logger.debug('p: remove edge (%s, %s)', i, j)
                        if stable:
                            # Stable-PC: defer removals to the end of the
                            # round so the adjacency sets stay fixed.
                            remove_edges.append((i, j))
                        else:
                            g.remove_edge(i, j)
                    sep_set[i][j] |= set(k)
                    sep_set[j][i] |= set(k)
                    break
            # At least one pair was still testable at this depth.
            cont = True
        depth += 1
        if stable:
            g.remove_edges_from(remove_edges)
        if not cont:
            break
        if ('max_reach' in kwargs) and (depth > kwargs['max_reach']):
            break

    return (g, sep_set)
125
+
126
def estimate_cpdag(skel_graph, sep_set):
    """Estimate a CPDAG from the skeleton graph and separation sets
    returned by the estimate_skeleton() function.

    Args:
        skel_graph: A skeleton graph (an undirected networkx.Graph).
        sep_set: A 2D-array of separation sets.
            The contents look like something like below.
                sep_set[i][j] = set([k, l, m])
            Pairs whose entry is None are skipped during collider
            orientation.

    Returns:
        The estimated CPDAG as a directed graph.  An undirected edge i-j
        is represented by both arcs i->j and j->i; an oriented edge keeps
        only one arc.
    """
    dag = skel_graph.to_directed()
    node_ids = skel_graph.nodes()

    # Orient unshielded colliders: for non-adjacent i, j with a common
    # neighbour k outside their separation set, keep only i->k and j->k.
    for (i, j) in combinations(node_ids, 2):
        adj_i = set(dag.successors(i))
        if j in adj_i:
            continue
        adj_j = set(dag.successors(j))
        if i in adj_j:
            continue
        if sep_set[i][j] is None:
            continue
        for k in adj_i & adj_j:
            if k in sep_set[i][j]:
                continue
            if dag.has_edge(k, i):
                _logger.debug('S: remove edge (%s, %s)', k, i)
                dag.remove_edge(k, i)
            if dag.has_edge(k, j):
                _logger.debug('S: remove edge (%s, %s)', k, j)
                dag.remove_edge(k, j)

    def _has_both_edges(dag, i, j):
        return dag.has_edge(i, j) and dag.has_edge(j, i)

    def _has_any_edge(dag, i, j):
        return dag.has_edge(i, j) or dag.has_edge(j, i)

    # Apply the orientation rules until nothing changes.  The rules only
    # ever remove arcs, so an unchanged arc count means an unchanged graph.
    while True:
        edges_before = dag.number_of_edges()
        # Ordered pairs: each rule orients i-j into i->j, so both (i, j)
        # and (j, i) must be visited for an edge to be orientable either
        # way.
        for (i, j) in permutations(node_ids, 2):
            # Rule 1: Orient i-j into i->j whenever there is an arrow k->i
            # such that k and j are nonadjacent.
            if _has_both_edges(dag, i, j):
                for k in list(dag.predecessors(i)):
                    # Skip if there is an arrow i->k (k-i is undirected).
                    if dag.has_edge(i, k):
                        continue
                    # Skip if k and j are adjacent.
                    if _has_any_edge(dag, k, j):
                        continue
                    _logger.debug('R1: remove edge (%s, %s)', j, i)
                    dag.remove_edge(j, i)
                    break

            # Rule 2: Orient i-j into i->j whenever there is a chain
            # i->k->j.
            if _has_both_edges(dag, i, j):
                # Nodes k with i->k.
                succs_i = {k for k in dag.successors(i)
                           if not dag.has_edge(k, i)}
                # Nodes k with k->j.
                preds_j = {k for k in dag.predecessors(j)
                           if not dag.has_edge(j, k)}
                if succs_i & preds_j:
                    _logger.debug('R2: remove edge (%s, %s)', j, i)
                    dag.remove_edge(j, i)

            # Rule 3: Orient i-j into i->j whenever there are two chains
            # i-k->j and i-l->j such that k and l are nonadjacent.
            if _has_both_edges(dag, i, j):
                # Nodes k with i-k.
                adj_i = {k for k in dag.successors(i)
                         if dag.has_edge(k, i)}
                for (k, l) in combinations(adj_i, 2):
                    # Skip if k and l are adjacent.
                    if _has_any_edge(dag, k, l):
                        continue
                    # Skip if not k->j.
                    if dag.has_edge(j, k) or (not dag.has_edge(k, j)):
                        continue
                    # Skip if not l->j.
                    if dag.has_edge(j, l) or (not dag.has_edge(l, j)):
                        continue
                    _logger.debug('R3: remove edge (%s, %s)', j, i)
                    dag.remove_edge(j, i)
                    break

            # Rule 4: Orient i-j into i->j whenever there are two chains
            # i-k->l and k->l->j such that k and j are nonadjacent.
            #
            # However, this rule is not necessary when the PC-algorithm
            # is used to estimate a DAG.

        if dag.number_of_edges() == edges_before:
            break

    return dag
254
+ # def pre_whiten(data,S=None):
255
+ # import numpy as np
256
+ # from scipy import stats, linalg
257
+ # import GPy
258
+ # import GPyOpt
259
+ # import seaborn as sns
260
+ # S=np.tile(np.arange(data.shape[0]),reps=[data.shape[1],1]).T
261
+ # sigma_f, l = 1.5, 2
262
+ # kernel = GPy.kern.RBF(1, sigma_f, l)
263
+ # p = data.shape[1]
264
+ # #model = GPy.models.GPRegression(X,y,kernel)
265
+ # #r = ndw_corr(A,B,S,data)
266
+ # #kernel = RBF(0.1, (10,10))
267
+ # #gp = gpr(kernel=kernel, n_restarts_optimizer=100, alpha = 0.04)
268
+ # #gp.fit(X,y)
269
+ # r=np.zeros(data.shape)
270
+ # for A in range(p):
271
+ # print((A,p))
272
+ # model_A = GPy.models.GPRegression(S[:,A].reshape((-1,1)),data[:,A].reshape((-1,1)),kernel)#
273
+ # model_A.optimize()#_restarts(num_restarts=20)
274
+ # r[:,A] = data[:,A] - model_A.predict(S[:,A].reshape((-1,1)))[0].reshape(data[:,A].shape)#model_A.predict(S[:,A].reshape(-1,1))
275
+ # return r
276
+
277
+ # def ci_test_gp(data,A,B,C,**kwargs):
278
+ # #use pre-whitened data in data.
279
+ # import numpy as np
280
+ # from scipy import stats, linalg
281
+ # import GPy
282
+ # import GPyOpt
283
+ # import seaborn as sns
284
+ # from HSIC import hsic_gam
285
+ # S=np.tile(np.arange(data.shape[0]),reps=[data.shape[1],1]).T
286
+ # sigma_f, l = 1.5, 2
287
+ # kernel = GPy.kern.RBF(1, sigma_f, l)
288
+ # p = data.shape[1]
289
+ # #model = GPy.models.GPRegression(X,y,kernel)
290
+ # C2 = np.zeros(p, dtype=np.bool)
291
+ # for i in range(p):
292
+ # if i in C:
293
+ # C2[i]=True
294
+ # #r = ndw_corr(A,B,S,data)
295
+
296
+ # # if whiten == True:
297
+ # # r = pre_whiten(data)
298
+ # # else:
299
+ # # r=data
300
+ # #print(r)
301
+ # # if r==1:
302
+ # # pval = 0
303
+ # # else:
304
+ # # z = 0.5 * np.log((1+r)/(1-r))
305
+ # # T = np.sqrt(data.shape[0]-len(S)-3)*np.abs(z)
306
+ # # pval = 2*(1 - stats.norm.cdf(T))
307
+ # if len(C) != 0:
308
+ # if len(C)>1:
309
+ # model_A = GPy.models.GPRegression(data[:,C2],data[:,A].reshape((-1,1)),kernel)#
310
+ # model_B = GPy.models.GPRegression(data[:,C2],data[:,B].reshape((-1,1)),kernel)
311
+ # if len(C)==1:
312
+ # model_A = GPy.models.GPRegression(data[:,C2].reshape((-1,1)),data[:,A].reshape((-1,1)),kernel)
313
+ # model_B = GPy.models.GPRegression(data[:,C2].reshape((-1,1)),data[:,B].reshape((-1,1)),kernel)
314
+ # model_A.optimize()#_restarts(num_restarts=20,verbose=False);
315
+ # model_B.optimize()
316
+ # rA = data[:,A] - model_A.predict(data[:,C2])[0].reshape(data[:,A].shape)#model_A.predict(S[:,A].reshape(-1,1))
317
+ # rB = data[:,B] - model_B.predict(data[:,C2])[0].reshape(data[:,B].shape)#model_A.predict(S[:,A].reshape(-1,1))
318
+ # else:
319
+ # rA = data[:,A]
320
+ # rB = data[:,B]
321
+ # pval = hsic_gam(rA.reshape((-1,1)),rB.reshape((-1,1)))
322
+ # return pval
323
+
324
def ci_test_gauss(data, A, B, S, **kwargs):
    """Fisher z-test of the partial correlation between two columns.

    Args:
        data: 2D numpy array, one column per variable.
        A, B: column indices of the two variables under test.
        S: collection of column indices to condition on.
        **kwargs: accepted for compatibility with estimate_skeleton();
            ignored here.

    Returns:
        The two-sided p-value of the test, or 0 when the partial
        correlation is exactly +/-1.
    """
    import numpy as np
    from scipy import stats

    r = partial_corr(A, B, S, data)
    if abs(r) >= 1:
        # The Fisher transform diverges at +/-1; treat as maximally
        # dependent instead of evaluating log(0).
        return 0
    z = np.arctanh(r)  # == 0.5 * log((1 + r) / (1 - r))
    T = np.sqrt(data.shape[0] - len(S) - 3) * np.abs(z)
    # sf() is 1 - cdf() without the cancellation error in the far tail.
    return 2 * stats.norm.sf(T)
336
def ci_test_gauss_btp(data, A, B, S, **kwargs):
    """Partial-correlation independence test calibrated by a stationary
    bootstrap.

    The absolute Fisher-transformed partial correlation T of the data is
    compared with the same statistic computed on 50 stationary-bootstrap
    resamples (block length: median of the first column returned by
    arch's optimal_block_length).

    Args:
        data: 2D numpy array, one column per variable.
        A, B: column indices of the two variables under test.
        S: collection of column indices to condition on.
        **kwargs: accepted for compatibility with estimate_skeleton();
            ignored here.

    Returns:
        The fraction of bootstrap statistics that exceed 2 * T, or 0 when
        the partial correlation is exactly +/-1.
    """
    import numpy as np
    from arch import bootstrap

    r = partial_corr(A, B, S, data)
    if abs(r) >= 1:
        # Fisher transform diverges; no bootstrap value can exceed it.
        return 0
    T = np.abs(np.arctanh(r))

    nbtp = 50  # number of bootstrap replications
    band = bootstrap.optimal_block_length(data)
    bs = bootstrap.StationaryBootstrap(np.median(band.iloc[:, 0]), data)
    Tbtp = np.zeros(nbtp)
    for idx, (pos_data, _kw_data) in enumerate(bs.bootstrap(nbtp)):
        # pos_data[0] is the resampled copy of `data`.
        rbtp = partial_corr(A, B, S, pos_data[0])
        Tbtp[idx] = np.abs(np.arctanh(rbtp))
    return np.sum(Tbtp > 2 * T) / nbtp
375
def hsic_condind(data, A, B, S, **kwargs):
    """Run the HSIC (conditional) independence test on two columns.

    Args:
        data: 2D numpy array, one column per variable.
        A, B: column indices of the two variables under test.
        S: collection of column indices to condition on; when empty the
            unconditional form hsic_CI(X, Y) is used.
        **kwargs: accepted for compatibility with estimate_skeleton();
            ignored here.

    Returns:
        Whatever hsic_CI returns (estimate_skeleton() compares it with
        alpha as a p-value -- NOTE(review): confirm against
        hsiccondTestIC).
    """
    from hsiccondTestIC import hsic_CI

    X = data[:, A]
    Y = data[:, B]
    if len(S) == 0:
        return hsic_CI(X, Y)
    # Boolean column selector for the conditioning variables.
    cond_mask = np.array([col in S for col in range(data.shape[1])],
                         dtype=bool)
    return hsic_CI(X, Y, data[:, cond_mask])
402
+ # if len(S) == 0:
403
+ # sig,pval,T=hsiccondTestIC(data[:,A],data[:,B])
404
+ # else:
405
+ # p = data.shape[1]
406
+ # idx = np.zeros(p, dtype=np.bool)
407
+ # for i in range(p):
408
+ # if i in S:
409
+ # idx[i]=True
410
+ # sig,pval,T=hsiccondTestIC(data[:,A],data[:,B],data[:,idx])
411
+
412
+
413
def partial_corr(A, B, S, data):
    """Return the correlation of columns A and B after regressing out S.

    Both columns are fitted by least squares (no intercept term) on the
    columns listed in S, and the Pearson correlation of the two residual
    vectors is returned.

    Args:
        A, B: column indices of the two variables.
        S: collection of column indices to regress out (may be empty).
        data: 2D numpy array, one column per variable.

    Returns:
        The Pearson correlation coefficient of the residuals.
    """
    import numpy as np
    from scipy import stats, linalg

    n_cols = data.shape[1]
    cond_mask = np.array([col in S for col in range(n_cols)], dtype=bool)
    design = data[:, cond_mask]

    def _residual(col):
        # Part of the column not explained by the conditioning columns.
        target = data[:, col]
        coef = linalg.lstsq(design, target)[0]
        return target - design.dot(coef)

    return stats.pearsonr(_residual(A), _residual(B))[0]
432
if __name__ == '__main__':
    # Self-check: estimate three CPDAGs and compare each with the graph
    # that is expected for its data set.
    import networkx as nx
    import numpy as np

    from gsq.ci_tests import ci_test_bin, ci_test_dis
    from gsq.gsq_testdata import bin_data, dis_data

    # For a debug trace, attach a logging.StreamHandler set to DEBUG to
    # _logger and set _logger's level to DEBUG.

    def _run_demo(test_func, samples, nodes, true_edges, **options):
        """Estimate a CPDAG from `samples` and print it next to the
        expected graph."""
        (skel, seps) = estimate_skeleton(indep_test_func=test_func,
                                         data_matrix=samples,
                                         alpha=0.01,
                                         **options)
        found = estimate_cpdag(skel_graph=skel, sep_set=seps)
        expected = nx.DiGraph()
        expected.add_nodes_from(nodes)
        expected.add_edges_from(true_edges)
        print('Edges are:', found.edges(), end='')
        if nx.is_isomorphic(found, expected):
            print(' => GOOD')
        else:
            print(' => WRONG')
            print('True edges should be:', expected.edges())

    # Binary data.
    _run_demo(ci_test_bin,
              np.array(bin_data).reshape((5000, 5)),
              [0, 1, 2, 3, 4],
              [(0, 1), (2, 3), (3, 2), (3, 1),
               (2, 4), (4, 2), (4, 1)])

    # Discrete data.
    _run_demo(ci_test_dis,
              np.array(dis_data).reshape((10000, 5)),
              [0, 1, 2, 3, 4],
              [(0, 2), (1, 2), (1, 3), (4, 3)],
              levels=[3,2,3,4,2])

    # Synthetic Gaussian data, standardised column-wise, tested with HSIC
    # (ci_test_gauss can be substituted for hsic_condind).
    dm1 = np.random.normal(0,1,1000)
    dm2 = np.random.normal(0,1,1000)
    dm3 = dm1 + 0.5*dm2 + np.random.normal(0,1,1000)
    dm4 = dm3 + np.random.normal(0,1,1000)
    data = np.column_stack((dm1,dm2,dm3,dm4))
    data -= data.mean(axis=0)
    data /= data.std(axis=0)
    _run_demo(hsic_condind,
              data,
              [0, 1, 2, 3],
              [(0, 2), (1, 2), (2, 3)],
              method='stable')
502
+ #%%
503
+ # def cmiknn_indeptest(data,A,B,S,**kwargs):
504
+ # from tigramite.independence_tests import CMIknn
505
+ # cmi_knn = CMIknn(significance='shuffle_test', knn=0.1, shuffle_neighbors=5, transform='ranks')
506
+ # if len(S)==0:
507
+ # data_x = data[:,A]
508
+ # data_y = data[:,B]
509
+ # arr = np.row_stack((data_x,data_y))
510
+ # xyz = np.array([0,1])
511
+ # val = cmi_knn.get_dependence_measure(arr,xyz)
512
+ # p = cmi_knn.get_shuffle_significance(arr,xyz,val)
513
+ # else:
514
+ # data_x = data[:,A]
515
+ # data_y = data[:,B]
516
+ # print(S)
517
+ # data_z = data[:,list(S)].T
518
+ # arr = np.row_stack((data_x,data_y,data_z))
519
+ # xyz = np.array([0,1]+[2]*data_z.shape[0])
520
+ # val = cmi_knn.get_dependence_measure(arr,xyz)
521
+ # p = cmi_knn.get_shuffle_significance(arr,xyz,val)
522
+ # return p
523
+ # #%%
524
+ # dm1 = np.random.normal(0,1,1000)
525
+ # dm2 = np.random.normal(0,1,1000)
526
+ # dm3 = dm1 + 0.5*dm2 + np.random.normal(0,1,1000)
527
+ # dm4 = dm3 + np.random.normal(0,1,1000)
528
+
529
+ # dm=np.column_stack((dm1,dm2,dm3,dm4))
530
+ # (g, sep_set) = estimate_skeleton(indep_test_func=cmiknn_indeptest,
531
+ # data_matrix=dm,
532
+ # alpha=0.01,
533
+ # method='stable')
534
+ # g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
535
+ # g_answer = nx.DiGraph()
536
+ # g_answer.add_nodes_from([0, 1, 2, 3])
537
+ # g_answer.add_edges_from([(0, 2), (1, 2), (2, 3)])
538
+ # print('Edges are:', g.edges(), end='')
539
+ # if nx.is_isomorphic(g, g_answer):
540
+ # print(' => GOOD')
541
+ # else:
542
+ # print(' => WRONG')
543
+ # print('True edges should be:', g_answer.edges())
544
+ #%%
545
def causaleff_parcorr(g, data):
    """Fill a matrix with pairwise partial correlations given all other
    nodes.

    Args:
        g: graph whose `nodes` are used both as matrix indices and as
            column indices into `data`.
        data: 2D numpy array, one column per node.

    Returns:
        A (len(nodes), len(nodes)) array whose [x, y] entry is the partial
        correlation of x and y given every other node; the diagonal is 0.
    """
    import numpy as np

    nodes = list(g.nodes)
    causaleff = np.zeros((len(nodes), len(nodes)))
    for x in nodes:
        for y in nodes:
            if x == y:
                continue
            others = [node for node in nodes if node not in (x, y)]
            causaleff[x, y] = partial_corr(x, y, others, data)
    return causaleff
556
def causaleff_ida_single(g, data, x, y):
    """Estimate the causal effect of node x on node y by regression.

    y is regressed (sklearn LinearRegression) on x together with the
    predecessors of x in g, and the coefficient of x is returned.

    Args:
        g: directed graph (networkx); nodes are used as column indices
            into `data`.
        data: 2D numpy array, one column per node.
        x: the cause node.
        y: the effect node.

    Returns:
        The regression coefficient of x, or 0 when x or y is not in g,
        x == y, x is not an ancestor of y, or y is a predecessor of x.
    """
    causaleff = 0
    nodes = list(g.nodes)
    if x not in nodes or y not in nodes or x == y:
        return causaleff
    if x not in nx.ancestors(g, y):
        return causaleff

    pa_x = list(g.predecessors(x))
    if y in pa_x:
        return causaleff

    # Imported only when a regression is actually needed.
    from sklearn import linear_model

    regressors = pa_x if x in pa_x else pa_x + [x]
    fitted = linear_model.LinearRegression().fit(data[:, regressors],
                                                 data[:, y])
    return fitted.coef_[regressors.index(x)]
580
+ # def causaleff_ida(g,data):
581
+ # from sklearn import linear_model
582
+ # import numpy as np
583
+ # #from gen_data_fns import sigmoid, relu
584
+ # #Edges = list(g.edges)
585
+ # Nodes = list(g.nodes)
586
+ # causaleff=np.zeros((len(Nodes),len(Nodes)))
587
+ # # if transformed == True and lag is None:
588
+ # # print("Please provide lag used in transformation")
589
+ # # elif transformed == True and lag is not None:
590
+ # # h=np.repeat(np.arange(0,len(Nodes)),lag)
591
+ # # if activation == 'centred-sigmoid':
592
+ # # activationfn = lambda x: sigmoid(x) - 0.5
593
+ # # elif activation == 'tanh':
594
+ # # activationfn = lambda x: np.tanh(x)
595
+ # # elif activation == 'linear':
596
+ # # activationfn = lambda x: x
597
+ # # elif activation == 'relu':
598
+ # # activationfn = lambda x: relu(x)
599
+ # for x in Nodes:
600
+ # for y in Nodes:
601
+ # if x!=y and x in list(nx.ancestors(g,y)):#list(g.predecessors(y)):#list(nx.ancestors(g,y)):
602
+ # #if (x,y) in Edges:
603
+ # #if y not in list(g.predecessors(x)):
604
+ # #if x in list(nx.ancestors(g,y)):
605
+ # lm = linear_model.LinearRegression()
606
+ # pa_x = list(g.predecessors(x))
607
+ # pa_y = list(g.predecessors(y))
608
+ # if x not in pa_x:
609
+ # regressors = pa_x + [x]
610
+ # else:
611
+ # regressors = pa_x
612
+ # if y in pa_x:
613
+ # causaleff[x,y] = 0
614
+ # else:
615
+ # # if y in pa_y:
616
+ # # pa_y = pa_y.pop(y)
617
+ # # if x not in pa_y:
618
+ # # regressors = pa_y + [x]
619
+ # # else:
620
+ # # regressors = pa_y
621
+ # # regressors=[x] + pa_x
622
+ # #if x in pa_x:
623
+ # # print("x in pa_x")
624
+ # X=data[:,regressors]#.reshape(-1,1)
625
+ # # if transformed == True and lag is not None:
626
+ # # for iter in range(X.shape[1]):
627
+ # # if h(regressors[iter]) != h(y):
628
+ # # X[iter] = activationfn(X[iter])
629
+ # Y=data[:,y]
630
+ # lm_out = lm.fit(X,Y)
631
+ # causaleff[x,y] = lm_out.coef_[regressors.index(x)]#lm_out.coef_[0]#lm_out.coef_[regressors.index(x)]#
632
+ # # if causaleff[x,y]>0:
633
+ # # causaleff[x,y] = np.log(causaleff[x,y]+1)
634
+ # # else:
635
+ # # causaleff[x,y] = -np.log(-causaleff[x,y]+1)
636
+ # return causaleff
637
def causaleff_ida(g, data):
    """Estimate pairwise causal effects by regression on parent sets.

    For every ordered pair (x, y) with x != y, y is regressed by least
    squares (with an intercept) on x together with the predecessors of x
    in g; the coefficient of x is stored at [x, y].  Pairs where y is a
    predecessor of x are left at 0.

    Args:
        g: directed graph; nodes are used both as matrix indices and as
            column indices into `data`.
        data: 2D numpy array, one column per node.

    Returns:
        A (len(nodes), len(nodes)) array of effects with a zero diagonal.
    """
    import numpy as np

    nodes = list(g.nodes)
    causaleff = np.zeros((len(nodes), len(nodes)))

    for x in nodes:
        pa_x = list(g.predecessors(x))
        targets = [y for y in nodes if y != x and y not in pa_x]
        if not targets:
            continue
        regressors = pa_x if x in pa_x else pa_x + [x]
        X = np.asarray(data[:, regressors])
        # The design matrix depends on x only: build it once and solve
        # for all targets in a single least-squares call.
        design = np.hstack((np.ones((X.shape[0], 1)), X))
        coef = np.linalg.lstsq(design, np.asarray(data[:, targets]),
                               rcond=None)[0]
        # +1 skips the intercept row.
        causaleff[x, targets] = coef[regressors.index(x) + 1]
    return causaleff
668
def causaleff_ida_fin(g, data):
    """Estimate causal effects of each node on its descendants.

    Like causaleff_ida(), but only pairs where x is an ancestor of y are
    evaluated, and nodes are mapped to rows/columns by their position in
    list(g.nodes) rather than by their label.

    Args:
        g: directed graph (networkx).
        data: 2D numpy array whose column k belongs to the k-th node of
            list(g.nodes).

    Returns:
        A (len(nodes), len(nodes)) array; entry [pos(x), pos(y)] is the
        coefficient of x when y is regressed (with an intercept) on x and
        the predecessors of x.  Entries stay 0 when x is not an ancestor
        of y or y is a predecessor of x.
    """
    import numpy as np

    nodes = list(g.nodes)
    position = {node: idx for idx, node in enumerate(nodes)}
    causaleff = np.zeros((len(nodes), len(nodes)))

    for x in nodes:
        pa_x = list(g.predecessors(x))
        # x is an ancestor of y exactly when y is a descendant of x.
        targets = [y for y in nx.descendants(g, x) if y not in pa_x]
        if not targets:
            continue
        regressors = [position[parent] for parent in pa_x]
        if x not in pa_x:
            regressors.append(position[x])
        X = np.asarray(data[:, regressors])
        design = np.hstack((np.ones((X.shape[0], 1)), X))
        target_cols = [position[y] for y in targets]
        coef = np.linalg.lstsq(design, np.asarray(data[:, target_cols]),
                               rcond=None)[0]
        # +1 skips the intercept row.
        causaleff[position[x], target_cols] = \
            coef[regressors.index(position[x]) + 1]
    return causaleff
696
def causaleff_ida_pconly(g, data, transformed=True, lag=None):
    """Estimate causal effects along the direct edges of g only.

    For every edge x->y (x != y), y is regressed (sklearn
    LinearRegression) on x together with the predecessors of x, and the
    coefficient of x is stored at [x, y].

    Args:
        g: directed graph; nodes are used both as matrix indices and as
            column indices into `data`.
        data: 2D numpy array, one column per node.
        transformed: unused; kept for interface compatibility.
        lag: unused; kept for interface compatibility.

    Returns:
        A (len(nodes), len(nodes)) array; entries without an edge x->y
        stay 0.
    """
    from sklearn import linear_model
    import numpy as np

    nodes = list(g.nodes)
    causaleff = np.zeros((len(nodes), len(nodes)))
    for x in nodes:
        children = [y for y in nodes if y != x and g.has_edge(x, y)]
        if not children:
            continue
        # x is placed first, so its coefficient is always coef_[0].
        regressors = [x] + list(g.predecessors(x))
        X = data[:, regressors]
        for y in children:
            fitted = linear_model.LinearRegression().fit(X, data[:, y])
            causaleff[x, y] = fitted.coef_[0]
    return causaleff
749
+ # %%
750
+
751
+ # def pc_plot_out(lag,alpha,motif,n_ctrnn,n_samp,m,w,tau):
752
+ # import networkx as nx
753
+ # import matplotlib.pyplot as plt
754
+ # from gen_data_fns import create_dataset4, plot_matrix
755
+ # import numpy as np
756
+ # from pcalg import causaleff_ida
757
+ # dataset = create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T
758
+ # for i in range(n_samp - 1):
759
+ # dataset = np.vstack((dataset, create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T))
760
+ # dataset2=data_transformed(dataset, lag)
761
+ # (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
762
+ # data_matrix=dataset2,
763
+ # alpha=alpha,method='stable')
764
+ # g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
765
+ # causaleff = causaleff_ida(g,dataset2)
766
+ # g1, causaleff1 = return_finaledges(g,causaleff,lag,m)
767
+ # causaleff2 = causaleff1/np.max(np.abs(causaleff1))
768
+ # plot_matrix(causaleff2,motif,'pc_causaleff')
769
+ # fig, ax = plt.subplots(1,1,figsize=(10,10))
770
+ # nx.draw(g1, with_labels= True,ax=ax)
771
+ # fig.savefig('dag_'+motif+'.png',format = 'png', dpi =600)
772
+ # return g1,causaleff2
773
def pc_original_bootstrap_plot(alpha,motif,n_ctrnn,n_samp,m,w,tau,niter=50):
    """Run the PC algorithm on random windows of simulated data, aggregate
    the results and plot them.

    Data are generated with create_dataset4 (n_samp draws stacked
    row-wise).  In each of `niter` iterations a contiguous window of up
    to 10000 rows is drawn, a CPDAG is estimated with the stable PC
    variant and ci_test_gauss, and causal effects are computed with
    causaleff_ida.

    Args:
        alpha: significance level for the independence tests.
        motif: passed through to plot_matrix.
        n_ctrnn, w, tau: passed through to create_dataset4.
        n_samp: number of create_dataset4 draws to stack (at least one
            draw is always made).
        m: number of nodes; size of the returned graph and matrix.
        niter: number of bootstrap iterations.

    Returns:
        (g2, causaleff3): a networkx.DiGraph with the edges found in at
        least 75% of the iterations, and the effect matrix averaged over
        all iterations.
    """
    import random
    from gen_data_fns import create_dataset4, plot_matrix

    window = 10000
    # Stack all draws at once instead of re-copying the array per draw.
    dataset = np.vstack([create_dataset4(n_ctrnn,n_ctrnn,m,w,tau).T
                         for _ in range(max(1, n_samp))])
    n_rows = dataset.shape[0]
    span = min(window, n_rows)  # never ask for more rows than exist

    graphs = []
    effects = []
    for _ in range(niter):
        start = random.randint(0, n_rows - span)
        sample = dataset[start:start + span, :]
        (skel, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                            data_matrix=sample,
                                            alpha=alpha, method='stable')
        cpdag = estimate_cpdag(skel_graph=skel, sep_set=sep_set)
        graphs.append(cpdag)
        # causaleff_ida needs predecessors(), i.e. the directed CPDAG,
        # not the undirected skeleton.
        effects.append(causaleff_ida(cpdag, sample))

    # Count in how many iterations each directed edge appears.
    edge_sets = [set(graph.edges) for graph in graphs]
    edgemat = np.zeros((m, m))
    for i in range(m):
        for j in range(m):
            edgemat[i, j] = sum((i, j) in edges for edges in edge_sets)

    threshold = 75 * niter / 100.
    edgefinal = {(i, j) for i in range(m) for j in range(m)
                 if edgemat[i, j] >= threshold}
    g2 = nx.DiGraph()
    g2.add_nodes_from(range(m))
    g2.add_edges_from(edgefinal)

    causaleff3 = np.zeros((m, m))
    for effect in effects:
        causaleff3 = causaleff3 + effect
    causaleff3 = causaleff3 / niter

    plot_matrix(causaleff3/np.max(np.abs(causaleff3)),motif,'pc_causaleff_orig')
    plot_matrix(0.5*(causaleff3!=0),motif,'pc_causalconn_orig')
    return g2,causaleff3
829
def pc_bootstrap(dataset2,lag,alpha,m,niter=50):
    """Run the PC algorithm on random windows of a data set and aggregate
    the results.

    In each of `niter` iterations a contiguous window of up to 500 rows is
    drawn, a CPDAG is estimated with the stable PC variant and
    ci_test_gauss, causal effects are computed with causaleff_ida, and
    both are reduced by return_finaledges(cpdag, effects, lag, m), which
    is defined outside this function and is expected to return a
    (graph, matrix) pair.

    Args:
        dataset2: 2D numpy array, one column per variable.
        lag: passed through to return_finaledges.
        alpha: significance level for the independence tests.
        m: number of nodes; size of the returned graph and matrix.
        niter: number of bootstrap iterations.

    Returns:
        (g2, causaleff3): a networkx.DiGraph with the edges found in at
        least 25% of the iterations, and a matrix holding, per entry, the
        mean of the non-zero effects over the iterations (0 if none).
    """
    import random
    from tqdm import tqdm

    window = 500
    n_rows = dataset2.shape[0]
    span = min(window, n_rows)  # never ask for more rows than exist

    graphs = []
    effects = []
    for _ in tqdm(range(niter)):
        # Draw the start so that a full window always fits.
        start = random.randint(0, n_rows - span)
        sample = dataset2[start:start + span, :]
        (skel, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                            data_matrix=sample,
                                            alpha=alpha, method='stable')
        cpdag = estimate_cpdag(skel_graph=skel, sep_set=sep_set)
        causaleff = causaleff_ida(cpdag, sample)
        final_graph, final_effect = return_finaledges(cpdag, causaleff,
                                                      lag, m)
        graphs.append(final_graph)
        effects.append(final_effect)

    # Count in how many iterations each edge appears.
    edge_sets = [set(graph.edges) for graph in graphs]
    edgemat = np.zeros((m, m))
    for i in range(m):
        for j in range(m):
            edgemat[i, j] = sum((i, j) in edges for edges in edge_sets)

    threshold = 25 * niter / 100.
    edgefinal = {(i, j) for i in range(m) for j in range(m)
                 if edgemat[i, j] >= threshold}
    g2 = nx.DiGraph()
    g2.add_nodes_from(range(m))
    g2.add_edges_from(edgefinal)

    # Average each entry over the iterations in which it is non-zero.
    total = np.zeros((m, m))
    hits = np.zeros((m, m))
    for effect in effects:
        block = np.asarray(effect)[:m, :m]
        nonzero = block != 0
        total[nonzero] += block[nonzero]
        hits += nonzero
    causaleff3 = np.divide(total, hits, out=np.zeros((m, m)),
                           where=hits > 0)
    return g2,causaleff3
886
def pc_bootstrap_2(dataset,lag,alpha,m, n_ctrnn, n_samp,niter=50):
    """Run the PC algorithm on repeatedly transformed data and aggregate
    the results.

    In each of `niter` iterations the data are rebuilt with
    data_transformed_2(dataset, lag, n_ctrnn, n_samp), a CPDAG is
    estimated with the stable PC variant and ci_test_gauss, causal effects
    are computed with causaleff_ida, and both are reduced by
    return_finaledges(cpdag, effects, lag, m).  data_transformed_2 and
    return_finaledges are defined outside this function;
    return_finaledges is expected to return a (graph, matrix) pair.

    Args:
        dataset: input data, passed to data_transformed_2.
        lag: passed to data_transformed_2 and return_finaledges.
        alpha: significance level for the independence tests.
        m: number of nodes; size of the returned graph and matrix.
        n_ctrnn, n_samp: passed through to data_transformed_2.
        niter: number of iterations.

    Returns:
        (g2, causaleff3): a networkx.DiGraph with the edges found in at
        least 25% of the iterations, and a matrix holding, per entry, the
        mean of the non-zero effects over the iterations (0 if none).
    """
    from tqdm import tqdm

    graphs = []
    effects = []
    for _ in tqdm(range(niter)):
        sample = data_transformed_2(dataset, lag, n_ctrnn, n_samp)
        (skel, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                            data_matrix=sample,
                                            alpha=alpha, method='stable')
        cpdag = estimate_cpdag(skel_graph=skel, sep_set=sep_set)
        causaleff = causaleff_ida(cpdag, sample)
        final_graph, final_effect = return_finaledges(cpdag, causaleff,
                                                      lag, m)
        graphs.append(final_graph)
        effects.append(final_effect)

    # Count in how many iterations each edge appears.
    edge_sets = [set(graph.edges) for graph in graphs]
    edgemat = np.zeros((m, m))
    for i in range(m):
        for j in range(m):
            edgemat[i, j] = sum((i, j) in edges for edges in edge_sets)

    threshold = 25 * niter / 100.
    edgefinal = {(i, j) for i in range(m) for j in range(m)
                 if edgemat[i, j] >= threshold}
    g2 = nx.DiGraph()
    g2.add_nodes_from(range(m))
    g2.add_edges_from(edgefinal)

    # Average each entry over the iterations in which it is non-zero;
    # entries that were never non-zero stay 0 instead of becoming 0/0.
    total = np.zeros((m, m))
    hits = np.zeros((m, m))
    for effect in effects:
        block = np.asarray(effect)[:m, :m]
        nonzero = block != 0
        total[nonzero] += block[nonzero]
        hits += nonzero
    causaleff3 = np.divide(total, hits, out=np.zeros((m, m)),
                           where=hits > 0)
    return g2,causaleff3
938
+ # def pc_bootstrap(dataset2,lag,alpha,m,niter=50):
939
+ # import networkx as nx
940
+ # import matplotlib.pyplot as plt
941
+ # from gen_data_fns import create_dataset4, plot_matrix
942
+ # import numpy as np
943
+ # from pcalg import causaleff_ida
944
+ # from tqdm import tqdm
945
+ # import random
946
+ # g1={}
947
+ # causaleff2={}
948
+ # for iter in tqdm(range(niter)):
949
+ # idx=random.randint(0,dataset2.shape[0]-100)
950
+ # #dataset3 = dataset2[random.sample(range(dataset2.shape[0]),1000),:]
951
+ # dataset3 = dataset2[idx:(idx+500),:]
952
+ # (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
953
+ # data_matrix=dataset3,
954
+ # alpha=alpha,method='stable')
955
+ # g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
956
+ # causaleff = causaleff_ida(g,dataset3)
957
+ # g1[iter], causaleff1 = return_finaledges(g,causaleff,lag,m)
958
+ # causaleff2[iter] = causaleff1#np.max(np.abs(causaleff1))
959
+ # edgemat=np.zeros((m,m))
960
+ # for i in range(m):
961
+ # for j in range(m):
962
+ # for iter in range(niter):
963
+ # if (i,j) in list(g1[iter].edges):
964
+ # edgemat[i,j] = edgemat[i,j]+1
965
+ # edgefinal = set([])
966
+ # for i in range(m):
967
+ # for j in range(m):
968
+ # if edgemat[i,j]>= (25*niter/100.):
969
+ # edgefinal = edgefinal | {(i,j)}
970
+ # g2=nx.DiGraph()
971
+ # g2.add_nodes_from(range(m))
972
+ # g2.add_edges_from(edgefinal)
973
+ # #nx.draw(g2,with_labels=True)
974
+ # causaleff3 = np.zeros((m,m))
975
+ # for i in range(m):
976
+ # for j in range(m):
977
+ # s=0
978
+ # for iter in range(niter):
979
+ # if causaleff2[iter][i,j]!=0:
980
+ # causaleff3[i,j] = causaleff3[i,j]+causaleff2[iter][i,j]
981
+ # s=s+1
982
+ # causaleff3[i,j] = causaleff3[i,j]/s
983
+ # # for i in list(g2.nodes):
984
+ # # for j in list(g2.nodes):
985
+ # # if i not in nx.ancestors(g2,j):
986
+ # # causaleff3[i,j] = 0
987
+ # return g2,causaleff3
988
def pc_plot_out(lag, alpha, motif, n_ctrnn, n_samp, m, w, tau, niter=50, isPlot=True):
    """Simulate data, run the bootstrapped PC pipeline and optionally plot.

    ``n_samp`` datasets are generated with ``create_dataset4`` (at least one
    is always generated), transposed and stacked row-wise, unrolled with
    ``data_transformed`` and passed to ``pc_bootstrap``.

    Args:
        lag: maximum lag used when unrolling the data.
        alpha: significance level forwarded to ``pc_bootstrap``.
        motif: label used in plot titles/file names
            (``'dag_' + motif + '.png'``).
        n_ctrnn, m, w, tau: forwarded to ``create_dataset4``.
        n_samp: number of simulated datasets to stack.
        niter: number of bootstrap iterations (now actually forwarded; the
            previous code always used 50, which is also the default).
        isPlot: plots are produced only when this is exactly ``True``.

    Returns:
        ``(g2, causaleff2)`` as returned by ``pc_bootstrap``.
    """
    import networkx as nx
    import matplotlib.pyplot as plt
    from gen_data_fns import create_dataset4, plot_matrix
    import numpy as np
    from pcalg import pc_bootstrap

    # One dataset is always drawn, then n_samp - 1 more (none if n_samp <= 1).
    trials = [create_dataset4(n_ctrnn, n_ctrnn, m, w, tau).T]
    for _ in range(n_samp - 1):
        trials.append(create_dataset4(n_ctrnn, n_ctrnn, m, w, tau).T)
    # Stack once instead of re-copying the growing array on every iteration.
    dataset = np.vstack(trials) if len(trials) > 1 else trials[0]

    dataset2 = data_transformed(dataset, lag)
    g2, causaleff2 = pc_bootstrap(dataset2, lag, alpha, m, niter=niter)

    if isPlot is True:
        plot_matrix(causaleff2, motif, 'pc_causaleff')
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))
        nx.draw(g2, with_labels=True, ax=ax)
        fig.savefig('dag_' + motif + '.png', format='png', dpi=600)
    return g2, causaleff2
1017
def pc_plot_out2(lag, alpha, motif, n_ctrnn, n_samp, m, w, tau, niter=50):
    """Bootstrapped PC estimate refined by leave-one-variable-out re-runs.

    A base graph ``G`` is estimated with ``pc_bootstrap`` (25 iterations).
    Then, for each variable ``k``, PC is re-run on a random window of the
    data with column ``k`` removed.  For every edge ``node1 -> node2`` of
    that reduced graph, ``k -> node2`` is added to ``G`` when all of these
    hold: ``k -> node2`` is not already in ``G``, ``node1 != node2``,
    ``node1`` is an ancestor of ``k`` in ``G``, and no node other than ``k``
    and ``node2`` has an edge into ``node2`` in ``G``.

    Args:
        lag: maximum lag used when unrolling the data.
        alpha: significance level for the CI test.
        motif: label used in plot titles/file names.
        n_ctrnn, m, w, tau: forwarded to ``create_dataset4``.
        n_samp: number of simulated datasets to stack.
        niter: accepted for signature compatibility; not used here.

    Returns:
        ``(G, causaleff3)``: the augmented ``networkx.DiGraph`` and an
        ``m x m`` array that copies the bootstrap effect ``[i, j]`` wherever
        ``i`` is an ancestor of ``j`` in ``G`` (0 elsewhere).
    """
    import networkx as nx
    import matplotlib.pyplot as plt
    from gen_data_fns import create_dataset4, plot_matrix
    import numpy as np
    from pcalg import causaleff_ida, pc_bootstrap
    import random
    from tqdm import tqdm

    trials = [create_dataset4(n_ctrnn, n_ctrnn, m, w, tau).T]
    for _ in range(n_samp - 1):
        trials.append(create_dataset4(n_ctrnn, n_ctrnn, m, w, tau).T)
    dataset = np.vstack(trials) if len(trials) > 1 else trials[0]

    dataset01 = data_transformed(dataset, lag)
    # The previous code also built a self-loop-free copy of this graph via
    # the removed ``Graph.selfloop_edges()`` method; that copy was never
    # used, so it is no longer created.
    G, causaleff0 = pc_bootstrap(dataset01, lag, alpha, m, niter=25)

    for k in tqdm(range(m)):
        dataset2 = data_transformed(np.delete(dataset, [k], axis=1), lag)
        # Random window start; clamp so short inputs do not make randint fail.
        idx = random.randint(0, max(0, dataset2.shape[0] - 100))
        dataset3 = dataset2[idx:(idx + 500), :]

        # Map the m-1 reduced indices back to the original variable ids.
        labels = list(range(m))
        labels.remove(k)
        relabels = dict(zip(range(m - 1), labels))

        (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                         data_matrix=dataset3,
                                         alpha=alpha, method='stable')
        g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
        causaleff = causaleff_ida(g, dataset3)

        # return_finaledges may hand back a graph or an adjacency matrix as
        # its first item; normalise to a DiGraph before relabelling.
        rolled = return_finaledges(g, causaleff, lag, m - 1)[0]
        if not hasattr(rolled, 'edges'):
            adjacency = np.asarray(rolled)
            rolled = nx.DiGraph()
            rolled.add_nodes_from(range(adjacency.shape[0]))
            rolled.add_edges_from((int(a), int(b))
                                  for a, b in np.argwhere(adjacency != 0))
        reduced = nx.relabel_nodes(rolled, relabels)

        for (node1, node2) in list(reduced.edges):
            if G.has_edge(k, node2):
                continue
            if node1 == node2 or node1 not in nx.ancestors(G, k):
                continue
            # Only attribute node2 to k when no other node already points
            # into node2 in the current graph.
            has_other_parent = any(G.has_edge(z, node2)
                                   for z in range(m) if z not in (k, node2))
            if not has_other_parent:
                G.add_edge(k, node2)

    nx.draw(G, with_labels=True)

    causaleff3 = np.zeros((m, m))
    for j in list(G.nodes):
        # One ancestor query per target instead of one per (i, j) pair.
        for i in nx.ancestors(G, j):
            causaleff3[i, j] = causaleff0[i, j]

    plot_matrix(causaleff3, motif, 'pc_causaleff')
    plot_matrix(0.5 * (causaleff3 != 0), motif, 'pc_causalconn')
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    nx.draw(G, with_labels=True, ax=ax)
    fig.savefig('dag_' + motif + '.png', format='png', dpi=600)
    return G, causaleff3
1153
+ #%%
1154
def pc_plot_out3(lag, alpha, motif, n_ctrnn, n_samp, m, w, tau, niter=50):
    """Simulate data, run ``pc_bootstrap_2`` and plot ancestor-based effects.

    Args:
        lag: maximum lag used when unrolling the data.
        alpha: significance level for the CI test.
        motif: label used in plot titles/file names.
        n_ctrnn, m, w, tau: forwarded to ``create_dataset4``.
        n_samp: number of simulated datasets to stack; also forwarded to
            ``pc_bootstrap_2``.
        niter: number of bootstrap iterations.

    Returns:
        ``(G, causaleff3)``: the graph returned by ``pc_bootstrap_2`` and an
        ``m x m`` array that copies the bootstrap effect ``[i, j]`` wherever
        ``i`` is an ancestor of ``j`` in ``G`` (0 elsewhere).
    """
    import networkx as nx
    import matplotlib.pyplot as plt
    from gen_data_fns import create_dataset4, plot_matrix
    import numpy as np

    trials = [create_dataset4(n_ctrnn, n_ctrnn, m, w, tau).T]
    for _ in range(n_samp - 1):
        trials.append(create_dataset4(n_ctrnn, n_ctrnn, m, w, tau).T)
    dataset = np.vstack(trials) if len(trials) > 1 else trials[0]

    # The previous code also built a self-loop-free copy of this graph via
    # the removed ``Graph.selfloop_edges()`` method; that copy was unused.
    G, causaleff0 = pc_bootstrap_2(dataset, lag, alpha, m, n_ctrnn, n_samp, niter)

    causaleff3 = np.zeros((m, m))
    for j in list(G.nodes):
        for i in nx.ancestors(G, j):
            causaleff3[i, j] = causaleff0[i, j]

    # Normalise for display only; skip when there is nothing to scale so an
    # all-zero matrix is not turned into NaN.
    peak = np.max(np.abs(causaleff3))
    scaled = causaleff3 / peak if peak > 0 else causaleff3
    plot_matrix(scaled, motif, 'pc_causaleff')
    plot_matrix(0.5 * (causaleff3 != 0), motif, 'pc_causalconn')
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    nx.draw(G, with_labels=True, ax=ax)
    fig.savefig('dag_' + motif + '.png', format='png', dpi=600)
    return G, causaleff3
1213
+ #%%
1214
def ablation(lag, dataset, g01, G, k, alpha=None):
    """Re-run PC with variable ``k`` removed and add implied ``k -> x`` edges.

    The data are unrolled without column ``k``, the stable PC pipeline is
    run, and the rolled-up graph is relabelled back to the original
    variable ids.  For every edge ``node1 -> node2`` of that graph, the edge
    ``k -> node2`` is added to a copy of ``G`` when ``node1 != node2``,
    ``node1 -> node2`` and ``k -> node2`` are both absent from ``g01``, and
    ``node1`` is an ancestor of ``k`` in ``g01``.

    Args:
        lag: maximum lag used when unrolling the data.
        dataset: 2-D data array; columns are variables.
        g01: reference graph used for the edge/ancestor checks.
        G: graph that is copied and augmented (not modified in place).
        k: index of the variable to remove.
        alpha: significance level for the CI test.  When omitted, a
            module-level ``alpha`` is used, as the original body did.

    Returns:
        The augmented copy of ``G``.

    Raises:
        NameError: if ``alpha`` is not given and no module-level ``alpha``
            exists.
    """
    if alpha is None:
        # Legacy behaviour: the body used to read a free variable ``alpha``.
        try:
            alpha = globals()['alpha']
        except KeyError:
            raise NameError("ablation() needs `alpha`: pass it explicitly "
                            "or define a module-level `alpha`")

    m = dataset.shape[1]
    dataset3 = data_transformed(np.delete(dataset, [k], axis=1), lag)
    g2 = G.copy()

    # Map the m-1 reduced indices back to the original variable ids.
    labels = list(range(m))
    labels.remove(k)
    relabels = dict(zip(range(m - 1), labels))

    (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                     data_matrix=dataset3,
                                     alpha=alpha, method='stable')
    g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
    causaleff = causaleff_ida(g, dataset3)

    # return_finaledges may hand back a graph or an adjacency matrix as its
    # first item; normalise to a DiGraph before relabelling.
    g1 = return_finaledges(g, causaleff, lag, m - 1)[0]
    if not hasattr(g1, 'edges'):
        adjacency = np.asarray(g1)
        g1 = nx.DiGraph()
        g1.add_nodes_from(range(adjacency.shape[0]))
        g1.add_edges_from((int(a), int(b))
                          for a, b in np.argwhere(adjacency != 0))
    g1 = nx.relabel_nodes(g1, relabels)

    # g01 is never modified here, so its ancestor set is loop-invariant.
    ancestors_of_k = nx.ancestors(g01, k)
    for (node1, node2) in list(g1.edges):
        if (node1 != node2
                and not g01.has_edge(node1, node2)
                and node1 in ancestors_of_k
                and not g01.has_edge(k, node2)):
            g2.add_edge(k, node2)
    return g2


def pc_plot_realdata(dataset, lag, alpha, motif, niter=10):
    """Run the bootstrapped PC pipeline plus per-variable ablation and plot.

    A base graph is estimated with ``pc_bootstrap_realdata``; ``ablation``
    is then run for every variable in a 4-process pool and the resulting
    graphs are merged with ``networkx.compose_all``.

    Args:
        dataset: 2-D data array; columns are variables.
        lag: maximum lag used when unrolling the data.
        alpha: significance level for the CI test (also forwarded to
            ``ablation``).
        motif: label used in plot titles/file names.
        niter: number of bootstrap iterations.

    Returns:
        ``(G, causaleff3, causaleff4)``: the merged graph, the bootstrap
        effects restricted to ancestor pairs of ``G``, and the bootstrap
        effects restricted to direct edges of ``G``.
    """
    import networkx as nx
    import matplotlib.pyplot as plt
    from gen_data_fns import plot_matrix
    import numpy as np
    import multiprocessing as mp
    from functools import partial
    import time

    m = dataset.shape[1]
    dataset01 = data_transformed(dataset, lag)
    g0, causaleff0 = pc_bootstrap_realdata(dataset01, lag, alpha, m, niter)
    g01 = g0.copy()
    g01.remove_edges_from(list(nx.selfloop_edges(g01)))
    G = g0

    func = partial(ablation, lag, dataset, g01, G, alpha=alpha)
    pool = mp.Pool(4)
    try:
        t1 = time.time()
        g1 = pool.map(func, range(m))
        print(time.time() - t1)
    finally:
        # Always release the worker processes, even if a worker raised.
        pool.close()
        pool.join()

    G = nx.compose_all(g1)
    nx.draw(G, with_labels=True)

    causaleff3 = np.zeros((m, m))
    for j in list(G.nodes):
        for i in nx.ancestors(G, j):
            causaleff3[i, j] = causaleff0[i, j]

    causaleff4 = np.zeros((m, m))
    for (i, j) in list(G.edges):
        causaleff4[i, j] = causaleff0[i, j]

    # Normalise for display only; skip when there is nothing to scale so an
    # all-zero matrix is not turned into NaN.
    peak = np.max(np.abs(causaleff3))
    scaled = causaleff3 / peak if peak > 0 else causaleff3
    plot_matrix(scaled, motif, 'pc_causaleff')
    plot_matrix(0.5 * (causaleff3 != 0), motif, 'pc_causalconn')
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    nx.draw(G, with_labels=True, ax=ax)
    fig.savefig('dag_' + motif + '.png', format='png', dpi=600)
    return G, causaleff3, causaleff4
1285
+ # #%%
1286
+ # def pc_plot_realdata(dataset,lag,alpha,motif,niter=10):
1287
+ # import networkx as nx
1288
+ # import matplotlib.pyplot as plt
1289
+ # from gen_data_fns import create_dataset4, plot_matrix
1290
+ # import numpy as np
1291
+ # from pcalg import causaleff_ida, pc_bootstrap
1292
+ # import random
1293
+ # from tqdm import tqdm
1294
+ # m=dataset.shape[1]
1295
+ # dataset01=data_transformed(dataset, lag)
1296
+ # g0, causaleff0 = pc_bootstrap_realdata(dataset01,lag,alpha,m,niter)
1297
+ # g01=g0.copy()
1298
+ # g01.remove_edges_from(g01.selfloop_edges())
1299
+ # G=g0
1300
+ # #nx.draw(G, with_labels= True,ax=ax)
1301
+ # g1={}
1302
+ # causaleff2={}
1303
+ # g2=[]
1304
+ # for k in tqdm(range(m)):
1305
+ # #idx=random.randint(0,dataset.shape[0]-1000)
1306
+ # #k=random.sample(range(m),1)[0]
1307
+ # #dataset2 = dataset[idx:(idx+5000),:]
1308
+ # dataset3 = data_transformed(np.delete(dataset, [k],axis=1),lag)
1309
+ # g_iter=G.copy()
1310
+ # labels = list(range(m))
1311
+ # labels.remove(k)
1312
+ # relabels = dict(zip(range(m-1),labels))
1313
+ # (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
1314
+ # data_matrix=dataset3,
1315
+ # alpha=alpha,method='stable')
1316
+ # g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
1317
+ # causaleff = causaleff_ida(g,dataset3)
1318
+
1319
+ # g1[iter], causaleff1 = return_finaledges(g,causaleff,lag,m-1)
1320
+ # g1[iter] = nx.relabel_nodes(g1[iter],relabels)
1321
+ # causaleff2[iter] = causaleff1/np.max(np.abs(causaleff1))
1322
+
1323
+ # for (node1,node2) in list(g1[iter].edges):
1324
+ # if node1 !=node2 and (node1,node2) not in list(g01.edges) and node1 in nx.ancestors(g01,k) and (k,node2) not in list(g01.edges):#(node1, k) in list(g01.edges) and (k,node2) not in list(g01.edges):
1325
+ # g_iter.add_edge(k,node2)
1326
+ # nx.draw(G,with_labels=True)
1327
+ # causaleff3 = np.zeros((m,m))
1328
+ # for i in list(G.nodes):
1329
+ # for j in list(G.nodes):
1330
+ # if i in nx.ancestors(G,j):
1331
+ # causaleff3[i,j] = causaleff0[i,j]#causaleff3[i,j]+causaleff2[iter][i,j]
1332
+ # causaleff4 = np.zeros((m,m))
1333
+ # for i in list(G.nodes):
1334
+ # for j in list(G.nodes):
1335
+ # if i in G.predecessors(j):
1336
+ # causaleff4[i,j] = causaleff0[i,j]
1337
+ # plot_matrix(causaleff3/np.max(np.abs(causaleff3)),motif,'pc_causaleff')
1338
+ # plot_matrix(0.5*(causaleff3!=0),motif,'pc_causalconn')
1339
+ # fig, ax = plt.subplots(1,1,figsize=(10,10))
1340
+ # nx.draw(G, with_labels= True,ax=ax)
1341
+ # fig.savefig('dag_'+motif+'.png',format = 'png', dpi =600)
1342
+ # return G,causaleff3,causaleff4
1343
+ #%%
1344
+ #lag=2
1345
def pc_bootstrap_realdata(dataset2, lag, alpha, m, niter=10):
    """Aggregate PC-algorithm estimates over ``niter`` shifted row windows.

    Iteration ``t`` uses rows ``t .. n - niter + t`` of ``dataset2`` (all
    windows have the same length), runs the stable PC pipeline with the
    Gaussian CI test and rolls the result up to ``m`` variables with
    ``return_finaledges``.

    Args:
        dataset2: lag-unrolled 2-D data array.
        lag: maximum lag that was used to unroll the data.
        alpha: significance level for the CI test.
        m: number of variables in the rolled-up graph.
        niter: number of windows/iterations.

    Returns:
        ``(g2, causaleff3)``: ``g2`` is a ``networkx.DiGraph`` on nodes
        ``0..m-1`` holding every edge found in at least 25% of the
        iterations; ``causaleff3`` is the ``m x m`` mean of the
        per-iteration effect matrices (all zeros when ``niter`` is 0).
    """
    import networkx as nx
    import numpy as np
    # NOTE(review): imports from a top-level ``pcalg`` module, as the
    # original did -- confirm this resolves when installed as a package.
    from pcalg import causaleff_ida
    from tqdm import tqdm

    n_rows = dataset2.shape[0]
    edge_votes = np.zeros((m, m))
    causaleff3 = np.zeros((m, m))
    for start in tqdm(range(niter)):
        window = dataset2[start:(n_rows - niter + start + 1), :]
        (g, sep_set) = estimate_skeleton(indep_test_func=ci_test_gauss,
                                         data_matrix=window,
                                         alpha=alpha, method='stable')
        g = estimate_cpdag(skel_graph=g, sep_set=sep_set)
        causaleff = causaleff_ida(g, window)

        # return_finaledges has returned both (graph, effects) and
        # (adjacency, effects, ancestor_effects) over time; take the first
        # two items and accept either edge representation.
        rolled = return_finaledges(g, causaleff, lag, m)
        edges = rolled[0]
        if hasattr(edges, 'edges'):
            for i in range(m):
                for j in range(m):
                    if (i, j) in edges.edges:
                        edge_votes[i, j] += 1
        else:
            edge_votes += (np.asarray(edges) != 0)
        causaleff3 = causaleff3 + rolled[1]

    # Keep an edge when at least a quarter of the iterations agree on it.
    keep = edge_votes >= (25 * niter / 100.)
    g2 = nx.DiGraph()
    g2.add_nodes_from(range(m))
    g2.add_edges_from((int(i), int(j)) for i, j in np.argwhere(keep))

    if niter > 0:
        causaleff3 = causaleff3 / niter
    return g2, causaleff3
1390
def data_transformed(data, lag):
    """Unroll a time series into non-overlapping lagged samples with gaps.

    The rows of ``data`` are cut into consecutive windows of
    ``2 * (lag + 1)`` rows.  From each window only the first ``lag + 1``
    rows are kept, so consecutive output samples are separated by a gap of
    ``lag + 1`` unused rows.

    Args:
        data: 2-D array, rows are time points and columns are variables.
        lag: maximum lag; each variable contributes ``lag + 1`` columns.

    Returns:
        Float array of shape ``(n_windows, p * (lag + 1))`` where column
        ``(lag + 1) * i + j`` holds variable ``i`` at offset ``j`` inside
        the window.
    """
    import numpy as np
    width = lag + 1
    stride = 2 * width
    n_rows, n_vars = data.shape[0], data.shape[1]
    n_windows = int(np.floor((n_rows - lag) / stride))
    trimmed = data[:n_windows * stride, :]
    unrolled = np.zeros((n_windows, n_vars * width))
    # Columns offset, offset + width, ... are the same offset of every
    # variable, so each offset can be filled for all variables at once.
    for offset in range(width):
        unrolled[:, offset::width] = trimmed[offset::stride, :]
    return unrolled
1402
def data_transformed_mod(data, lag, node):
    """Unroll a time series, keeping offset 0 only for one chosen variable.

    Rows are cut into consecutive windows of ``2 * (lag + 1)`` rows.  The
    output has one column for ``node`` at offset 0, followed by ``lag``
    columns per variable for offsets ``1..lag``.

    Args:
        data: 2-D array, rows are time points and columns are variables.
        lag: maximum lag.
        node: column index of the variable whose offset-0 value is kept.

    Returns:
        Float array of shape ``(n_windows, p * lag + 1)``: column 0 is
        ``node`` at offset 0 and column ``lag * i + j`` (``1 <= j <= lag``)
        is variable ``i`` at offset ``j``.
    """
    import numpy as np
    width = lag + 1
    stride = 2 * width
    n_rows, n_vars = data.shape[0], data.shape[1]
    n_windows = int(np.floor((n_rows - lag) / stride))
    trimmed = data[:n_windows * stride, :]
    unrolled = np.zeros((n_windows, n_vars * lag + 1))
    for var in range(n_vars):
        first_col = lag * var
        for offset in range(1, width):
            unrolled[:, first_col + offset] = trimmed[offset::stride, var]
    unrolled[:, 0] = trimmed[0::stride, node]
    return unrolled
1416
+
1417
def data_transformed_overlapping(data, tau):
    """Unroll a time series into overlapping windows of ``tau`` rows.

    Args:
        data: 2-D array, rows are time points and columns are variables.
        tau: window length in rows.

    Returns:
        * the string ``'lag should be >= 1'`` when ``tau < 1``;
        * ``data`` itself (not a copy) when ``tau == 1``;
        * otherwise a float array of shape ``(n - tau + 1, p * tau)`` whose
          row ``t`` is rows ``t .. t + tau - 1`` of ``data`` laid end to
          end.
    """
    import numpy as np
    n_rows, n_vars = data.shape[0], data.shape[1]
    if tau < 1:
        return('lag should be >= 1')
    if tau == 1:
        return data
    n_windows = n_rows - tau + 1
    stacked = np.zeros((n_windows, n_vars * tau))
    for start in range(n_windows):
        # Flatten the window row by row.
        stacked[start, :] = data[start:start + tau, ].reshape(-1)
    return stacked
1434
+
1435
def data_transformed_btstrp(data):
    """Build a matrix of flattened bootstrap replicates of ``data``.

    Uses ``arch.bootstrap.StationaryBootstrap`` with a block size equal to
    the median of the first column of
    ``arch.bootstrap.optimal_block_length(data)`` and a fixed random state
    (seed 111).

    Args:
        data: 2-D array with ``n`` rows and ``p`` columns.

    Returns:
        Float array of shape ``(2 * n * p, n * p)``; each row is one
        bootstrap replicate flattened with ``numpy.hstack``.
    """
    import numpy as np
    from arch import bootstrap
    from numpy.random import RandomState
    block_lengths = bootstrap.optimal_block_length(data)
    n_rows, n_vars = data.shape[0], data.shape[1]
    n_draws = 2 * n_rows * n_vars
    replicates = np.zeros((n_draws, n_rows * n_vars))
    sampler = bootstrap.StationaryBootstrap(
        np.median(block_lengths.iloc[:, 0]),
        data,
        random_state=RandomState(111),
    )
    for row, draw in enumerate(sampler.bootstrap(n_draws)):
        # draw[0][0] is the resampled copy of ``data`` (first positional arg).
        replicates[row, :] = np.hstack(draw[0][0])
    return replicates
1449
+ # def data_transformed_v2(data, lag, window):
1450
+ # import numpy as np
1451
+ # n = data.shape[0]
1452
+ # m = data.shape[1]
1453
+ # lag1=lag+1
1454
+ # data1 = np.zeros((int(n/(2*window)),m*lag1))
1455
+ # for t1 in range(int(n/(2*window))):
1456
+ # t=2*window*t1
1457
+ # for p1 in range(m):
1458
+ # for l in range(lag1):
1459
+ # data1[t1,lag1*p1+l]=np.mean(data[(t+l):(t+l+window),p1])
1460
+ # return data1
1461
+
1462
def data_transformed1(data, lag):
    """Unroll a time series keeping only offsets 0 and ``lag`` per window.

    Rows are cut into consecutive windows of ``2 * (lag + 1)`` rows; from
    each window the rows at offset 0 and offset ``lag`` are kept.

    Args:
        data: 2-D array, rows are time points and columns are variables.
        lag: the single lag to pair with offset 0.

    Returns:
        Float array of shape ``(n_windows, 2 * p)``: column ``2 * i`` is
        variable ``i`` at offset 0 and column ``2 * i + 1`` is variable
        ``i`` at offset ``lag``.
    """
    import numpy as np
    offsets = (0, lag)
    stride = 2 * (lag + 1)
    n_rows, n_vars = data.shape[0], data.shape[1]
    n_windows = int(np.floor((n_rows - lag) / stride))
    trimmed = data[:n_windows * stride, :]
    paired = np.zeros((n_windows, n_vars * len(offsets)))
    for slot, offset in enumerate(offsets):
        paired[:, slot::len(offsets)] = trimmed[offset::stride, :]
    return paired
1478
def data_transformed_nogaps(data, lag):
    """Unroll a time series into back-to-back windows of ``lag + 1`` rows.

    Unlike ``data_transformed`` no rows are skipped between windows.

    Args:
        data: 2-D array, rows are time points and columns are variables.
        lag: maximum lag; each variable contributes ``lag + 1`` columns.

    Returns:
        Float array of shape ``(n_windows, p * (lag + 1))`` where column
        ``(lag + 1) * i + j`` holds variable ``i`` at offset ``j`` inside
        the window.
    """
    import numpy as np
    width = lag + 1
    n_rows, n_vars = data.shape[0], data.shape[1]
    n_windows = int(np.floor((n_rows - lag) / width))
    trimmed = data[:n_windows * width, :]
    unrolled = np.zeros((n_windows, n_vars * width))
    for offset in range(width):
        unrolled[:, offset::width] = trimmed[offset::width, :]
    return unrolled
1490
+ # def data_transformed(data, lag):
1491
+ # import numpy as np
1492
+ # n = data.shape[0]
1493
+ # p = data.shape[1]
1494
+ # lag1=lag+1
1495
+ # new_n = int(np.floor(n/(2*lag1))*(2*lag1))
1496
+ # data=data[:new_n,:]
1497
+ # data2=np.zeros((int(new_n/(2*lag1)),p*lag1))
1498
+ # for i in range(p):
1499
+ # for j in range(lag1):
1500
+ # data2[:,lag1*i+j]=data[j::(2*lag1),i]
1501
+ # return data2
1502
+
1503
+ # def data_transformed_old(data, lag):
1504
+ # import numpy as np
1505
+ # n = data.shape[0]
1506
+ # p = data.shape[1]
1507
+ # lag1=lag+1
1508
+ # new_n = int(np.floor(n/lag1)*lag1)
1509
+ # data=data[:new_n,:]
1510
+ # data2=np.zeros((int(new_n/lag1),p*lag1))
1511
+ # for i in range(p):
1512
+ # for j in range(lag1):
1513
+ # data2[:,lag1*i+j]=data[j::lag1,i]
1514
+ # return data2
1515
def data_transformed_fin(data, lag, maxdeg=1):
    """Sub-sample rolling sums of ``data`` (optionally with power terms).

    First ``data1 = data + sum_{d=2..maxdeg} data**d / d!`` is formed, then
    the sum of ``data1`` over every block of ``lag`` consecutive rows is
    computed, and finally every ``2 * lag``-th of those sums is kept.

    Args:
        data: 2-D array, rows are time points and columns are variables.
        lag: length of the summation window (must be >= 1).
        maxdeg: highest power of ``data`` added to ``data1``.

    Returns:
        Float array with ``p`` columns.  It has ``int(n / (2 * lag))`` rows,
        or one more when the sampling yields an extra point (previously
        that case raised ``IndexError``); rows that receive no sample are
        left at 0.
    """
    import math

    n = data.shape[0]
    p = data.shape[1]
    data1 = data
    if maxdeg > 1:
        for degree in range(2, maxdeg + 1):
            # math.factorial replaces the removed ``np.math`` alias.
            data1 = data1 + (data ** degree) / math.factorial(degree)

    running = np.cumsum(data1, axis=0)
    # window_sums[t] = sum of data1 rows t+1 .. t+lag
    window_sums = running[lag:, ] - running[:-lag, ]

    picks = np.arange(0, n - lag - 1, 2 * lag)
    n_out = max(int(n / (2 * lag)), len(picks))
    data3 = np.zeros((n_out, p))
    data3[:len(picks), :] = window_sums[picks, :]
    return data3
1530
def data_transformed_2(data, lag, n_ctrnn, n_samp):
    """Unroll stacked trials into lagged samples at a random start offset.

    ``data`` is read with a row stride of ``n_ctrnn``, so it is expected to
    hold ``n_samp`` trials of ``n_ctrnn`` rows each, stacked row-wise.  A
    start offset ``r`` is drawn uniformly from ``0 .. n_ctrnn - lag - 1``
    and rows ``r .. r + lag`` of every trial form one output sample.

    The column layout is the one produced by ``data_transformed`` and
    expected by ``return_finaledges``: column ``(lag + 1) * j + o`` holds
    variable ``j`` at offset ``o``.  The previous index
    ``lag * (j - 1) + o`` wrapped around for ``j = 0`` and made
    neighbouring variables overwrite each other.

    Args:
        data: 2-D array of shape ``(n_ctrnn * n_samp, p)``.
        lag: maximum lag; each variable contributes ``lag + 1`` columns.
        n_ctrnn: number of rows per trial.
        n_samp: number of trials (= number of output rows).

    Returns:
        Float array of shape ``(n_samp, (lag + 1) * p)``.
    """
    import numpy as np
    import random as random
    r = random.randint(0, n_ctrnn - lag - 1)
    lag1 = lag + 1
    p = data.shape[1]
    y = np.zeros((n_samp, lag1 * p))
    for offset in range(lag1):
        # Same offset of every variable, one row per trial.
        y[:, offset::lag1] = data[r + offset::n_ctrnn, :]
    return y
1544
+ #causaleff = causaleff_ida(g,dataset2)
1545
+ #g1, causaleff1 = return_finaledges(g,causaleff,lag1,p)
1546
+ #nx.draw(g1,with_labels=True)
1547
+ #%%plot graph
1548
def return_relabels(lag, p):
    """Map unrolled node indices to ``'<variable>_<offset>'`` labels.

    Args:
        lag: maximum lag; each variable has ``lag + 1`` unrolled nodes.
        p: number of variables.

    Returns:
        Dict mapping ``(lag + 1) * i + j`` to ``'i_j'`` for every variable
        ``i`` and offset ``j``.
    """
    width = lag + 1
    return {
        width * var + offset: str(var) + '_' + str(offset)
        for var in range(p)
        for offset in range(width)
    }
1555
+
1556
def orient(g, lag, m):
    """Direct every edge of an unrolled graph from lower to higher offset.

    Unrolled node ``(lag + 1) * i + j`` stands for variable ``i`` at offset
    ``j``.  For every pair of nodes joined in ``g`` (in either direction)
    an edge is emitted from the node with offset ``j`` to the node with
    offset ``l`` whenever ``j <= l``; pairs at the same offset therefore
    get an edge in both directions.

    Args:
        g: graph over the unrolled nodes.
        lag: maximum lag.
        m: number of variables.

    Returns:
        A ``networkx.DiGraph`` with the nodes of ``g`` and the re-directed
        edges.
    """
    import networkx as nx
    import numpy as np
    width = lag + 1
    labelmat = np.arange(0, width * m).reshape((m, width))

    directed = set([])
    for src_var in range(m):
        for dst_var in range(m):
            for src_off in range(width):
                tail = labelmat[src_var, src_off]
                # Only targets at the same or a later offset.
                for dst_off in range(src_off, width):
                    head = labelmat[dst_var, dst_off]
                    if g.has_edge(tail, head) or g.has_edge(head, tail):
                        directed.add((tail, head))

    g1 = nx.DiGraph()
    g1.add_nodes_from(g.nodes)
    g1.add_edges_from(directed)
    return g1
1575
+ #%%
1576
def return_finaledges(g, causaleff, lag, m):
    """Roll an unrolled (lagged) graph up to an ``m``-variable summary.

    Unrolled node ``(lag + 1) * i + j`` stands for variable ``i`` at offset
    ``j``.  Only node pairs whose source offset is not later than the
    target offset (``j <= l``) are considered.

    Args:
        g: directed graph over the unrolled nodes.
        causaleff: array indexable as ``causaleff[source, target]`` over
            the unrolled nodes.
        lag: maximum lag.
        m: number of variables.

    Returns:
        ``(edge_mat, causaleff1, causaleff2)``, all ``m x m``:

        * ``edge_mat[i, k]`` is 1 when some unrolled edge goes from a node
          of variable ``i`` to a node of variable ``k``;
        * ``causaleff1[i, k]`` is the mean of ``causaleff`` over those
          unrolled edges (0 if there are none);
        * ``causaleff2[i, k]`` is the mean of ``causaleff`` over unrolled
          pairs where the source is an ancestor of the target (0 if there
          are none).
    """
    import networkx as nx
    import numpy as np
    width = lag + 1
    labelmat = np.arange(0, width * m).reshape((m, width))
    causaleff1 = np.zeros((m, m))
    causaleff2 = np.zeros((m, m))
    edge_mat = np.zeros((m, m), dtype=bool)

    # g is not modified here, so each node's ancestor set is computed once
    # instead of once per (i, j, k, l) combination.
    ancestors_of = {}

    for i in range(m):
        for k in range(m):
            direct = []
            ancestral = []
            for j in range(width):
                source = labelmat[i, j]
                for l in range(j, width):
                    target = labelmat[k, l]
                    if g.has_edge(source, target):
                        edge_mat[i, k] = True
                        direct.append(causaleff[source, target])
                    key = int(target)
                    if key not in ancestors_of:
                        ancestors_of[key] = nx.ancestors(g, target)
                    if source in ancestors_of[key]:
                        ancestral.append(causaleff[source, target])
            if len(direct) > 0:
                causaleff1[i, k] = np.mean(direct)
            if len(ancestral) > 0:
                causaleff2[i, k] = np.mean(ancestral)
    return edge_mat.astype(int), causaleff1, causaleff2
1621
+
1622
def return_finaledges_v2(g, causaleff, lag, m):
    """Roll an unrolled graph up to ``m`` variables (``lag`` nodes each).

    Same as ``return_finaledges`` except that every variable has ``lag``
    unrolled nodes instead of ``lag + 1``: unrolled node ``lag * i + j``
    stands for variable ``i`` at offset ``j``.  Only node pairs whose
    source offset is not later than the target offset (``j <= l``) are
    considered.

    Args:
        g: directed graph over the unrolled nodes.
        causaleff: array indexable as ``causaleff[source, target]`` over
            the unrolled nodes.
        lag: number of unrolled nodes per variable.
        m: number of variables.

    Returns:
        ``(edge_mat, causaleff1, causaleff2)``, all ``m x m``:

        * ``edge_mat[i, k]`` is 1 when some unrolled edge goes from a node
          of variable ``i`` to a node of variable ``k``;
        * ``causaleff1[i, k]`` is the mean of ``causaleff`` over those
          unrolled edges (0 if there are none);
        * ``causaleff2[i, k]`` is the mean of ``causaleff`` over unrolled
          pairs where the source is an ancestor of the target (0 if there
          are none).
    """
    import networkx as nx
    import numpy as np
    labelmat = np.arange(0, lag * m).reshape((m, lag))
    causaleff1 = np.zeros((m, m))
    causaleff2 = np.zeros((m, m))
    edge_mat = np.zeros((m, m), dtype=bool)

    # g is not modified here, so each node's ancestor set is computed once
    # instead of once per (i, j, k, l) combination.
    ancestors_of = {}

    for i in range(m):
        for k in range(m):
            direct = []
            ancestral = []
            for j in range(lag):
                source = labelmat[i, j]
                for l in range(j, lag):
                    target = labelmat[k, l]
                    if g.has_edge(source, target):
                        edge_mat[i, k] = True
                        direct.append(causaleff[source, target])
                    key = int(target)
                    if key not in ancestors_of:
                        ancestors_of[key] = nx.ancestors(g, target)
                    if source in ancestors_of[key]:
                        ancestral.append(causaleff[source, target])
            if len(direct) > 0:
                causaleff1[i, k] = np.mean(direct)
            if len(ancestral) > 0:
                causaleff2[i, k] = np.mean(ancestral)
    return edge_mat.astype(int), causaleff1, causaleff2
1667
+
1668
+ # def return_finaledges(g,causaleff,lag,m):
1669
+ # import networkx as nx
1670
+ # import numpy as np
1671
+ # labels=np.arange(0,(lag+1)*m)
1672
+
1673
+ # edge=set([])
1674
+ # labelmat=labels.reshape((m,lag+1))
1675
+ # causaleff1 = np.zeros((m,m))
1676
+ # edge_mat = np.zeros((m,m))==1
1677
+ # for i in range(m):
1678
+ # for k in range(m):
1679
+ # acc=[]
1680
+ # for j in range(lag+1):
1681
+ # for l in range(lag+1):
1682
+ # if j<=l:# and j>=l-lag:
1683
+ # #if j==l-1:
1684
+ # if (labelmat[i,j],labelmat[k,l]) in g.edges or (labelmat[k,l],labelmat[i,j]) in g.edges:
1685
+ # #print((labelmat[i,j],labelmat[k,l]))
1686
+ # # edge = edge | {(i,k)}
1687
+ # edge_mat[i,k]=True
1688
+ # #acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
1689
+ # #for lagit in range(1,lag+1):
1690
+ # #if labelmat[i,j] in nx.ancestors(g,labelmat[k,l]):####CF at delay=lag
1691
+ # acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
1692
+ # if len(acc)>0:
1693
+ # #iacc=np.argmax(np.abs(acc))
1694
+ # causaleff1[i,k] = np.mean(acc)#acc[iacc]#np.mean(acc)
1695
+ # return edge_mat.astype(int), causaleff1
1696
+
1697
+ # def return_finaledges_v2(g,causaleff,lag,m):
1698
+ # import networkx as nx
1699
+ # import numpy as np
1700
+ # labels=np.arange(0,(lag+1)*m)
1701
+
1702
+ # edge=set([])
1703
+ # labelmat=labels.reshape((m,lag+1))
1704
+ # causaleff1 = np.zeros((m,m))
1705
+ # edge_mat = np.zeros((m,m))==1
1706
+ # for i in range(m):
1707
+ # for k in range(m):
1708
+ # acc=[]
1709
+ # for j in range(lag+1):
1710
+ # for l in range(lag+1):
1711
+ # if j<=l:# and j>=l-lag:
1712
+ # #if j==l-1:
1713
+ # if (labelmat[i,j],labelmat[k,l]) in g.edges or (labelmat[k,l],labelmat[i,j]) in g.edges:
1714
+ # #print((labelmat[i,j],labelmat[k,l]))
1715
+ # # edge = edge | {(i,k)}
1716
+ # edge_mat[i,k]=True
1717
+ # #acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
1718
+ # #for lagit in range(1,lag+1):
1719
+ # #if labelmat[i,j] in nx.ancestors(g,labelmat[k,l]):####CF at delay=lag
1720
+ # acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
1721
+ # if len(acc)>0:
1722
+ # #iacc=np.argmax(np.abs(acc))
1723
+ # causaleff1[i,k] = np.mean(acc)#acc[iacc]#np.mean(acc)
1724
+ # #g1=nx.DiGraph()
1725
+ # #g1.add_nodes_from(range(m))
1726
+ # #g1.add_edges_from(edge)
1727
+ # #g1.graph['edge'] = {'arrowsize': '0.6', 'splines': 'curved'}
1728
+ # #g1.graph['graph'] = {'scale': '3'}
1729
+
1730
+ # return edge_mat.astype(int), causaleff1
1731
+
1732
def return_finaledges_fin(g, causaleff, p):
    """Collect the edges pointing into node 0 and their causal effects.

    For each ``k`` in ``range(p)`` the pair ``(k + 1, 0)`` is looked up in
    ``g.edges``.  When it is present, entry ``k`` of the indicator array is
    set to 1 and ``causaleff[k + 1, 0]`` is copied to entry ``k`` of the
    effect array; otherwise both entries stay 0.

    NOTE(review): the original layout lost its indentation, so it is not
    certain whether the effect was copied only when the edge exists or for
    every ``k``.  This version copies it only for existing edges -- confirm.

    Args:
        g: object whose ``edges`` attribute supports ``(u, v) in g.edges``
            (presumably a networkx graph -- verify against the caller).
        causaleff: array-like indexable as ``causaleff[k + 1, 0]``.
        p: number of candidate source nodes, labelled ``1 .. p``.

    Returns:
        A tuple ``(edge_mat, causaleff1)`` of two length-``p`` arrays: an
        integer 0/1 indicator and a float array of the copied effects.
    """
    # The module already imports numpy at file level; the former
    # function-local imports (including an unused networkx import) are gone.
    edge_mat = np.zeros(p, dtype=int)
    causaleff1 = np.zeros(p)
    edges = g.edges  # look the attribute up once instead of per iteration
    for k in range(p):
        source = k + 1
        if (source, 0) in edges:
            edge_mat[k] = 1
            causaleff1[k] = causaleff[source, 0]
    return edge_mat, causaleff1
1743
+
1744
+ # def return_finaledges(g,causaleff,lag,m):
1745
+ # import networkx as nx
1746
+ # import numpy as np
1747
+ # labels=np.arange(0,(lag+1)*m)
1748
+
1749
+ # edge=set([])
1750
+ # labelmat=labels.reshape((m,lag+1))
1751
+ # causaleff1 = np.zeros((m,m))
1752
+ # for i in range(m):
1753
+ # for k in range(m):
1754
+ # acc=[]
1755
+ # for j in range(lag+1):
1756
+ # for l in range(lag+1):
1757
+ # if j<=l:
1758
+ # #if j==l-1:
1759
+ # if (labelmat[i,j],labelmat[k,l]) in g.edges:
1760
+ # #print((labelmat[i,j],labelmat[k,l]))
1761
+ # edge = edge | {(i,k)}
1762
+ # #acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
1763
+ # if j==l-lag and labelmat[i,j] in nx.ancestors(g,labelmat[k,l]):####CF at delay=lag
1764
+ # acc.append(causaleff[labelmat[i,j],labelmat[k,l]])
1765
+ # if len(acc)>0:
1766
+ # causaleff1[i,k] = np.mean(acc)
1767
+
1768
+ # g1=nx.DiGraph()
1769
+ # g1.add_nodes_from(range(m))
1770
+ # g1.add_edges_from(edge)
1771
+ # #g1.graph['edge'] = {'arrowsize': '0.6', 'splines': 'curved'}
1772
+ # #g1.graph['graph'] = {'scale': '3'}
1773
+ # return g1, causaleff1
1774
+
1775
def find_lag(data):
    """Return the mean row-to-row distance for every candidate lag.

    For each ``lag`` in ``1 .. int(n / 2) - 1`` (``n`` being the number of
    rows) the Euclidean norm of ``data[i] - data[i + lag]`` is computed for
    ``i`` in ``range(n - lag)``.  Those norms are written into a length-``n``
    buffer that starts as zeros, and the mean of the whole buffer is stored,
    so the ``lag`` unfilled trailing slots count as zeros in the average.

    Args:
        data: 2-D numpy array; rows are compared with each other.

    Returns:
        A float array of length ``int(n / 2)``.  Entry 0 is never written
        and stays 0; entry ``lag`` holds the mean described above.
    """
    # The second axis length is read but not otherwise used.
    n_rows, _ = data.shape[0], data.shape[1]
    half = int(n_rows / 2)
    valuelag = np.zeros(half)
    for lag in range(1, half):
        n_pairs = n_rows - lag
        distances = np.zeros(n_rows)
        distances[:n_pairs] = [
            np.linalg.norm(data[row, :] - data[row + lag, :])
            for row in range(n_pairs)
        ]
        valuelag[lag] = np.mean(distances)
    return valuelag
1786
+ # %%
1787
def causaleffin(G, data_trans, lag, g=None):
    """Estimate linear causal effects between time-unrolled node labels.

    Every node ``v`` of ``G`` is expanded into ``lag + 1`` labels
    ``labelmat[v, 0 .. lag]`` taken from
    ``np.arange((lag + 1) * m).reshape((m, lag + 1))``.  For each ordered
    pair of distinct nodes ``(x1, y1)`` that is an edge of ``G`` and each
    pair of offsets ``i <= j``, with ``x = labelmat[x1, i]`` and
    ``y = labelmat[y1, j]``:

    * if ``y`` is a parent of ``x``, the effect ``[x, y]`` is set to 0;
    * otherwise column ``y`` of the data is regressed by least squares (with
      an intercept) on the columns of ``x``'s parents plus ``x`` itself, and
      the coefficient of ``x`` is stored at ``[x, y]``.

    Fixes over the previous body, which could not run: the effect matrix is
    created with a proper shape tuple and is large enough for every label,
    the data is read from the ``data_trans`` parameter, and the parent
    lookup uses a graph that is actually in scope.

    Args:
        G: graph exposing ``nodes`` and ``edges``.  Its nodes are used as
            row indices into the label table, so they are assumed to be the
            integers ``0 .. m - 1`` -- TODO confirm.
        data_trans: 2-D array-like whose columns are addressed by label
            (presumably ``(lag + 1) * m`` columns -- verify against caller).
        lag: largest time offset; non-negative integer.
        g: optional graph providing ``predecessors(label)`` for the labels.
            Defaults to ``G``.  NOTE(review): the original body referenced
            an undefined name ``g``; confirm whether a separate unrolled
            graph was intended.  With the default, ``G`` itself must
            contain every label that is looked up.

    Returns:
        A square float array with one row and column per label; entries
        that were never estimated are 0.
    """
    nodes = list(G.nodes)
    n_nodes = len(nodes)
    parent_graph = G if g is None else g
    edges = G.edges
    data = np.asarray(data_trans)

    labelmat = np.arange((lag + 1) * n_nodes).reshape((n_nodes, lag + 1))
    # Indexed by label, so it needs one row/column per label, not per node.
    causaleff = np.zeros((labelmat.size, labelmat.size))

    for x1 in nodes:
        for y1 in nodes:
            if x1 == y1 or (x1, y1) not in edges:
                continue
            for i in range(lag + 1):
                x = int(labelmat[x1, i])
                pa_x = list(parent_graph.predecessors(x))
                # Adjust for the parents of x; x itself must be a regressor.
                regressors = pa_x if x in pa_x else pa_x + [x]
                # +1 skips the intercept column prepended below.
                coef_pos = regressors.index(x) + 1
                design = None
                for j in range(i, lag + 1):
                    y = int(labelmat[y1, j])
                    if y in pa_x:
                        causaleff[x, y] = 0
                        continue
                    if design is None:
                        # Depends only on x, so build it once per x.
                        columns = data[:, regressors]
                        intercept = np.ones((columns.shape[0], 1))
                        design = np.hstack((intercept, columns))
                    coefs = np.linalg.lstsq(design, data[:, y], rcond=None)[0]
                    causaleff[x, y] = coefs[coef_pos]
    return causaleff