tigramite-fast 5.2.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tigramite/__init__.py +0 -0
- tigramite/causal_effects.py +1525 -0
- tigramite/causal_mediation.py +1592 -0
- tigramite/data_processing.py +1574 -0
- tigramite/graphs.py +1509 -0
- tigramite/independence_tests/LBFGS.py +1114 -0
- tigramite/independence_tests/__init__.py +0 -0
- tigramite/independence_tests/cmiknn.py +661 -0
- tigramite/independence_tests/cmiknn_mixed.py +1397 -0
- tigramite/independence_tests/cmisymb.py +286 -0
- tigramite/independence_tests/gpdc.py +664 -0
- tigramite/independence_tests/gpdc_torch.py +820 -0
- tigramite/independence_tests/gsquared.py +190 -0
- tigramite/independence_tests/independence_tests_base.py +1310 -0
- tigramite/independence_tests/oracle_conditional_independence.py +1582 -0
- tigramite/independence_tests/pairwise_CI.py +383 -0
- tigramite/independence_tests/parcorr.py +369 -0
- tigramite/independence_tests/parcorr_mult.py +485 -0
- tigramite/independence_tests/parcorr_wls.py +451 -0
- tigramite/independence_tests/regressionCI.py +403 -0
- tigramite/independence_tests/robust_parcorr.py +403 -0
- tigramite/jpcmciplus.py +966 -0
- tigramite/lpcmci.py +3649 -0
- tigramite/models.py +2257 -0
- tigramite/pcmci.py +3935 -0
- tigramite/pcmci_base.py +1218 -0
- tigramite/plotting.py +4735 -0
- tigramite/rpcmci.py +467 -0
- tigramite/toymodels/__init__.py +0 -0
- tigramite/toymodels/context_model.py +261 -0
- tigramite/toymodels/non_additive.py +1231 -0
- tigramite/toymodels/structural_causal_processes.py +1201 -0
- tigramite/toymodels/surrogate_generator.py +319 -0
- tigramite_fast-5.2.10.1.dist-info/METADATA +182 -0
- tigramite_fast-5.2.10.1.dist-info/RECORD +38 -0
- tigramite_fast-5.2.10.1.dist-info/WHEEL +5 -0
- tigramite_fast-5.2.10.1.dist-info/licenses/license.txt +621 -0
- tigramite_fast-5.2.10.1.dist-info/top_level.txt +1 -0
tigramite/independence_tests/gpdc_torch.py
@@ -0,0 +1,820 @@
"""Tigramite causal discovery for time series."""

# Author: Jakob Runge <jakob@jakob-runge.com>
#
# License: GNU General Public License v3.0

from __future__ import print_function
import json, warnings, os, pathlib
import numpy as np
import gc
import dcor
import torch
import gpytorch
from .LBFGS import FullBatchLBFGS
from .independence_tests_base import CondIndTest

class GaussProcRegTorch():
    r"""Gaussian processes abstract base class.

    GP is estimated with gpytorch. Note that the kernel's hyperparameters are
    optimized during fitting.

    The null distribution is not analytically available, but can be
    precomputed with the function generate_and_save_nulldists(...), which saves
    a \*.npz file containing the null distribution for different sample sizes.
    This file can then be supplied as null_dist_filename.

    Assumes one-dimensional X, Y, but can be combined with PairwiseMultCI to
    obtain a test for multivariate X, Y.

    Parameters
    ----------
    null_samples : int
        Number of null samples to use.

    cond_ind_test : CondIndTest
        Conditional independence test that this Gaussian Process Regressor will
        calculate the null distribution for. This is used to grab the
        get_dependence_measure function.

    null_dist_filename : str, optional (default: None)
        Path to file containing null distribution.

    verbosity : int, optional (default: 0)
        Level of verbosity.
    """

    def __init__(self,
                 null_samples,
                 cond_ind_test,
                 null_dist_filename=None,
                 checkpoint_size=None,
                 verbosity=0):
        # Set the dependence measure function
        self.cond_ind_test = cond_ind_test
        # Set member variables
        self.verbosity = verbosity
        # Set the null distribution defaults
        self.null_samples = null_samples
        self.null_dists = {}
        self.null_dist_filename = null_dist_filename
        # Check if we are loading a null distribution from a cached file
        if self.null_dist_filename is not None:
            self.null_dists, self.null_samples = \
                self._load_nulldist(self.null_dist_filename)
        # Size for batching
        self.checkpoint_size = checkpoint_size

    def _load_nulldist(self, filename):
        r"""
        Load a precomputed null distribution from a \*.npz file. This
        distribution can be calculated using generate_and_save_nulldists(...).

        Parameters
        ----------
        filename : string
            Path to the \*.npz file.

        Returns
        -------
        null_dists, null_samples : dict, int
            The null distribution as a dictionary of distributions keyed by
            sample size, and the total number of null samples.
        """
        null_dist_file = np.load(filename)
        null_dists = dict(zip(null_dist_file['T'],
                              null_dist_file['exact_dist']))
        null_samples = len(null_dist_file['exact_dist'][0])
        return null_dists, null_samples

    def _generate_nulldist(self, df,
                           add_to_null_dists=True):
        """Generates null distribution for pairwise independence tests.

        Generates the null distribution for sample size df. Assumes pairwise
        samples transformed to uniform marginals. Uses get_dependence_measure
        available in class and generates self.sig_samples random samples. Adds
        the null distributions to self.null_dists.

        Parameters
        ----------
        df : int
            Degrees of freedom / sample size to generate null distribution for.

        add_to_null_dists : bool, optional (default: True)
            Whether to add the null dist to the dictionary of null dists or
            just return it.

        Returns
        -------
        null_dist : array of shape [df,]
            Only returned if add_to_null_dists is False.
        """

        if self.verbosity > 0:
            print("Generating null distribution for df = %d. " % df)
            if add_to_null_dists:
                print("For faster computations, run function "
                      "generate_and_save_nulldists(...) to "
                      "precompute null distribution and load *.npz file with "
                      "argument null_dist_filename")

        xyz = np.array([0, 1])

        null_dist = np.zeros(self.null_samples)
        for i in range(self.null_samples):
            array = self.cond_ind_test.random_state.random((2, df))
            null_dist[i] = self.cond_ind_test.get_dependence_measure(
                array, xyz)

        null_dist.sort()
        if add_to_null_dists:
            self.null_dists[df] = null_dist
        return null_dist

    def _generate_and_save_nulldists(self, sample_sizes, null_dist_filename):
        """Generates and saves null distribution for pairwise independence
        tests.

        Generates the null distribution for different sample sizes. Calls
        generate_nulldist. Null dists are saved to disk as
        self.null_dist_filename.npz. Also adds the null distributions to
        self.null_dists.

        Parameters
        ----------
        sample_sizes : list
            List of sample sizes.

        null_dist_filename : str
            Name of the file in which to save the null distributions.
        """

        self.null_dist_filename = null_dist_filename

        null_dists = np.zeros((len(sample_sizes), self.null_samples))

        for iT, T in enumerate(sample_sizes):
            null_dists[iT] = self._generate_nulldist(
                T, add_to_null_dists=False)
            self.null_dists[T] = null_dists[iT]

        np.savez("%s" % null_dist_filename,
                 exact_dist=null_dists,
                 T=np.array(sample_sizes))
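
    # Usage sketch (illustrative assumption, with a hypothetical filename): these
    # null distributions are typically precomputed through the public GPDCtorch
    # wrapper defined further below, roughly
    #     gpdc = GPDCtorch(significance='analytic')
    #     gpdc.generate_and_save_nulldists(sample_sizes=range(100, 501, 50),
    #                                      null_dist_filename='gpdc_torch_nulldists.npz')
    # and reused later via GPDCtorch(..., null_dist_filename='gpdc_torch_nulldists.npz').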


    def _get_single_residuals(self, array, target_var,
                              return_means=False,
                              standardize=True,
                              return_likelihood=False,
                              training_iter=50,
                              lr=0.1):
        """Returns residuals of Gaussian process regression.

        Performs a GP regression of the variable indexed by target_var on the
        conditions Z. Here array is assumed to contain X and Y as the first two
        rows with the remaining rows (if present) containing the conditions Z.
        Optionally returns the estimated mean and the likelihood.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        target_var : {0, 1}
            Variable to regress out conditions from.

        standardize : bool, optional (default: True)
            Whether to standardize the array beforehand.

        return_means : bool, optional (default: False)
            Whether to return the estimated regression line.

        return_likelihood : bool, optional (default: False)
            Whether to return the log_marginal_likelihood of the fitted GP.

        training_iter : int, optional (default: 50)
            Number of training iterations.

        lr : float, optional (default: 0.1)
            Learning rate (default: 0.1).

        Returns
        -------
        resid [, mean, likelihood] : array-like
            The residual of the regression and optionally the estimated mean
            and/or the likelihood.
        """

        dim, T = array.shape

        if dim <= 2:
            if return_likelihood:
                return array[target_var, :], -np.inf
            return array[target_var, :]

        # Implement using PyTorch
        # Standardize
        if standardize:
            array -= array.mean(axis=1).reshape(dim, 1)
            std = array.std(axis=1)
            for i in range(dim):
                if std[i] != 0.:
                    array[i] /= std[i]
            if np.any(std == 0.) and self.verbosity > 0:
                warnings.warn("Possibly constant array!")
            # array /= array.std(axis=1).reshape(dim, 1)
            # if np.isnan(array).any():
            #     raise ValueError("Nans after standardizing, "
            #                      "possibly constant array!")

        target_series = array[target_var, :]
        z = array[2:].T.copy()
        if np.ndim(z) == 1:
            z = z.reshape(-1, 1)

        train_x = torch.tensor(z).float()
        train_y = torch.tensor(target_series).float()

        device_type = 'cuda' if torch.cuda.is_available() else 'cpu'
        output_device = torch.device(device_type)
        train_x, train_y = train_x.to(output_device), train_y.to(output_device)

        if device_type == 'cuda':
            # If GPU is available, use MultiGPU with Kernel Partitioning
            n_devices = torch.cuda.device_count()

            class mExactGPModel(gpytorch.models.ExactGP):
                def __init__(self, train_x, train_y, likelihood, n_devices):
                    super(mExactGPModel, self).__init__(train_x, train_y, likelihood)
                    self.mean_module = gpytorch.means.ConstantMean()
                    base_covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

                    self.covar_module = gpytorch.kernels.MultiDeviceKernel(
                        base_covar_module, device_ids=range(n_devices),
                        output_device=output_device
                    )

                def forward(self, x):
                    mean_x = self.mean_module(x)
                    covar_x = self.covar_module(x)
                    return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

            def mtrain(train_x,
                       train_y,
                       n_devices,
                       output_device,
                       checkpoint_size,
                       preconditioner_size,
                       n_training_iter,
                       ):
                likelihood = gpytorch.likelihoods.GaussianLikelihood().to(output_device)
                model = mExactGPModel(train_x, train_y, likelihood, n_devices).to(output_device)
                model.train()
                likelihood.train()

                optimizer = FullBatchLBFGS(model.parameters(), lr=lr)
                # "Loss" for GPs - the marginal log likelihood
                mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

                with gpytorch.beta_features.checkpoint_kernel(checkpoint_size), \
                        gpytorch.settings.max_preconditioner_size(preconditioner_size):

                    def closure():
                        optimizer.zero_grad()
                        output = model(train_x)
                        loss = -mll(output, train_y)
                        return loss

                    loss = closure()
                    loss.backward()

                    for i in range(n_training_iter):
                        options = {'closure': closure, 'current_loss': loss, 'max_ls': 10}
                        loss, _, _, _, _, _, _, fail = optimizer.step(options)

                        '''print('Iter %d/%d - Loss: %.3f lengthscale: %.3f noise: %.3f' % (
                            i + 1, n_training_iter, loss.item(),
                            model.covar_module.module.base_kernel.lengthscale.item(),
                            model.likelihood.noise.item()
                        ))'''

                        if fail:
                            # print('Convergence reached!')
                            break

                return model, likelihood, mll

            def find_best_gpu_setting(train_x,
                                      train_y,
                                      n_devices,
                                      output_device,
                                      preconditioner_size
                                      ):
                N = train_x.size(0)

                # Find the optimum partition/checkpoint size by decreasing in powers of 2
                # Start with no partitioning (size = 0)
                settings = [0] + [int(n) for n in np.ceil(N / 2 ** np.arange(1, np.floor(np.log2(N))))]

                for checkpoint_size in settings:
                    print('Number of devices: {} -- Kernel partition size: {}'.format(n_devices, checkpoint_size))
                    try:
                        # Try a full forward and backward pass with this setting to check memory usage
                        _, _, _ = mtrain(train_x, train_y,
                                         n_devices=n_devices, output_device=output_device,
                                         checkpoint_size=checkpoint_size,
                                         preconditioner_size=preconditioner_size, n_training_iter=1)

                        # when successful, break out of for-loop and jump to finally block
                        break
                    except RuntimeError as e:
                        pass
                    except AttributeError as e:
                        pass
                    finally:
                        # handle CUDA OOM error
                        gc.collect()
                        torch.cuda.empty_cache()
                return checkpoint_size

            # Set a large enough preconditioner size to reduce the number of CG iterations run
            preconditioner_size = 100
            if self.checkpoint_size is None:
                self.checkpoint_size = find_best_gpu_setting(train_x, train_y,
                                                             n_devices=n_devices,
                                                             output_device=output_device,
                                                             preconditioner_size=preconditioner_size)

            model, likelihood, mll = mtrain(train_x, train_y,
                                            n_devices=n_devices, output_device=output_device,
                                            checkpoint_size=self.checkpoint_size,
                                            preconditioner_size=100,
                                            n_training_iter=training_iter)

            # Get into evaluation (predictive posterior) mode
            model.eval()
            likelihood.eval()

            # Make predictions by feeding model through likelihood
            with torch.no_grad(), gpytorch.settings.fast_pred_var(), gpytorch.beta_features.checkpoint_kernel(1000):
                mean = model(train_x).loc.detach()
                loglik = mll(model(train_x), train_y) * T

            resid = (train_y - mean).detach().cpu().numpy()
            mean = mean.detach().cpu().numpy()

        else:
            # If only CPU is available, we will use the simplest form of GP model, exact inference
            class ExactGPModel(gpytorch.models.ExactGP):
                def __init__(self, train_x, train_y, likelihood):
                    super(ExactGPModel, self).__init__(
                        train_x, train_y, likelihood)
                    self.mean_module = gpytorch.means.ConstantMean()

                    # We only use the RBF kernel here; the WhiteNoiseKernel is deprecated
                    # and its functionality is integrated into the likelihood module.
                    self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

                def forward(self, x):
                    mean_x = self.mean_module(x)
                    covar_x = self.covar_module(x)
                    return gpytorch.distributions.MultivariateNormal(mean_x, covar_x)

            # initialize likelihood and model
            likelihood = gpytorch.likelihoods.GaussianLikelihood()
            model = ExactGPModel(train_x, train_y, likelihood)

            # Find optimal model hyperparameters
            model.train()
            likelihood.train()

            # Use the Adam optimizer
            # Includes GaussianLikelihood parameters
            optimizer = torch.optim.Adam(model.parameters(), lr=lr)

            # "Loss" for GPs - the marginal log likelihood
            mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

            for i in range(training_iter):
                # Zero gradients from previous iteration
                optimizer.zero_grad()
                # Output from model
                output = model(train_x)

                # Calc loss and backprop gradients
                loss = -mll(output, train_y)
                loss.backward()
                optimizer.step()

            # Get into evaluation (predictive posterior) mode
            model.eval()
            likelihood.eval()

            # Make predictions by feeding model through likelihood
            with torch.no_grad(), gpytorch.settings.fast_pred_var():
                mean = model(train_x).loc.detach()
                loglik = mll(model(train_x), train_y) * T

            resid = (train_y - mean).detach().numpy()
            mean = mean.detach().numpy()

        if return_means and not return_likelihood:
            return resid, mean
        elif return_likelihood and not return_means:
            return resid, loglik
        elif return_means and return_likelihood:
            return resid, mean, loglik
        return resid

    def _get_model_selection_criterion(self, j, parents, tau_max=0):
        """Returns log marginal likelihood for GP regression.

        Fits a GP model of the parents to variable j and returns the negative
        log marginal likelihood as a model selection score. It is used to
        determine optimal hyperparameters in PCMCI, in particular the pc_alpha
        value.

        Parameters
        ----------
        j : int
            Index of target variable in data array.

        parents : list
            List of form [(0, -1), (3, -2), ...] containing parents.

        tau_max : int, optional (default: 0)
            Maximum time lag. This may be used to make sure that estimates for
            different lags in X, Z, all have the same sample size.

        Returns
        -------
        score : float
            Model score.
        """

        Y = [(j, 0)]
        X = [(j, 0)]  # dummy variable here
        Z = parents
        array, xyz, _ = \
            self.cond_ind_test.dataframe.construct_array(
                X=X, Y=Y, Z=Z,
                tau_max=tau_max,
                mask_type=self.cond_ind_test.mask_type,
                return_cleaned_xyz=False,
                do_checks=True,
                verbosity=self.verbosity)

        dim, T = array.shape

        _, logli = self._get_single_residuals(array,
                                              target_var=1,
                                              return_likelihood=True)

        score = -logli
        return score


class GPDCtorch(CondIndTest):
    r"""GPDC conditional independence test based on Gaussian processes and
    distance correlation, here with a gpytorch implementation.

    GPDC is based on a Gaussian process (GP) regression and a distance
    correlation test on the residuals [2]_. GP is estimated with gpytorch.
    The distance correlation test is implemented with the dcor package available
    from pip. Here the null distribution is not analytically available, but can be
    precomputed with the function generate_and_save_nulldists(...) which saves a
    \*.npz file containing the null distribution for different sample sizes.
    This file can then be supplied as null_dist_filename.

    Notes
    -----

    GPDC is based on a Gaussian process (GP) regression and a distance
    correlation test on the residuals. Distance correlation is described in
    [2]_. To test :math:`X \perp Y | Z`, first :math:`Z` is regressed out from
    :math:`X` and :math:`Y` assuming the model

    .. math:: X & = f_X(Z) + \epsilon_{X} \\
        Y & = f_Y(Z) + \epsilon_{Y} \\
        \epsilon_{X,Y} &\sim \mathcal{N}(0, \sigma^2)

    using GP regression. Here :math:`\sigma^2` and the kernel bandwidth are
    optimized using ``gpytorch``. Then the residuals are transformed to uniform
    marginals yielding :math:`r_X,r_Y` and their dependency is tested with

    .. math:: \mathcal{R}\left(r_X, r_Y\right)

    The null distribution of the distance correlation should be pre-computed.
    Otherwise it is computed during runtime.

    Parameters
    ----------
    null_dist_filename : str, optional (default: None)
        Path to file containing null distribution.

    **kwargs :
        Arguments passed on to parent class CondIndTest.

    """
    @property
    def measure(self):
        """
        Concrete property to return the measure of the independence test
        """
        return self._measure

    def __init__(self,
                 null_dist_filename=None,
                 **kwargs):
        self._measure = 'gp_dc'
        self.two_sided = False
        self.residual_based = True
        # Call the parent constructor
        CondIndTest.__init__(self, **kwargs)
        # Build the regressor
        self.gauss_pr = GaussProcRegTorch(self.sig_samples,
                                          self,
                                          null_dist_filename=null_dist_filename,
                                          verbosity=self.verbosity)

        if self.verbosity > 0:
            print("null_dist_filename = %s" % self.gauss_pr.null_dist_filename)
            print("")

    def _load_nulldist(self, filename):
        r"""
        Load a precomputed null distribution from a \*.npz file. This
        distribution can be calculated using generate_and_save_nulldists(...).

        Parameters
        ----------
        filename : string
            Path to the \*.npz file.

        Returns
        -------
        null_dists, null_samples : dict, int
            The null distribution as a dictionary of distributions keyed by
            sample size, and the total number of null samples.
        """
        return self.gauss_pr._load_nulldist(filename)

    def generate_nulldist(self, df, add_to_null_dists=True):
        """Generates null distribution for pairwise independence tests.

        Generates the null distribution for sample size df. Assumes pairwise
        samples transformed to uniform marginals. Uses get_dependence_measure
        available in class and generates self.sig_samples random samples. Adds
        the null distributions to self.gauss_pr.null_dists.

        Parameters
        ----------
        df : int
            Degrees of freedom / sample size to generate null distribution for.

        add_to_null_dists : bool, optional (default: True)
            Whether to add the null dist to the dictionary of null dists or
            just return it.

        Returns
        -------
        null_dist : array of shape [df,]
            Only returned if add_to_null_dists is False.
        """
        return self.gauss_pr._generate_nulldist(df, add_to_null_dists)

    def generate_and_save_nulldists(self, sample_sizes, null_dist_filename):
        """Generates and saves null distribution for pairwise independence
        tests.

        Generates the null distribution for different sample sizes. Calls
        generate_nulldist. Null dists are saved to disk as
        self.null_dist_filename.npz. Also adds the null distributions to
        self.gauss_pr.null_dists.

        Parameters
        ----------
        sample_sizes : list
            List of sample sizes.

        null_dist_filename : str
            Name of the file in which to save the null distributions.
        """
        self.gauss_pr._generate_and_save_nulldists(sample_sizes,
                                                   null_dist_filename)


    def _get_single_residuals(self, array, target_var,
                              return_means=False,
                              standardize=True,
                              return_likelihood=False,
                              training_iter=50,
                              lr=0.1):
        """Returns residuals of Gaussian process regression.

        Performs a GP regression of the variable indexed by target_var on the
        conditions Z. Here array is assumed to contain X and Y as the first two
        rows with the remaining rows (if present) containing the conditions Z.
        Optionally returns the estimated mean and the likelihood.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        target_var : {0, 1}
            Variable to regress out conditions from.

        standardize : bool, optional (default: True)
            Whether to standardize the array beforehand.

        return_means : bool, optional (default: False)
            Whether to return the estimated regression line.

        return_likelihood : bool, optional (default: False)
            Whether to return the log_marginal_likelihood of the fitted GP.

        training_iter : int, optional (default: 50)
            Number of training iterations.

        lr : float, optional (default: 0.1)
            Learning rate (default: 0.1).

        Returns
        -------
        resid [, mean, likelihood] : array-like
            The residual of the regression and optionally the estimated mean
            and/or the likelihood.
        """
        return self.gauss_pr._get_single_residuals(
            array, target_var,
            return_means,
            standardize,
            return_likelihood,
            training_iter,
            lr)

    def get_model_selection_criterion(self, j, parents, tau_max=0):
        """Returns log marginal likelihood for GP regression.

        Fits a GP model of the parents to variable j and returns the negative
        log marginal likelihood as a model selection score. It is used to
        determine optimal hyperparameters in PCMCI, in particular the pc_alpha
        value.

        Parameters
        ----------
        j : int
            Index of target variable in data array.

        parents : list
            List of form [(0, -1), (3, -2), ...] containing parents.

        tau_max : int, optional (default: 0)
            Maximum time lag. This may be used to make sure that estimates for
            different lags in X, Z, all have the same sample size.

        Returns
        -------
        score : float
            Model score.
        """
        return self.gauss_pr._get_model_selection_criterion(j, parents, tau_max)

    def get_dependence_measure(self, array, xyz, data_type=None):
        """Return GPDC measure.

        Estimated as the distance correlation of the residuals of a GP
        regression.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        Returns
        -------
        val : float
            GPDC test statistic.
        """

        x_vals = self._get_single_residuals(array, target_var=0)
        y_vals = self._get_single_residuals(array, target_var=1)
        val = self._get_dcorr(np.array([x_vals, y_vals]))
        return val

    def _get_dcorr(self, array_resid):
        """Return distance correlation coefficient.

        The variables are transformed to uniform marginals using the empirical
        cumulative distribution function beforehand. Here the null distribution
        is not analytically available, but can be precomputed with the function
        generate_and_save_nulldists(...) which saves a *.npz file containing
        the null distribution for different sample sizes. This file can then be
        supplied as null_dist_filename.

        Parameters
        ----------
        array_resid : array-like
            data array must be of shape (2, T)

        Returns
        -------
        val : float
            Distance correlation coefficient.
        """
        # Remove ties before applying transformation to uniform marginals
        # array_resid = self._remove_ties(array_resid, verbosity=4)
        x_vals, y_vals = self._trafo2uniform(array_resid)
        val = dcor.distance_correlation(x_vals, y_vals, method='AVL')
        return val

    def get_shuffle_significance(self, array, xyz, value,
                                 return_null_dist=False,
                                 data_type=None):
        """Returns p-value for shuffle significance test.

        For residual-based test statistics only the residuals are shuffled.

        Parameters
        ----------
        array : array-like
            data array with X, Y, Z in rows and observations in columns

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        value : number
            Value of test statistic for unshuffled estimate.

        Returns
        -------
        pval : float
            p-value
        """

        x_vals = self._get_single_residuals(array, target_var=0)
        y_vals = self._get_single_residuals(array, target_var=1)
        array_resid = np.array([x_vals, y_vals])
        xyz_resid = np.array([0, 1])

        null_dist = self._get_shuffle_dist(array_resid, xyz_resid,
                                           self.get_dependence_measure,
                                           sig_samples=self.sig_samples,
                                           sig_blocklength=self.sig_blocklength,
                                           verbosity=self.verbosity)

        # pval = (null_dist >= value).mean()
        pval = float(np.sum(null_dist >= value) + 1) / (self.sig_samples + 1)

        if return_null_dist:
            return pval, null_dist
        return pval

    def get_analytic_significance(self, value, T, dim, xyz):
        """Returns p-value for the distance correlation coefficient.

        The null distribution for the necessary degrees of freedom (df) is
        loaded. If not available, the null distribution is generated with the
        function generate_nulldist(). It is recommended to generate the
        nulldists for a wide range of sample sizes beforehand with the function
        generate_and_save_nulldists(...). The distance correlation coefficient
        is one-sided. If the degrees of freedom are less than 1, numpy.nan is
        returned.

        Parameters
        ----------
        value : float
            Test statistic value.

        T : int
            Sample length.

        dim : int
            Dimensionality, i.e., number of features.

        xyz : array of ints
            XYZ identifier array of shape (dim,).

        Returns
        -------
        pval : float or numpy.nan
            p-value.
        """

        # GP regression approximately doesn't cost degrees of freedom
        df = T

        if df < 1:
            pval = np.nan
        else:
            # idx_near = (np.abs(self.sample_sizes - df)).argmin()
            if int(df) not in list(self.gauss_pr.null_dists):
                # if np.abs(self.sample_sizes[idx_near] - df) / float(df) > 0.01:
                if self.verbosity > 0:
                    print("Null distribution for GPDC not available "
                          "for deg. of freed. = %d." % df)
                self.generate_nulldist(df)
            null_dist_here = self.gauss_pr.null_dists[int(df)]
            pval = np.mean(null_dist_here > np.abs(value))
        return pval
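
Usage sketch: the GPDCtorch test above is typically plugged into tigramite's PCMCI as the
conditional independence test. The sketch below assumes the standard public tigramite API
(DataFrame, PCMCI, run_pcmci) from the modules listed at the top of this diff; the data,
variable names, and parameter values are illustrative, not prescribed by the package.

import numpy as np
from tigramite.data_processing import DataFrame
from tigramite.pcmci import PCMCI
from tigramite.independence_tests.gpdc_torch import GPDCtorch

# Illustrative data: 500 time samples of 3 variables.
data = np.random.randn(500, 3)
dataframe = DataFrame(data, var_names=['X', 'Y', 'Z'])

# GPDCtorch with analytic significance; null distributions are generated on the
# fly per sample size, or loaded from a precomputed file via null_dist_filename.
gpdc = GPDCtorch(significance='analytic')

pcmci = PCMCI(dataframe=dataframe, cond_ind_test=gpdc, verbosity=0)
results = pcmci.run_pcmci(tau_max=2, pc_alpha=0.05)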