synthyverse 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1 @@
1
+ from .benchmark import TabularBenchmark
@@ -0,0 +1,88 @@
1
+ from sklearn.model_selection import train_test_split
2
+ from ..evaluation.eval import MetricEvaluator
3
+ from ..utils.utils import get_generator, free_up_memory
4
+ from ..utils.reproducibility import set_seed
5
+ import pandas as pd
6
+ from time import time
7
+
8
+
9
class TabularBenchmark:
    """Benchmark one synthetic-data generator over repeated splits, fits and samples.

    For every random train/test split, the generator is re-initialised and
    fitted ``n_inits`` times, and each fitted generator is sampled
    ``n_generated_datasets`` times. Every sample is scored with
    ``MetricEvaluator``.

    Args:
        generator_name: Name handed to ``get_generator`` to look up the generator class.
        generator_params: Extra keyword arguments for the generator constructor.
            ``None`` (the default) means no extra arguments.
        n_random_splits: Number of train/test splits; the split index is the split seed.
        n_inits: Number of generator initialisations per split; the init index is the seed.
        n_generated_datasets: Number of synthetic datasets drawn per fitted generator.
        metrics: Metric specification forwarded to ``MetricEvaluator``.
            ``None`` (the default) means ``["classifier_test", "mle", "dcr"]``.
        test_size: ``test_size`` argument forwarded to ``train_test_split``.
    """

    def __init__(
        self,
        generator_name: str = "arf",
        generator_params: dict = None,
        n_random_splits: int = 1,
        n_inits: int = 1,
        n_generated_datasets: int = 1,
        metrics: list = None,
        test_size: float = 0.3,
    ):
        self.generator_name = generator_name
        # Fresh containers per instance: a mutable default in the signature
        # would be shared (and mutated) across every benchmark object.
        self.generator_params = {} if generator_params is None else generator_params
        self.n_random_splits = n_random_splits
        self.n_inits = n_inits
        self.n_generated_datasets = n_generated_datasets
        self.metrics = (
            ["classifier_test", "mle", "dcr"] if metrics is None else metrics
        )
        self.test_size = test_size

    def run(self, X: pd.DataFrame, target_column: str, discrete_columns: list):
        """Run the full benchmark on ``X`` and return the nested results.

        Args:
            X: Complete real dataset, including the target column.
            target_column: Name of the target column.
            discrete_columns: Names of the discrete columns. The split is
                stratified on the target only when the target is listed here.

        Returns:
            Nested dict keyed ``split_<i>`` -> ``init_<j>`` ->
            ``generated_dataset_<k>``. Each init level holds
            ``"training_time"``; each dataset level holds
            ``"inference_time"``, ``"evaluation_time"`` and the metric values
            returned by ``MetricEvaluator.evaluate``. Times are in seconds.
        """
        results = {}
        generator_ = get_generator(self.generator_name)
        # The stratification labels do not depend on the split, so build them once.
        stratify = X[target_column] if target_column in discrete_columns else None

        for split_i in range(self.n_random_splits):
            split_results = {}
            results[f"split_{split_i}"] = split_results

            # split data according to current seed
            X_train, X_test = train_test_split(
                X, stratify=stratify, test_size=self.test_size, random_state=split_i
            )
            X_train = X_train.reset_index(drop=True)
            X_test = X_test.reset_index(drop=True)

            for init_i in range(self.n_inits):
                init_results = {}
                split_results[f"init_{init_i}"] = init_results

                set_seed(init_i)
                generator = generator_(random_state=init_i, **self.generator_params)
                start_time = time()
                generator.fit(X_train, discrete_columns)
                init_results["training_time"] = time() - start_time

                # potentially generate multiple datasets
                for generated_dataset_i in range(self.n_generated_datasets):
                    dataset_results = {}
                    init_results[f"generated_dataset_{generated_dataset_i}"] = (
                        dataset_results
                    )

                    # Draw as many rows as the full real dataset (train + test).
                    start_time = time()
                    X_syn = generator.generate(len(X))
                    dataset_results["inference_time"] = time() - start_time

                    start_time = time()
                    evaluator = MetricEvaluator(
                        metrics=self.metrics,
                        discrete_features=discrete_columns,
                        target_column=target_column,
                        random_state=init_i,
                    )
                    metric_results = evaluator.evaluate(X_train, X_test, X_syn)
                    dataset_results["evaluation_time"] = time() - start_time
                    dataset_results.update(metric_results)

                # free up memory for next iteration
                free_up_memory()

        return results
@@ -0,0 +1 @@
1
+ from .eval import MetricEvaluator
@@ -0,0 +1,120 @@
1
+ from typing import Union
2
+ import pandas as pd
3
+ from ..utils.preprocessing import scale
4
+
5
+ from .fidelity import (
6
+ ClassifierTest,
7
+ AlphaPrecisionBetaRecallAuthenticity,
8
+ Similarity,
9
+ )
10
+
11
+ from .utility import MLE
12
+
13
+ from .privacy import DCR
14
+
15
# Registry of available metrics: maps the metric name used in a
# MetricEvaluator specification to the class that implements it.
METRICS = {
    "classifier_test": ClassifierTest,
    "mle": MLE,
    "dcr": DCR,
    "similarity": Similarity,
    "prauth": AlphaPrecisionBetaRecallAuthenticity,
}
22
+
23
+
24
class MetricEvaluator:
    """Instantiate the requested metrics and evaluate them on real vs. synthetic data.

    Args:
        metrics: Either a list of metric names, or a dict mapping metric name
            to a dict of constructor keyword arguments. A name may carry a
            ``-<suffix>`` (e.g. ``"mle-real"``); only the part before the
            first ``-`` (stripped, lower-cased) is looked up in ``METRICS``.
        discrete_features: Names of the discrete columns. ``None`` (the
            default) means no discrete columns.
        target_column: Name of the target column.
        random_state: Seed passed to metrics that declare ``needs_random_state``.
    """

    def __init__(
        self,
        metrics: Union[dict, list],
        discrete_features: list = None,
        target_column: str = "target",
        random_state: int = 0,
    ):
        if isinstance(metrics, list):
            self.metrics = {metric: {} for metric in metrics}
        else:
            # Copy the per-metric configs so the evaluator never writes into
            # (or is affected by later changes to) the caller's dictionaries.
            self.metrics = {
                name: dict(params or {}) for name, params in metrics.items()
            }
        # A fresh list per instance instead of a shared mutable default.
        self.discrete_features = [] if discrete_features is None else discrete_features
        self.target_column = target_column
        self.random_state = random_state

    def evaluate(
        self, X_train: pd.DataFrame, X_test: pd.DataFrame, X_syn: pd.DataFrame
    ):
        """Evaluate every configured metric and merge their outputs.

        Args:
            X_train: Real training data.
            X_test: Real holdout data.
            X_syn: Synthetic data.

        Returns:
            A flat dict with the merged outputs of all metrics. Results that
            are not dicts are silently dropped.

        Raises:
            KeyError: If a metric name is not registered in ``METRICS``.
            ValueError: If a metric class has no recognised ``data_requirement``.
        """
        X_train = X_train.reset_index(drop=True)
        X_test = X_test.reset_index(drop=True)
        X_syn = X_syn.reset_index(drop=True)

        # ensure that we do not evaluate a larger real dataset than synthetic
        X_train, X_test = X_train[: len(X_syn)], X_test[: len(X_syn)]

        # one hot, label encode, standard scale
        X_tr_scaled, X_te_scaled, X_syn_scaled = scale(
            X_train,
            X_test,
            X_syn,
            discrete_features=self.discrete_features,
        )

        # Values a metric class can request through a truthy ``needs_*`` attribute.
        injectable = {
            "discrete_features": self.discrete_features,
            "target_column": self.target_column,
            "random_state": self.random_state,
        }

        merged = {}
        for metric_key, metric_config in self.metrics.items():
            metric_name = metric_key.split("-")[0].strip().lower()
            metric_cls = METRICS[metric_name]

            # Build the constructor kwargs locally; requested values override
            # any same-named entry in the user configuration.
            kwargs = dict(metric_config)
            for arg_name, arg_value in injectable.items():
                if getattr(metric_cls, f"needs_{arg_name}", False):
                    kwargs[arg_name] = arg_value
            metric = metric_cls(**kwargs)

            # Use class property to determine which data to pass. Train-based
            # metrics get the first synthetic rows, test-based ones the last.
            data_req = getattr(metric_cls, "data_requirement", None)
            if data_req == "test":
                args = (X_test, X_syn[-len(X_test) :])
            elif data_req == "train":
                args = (X_train, X_syn[: len(X_train)])
            elif data_req == "test_preprocessed":
                args = (X_te_scaled, X_syn_scaled[-len(X_test) :])
            elif data_req == "train_preprocessed":
                args = (X_tr_scaled, X_syn_scaled[: len(X_train)])
            elif data_req == "train_and_test":
                args = (X_train, X_test, X_syn)
            elif data_req == "train_and_test_preprocessed":
                args = (X_tr_scaled, X_te_scaled, X_syn_scaled)
            else:
                raise ValueError(
                    f"Metric {metric_name} not (fully) implemented or missing data_requirement property"
                )
            metric_result = metric.evaluate(*args)

            # add result to dict (note that quantitative metrics have to output a dict, else they won't get added here)
            if isinstance(metric_result, dict):
                merged.update(metric_result)

        return merged
@@ -0,0 +1,230 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.metrics import roc_auc_score
4
+ from xgboost import XGBClassifier
5
+ from ..utils.xgb_utils import get_xgb_tree_method
6
+ from ..utils.oneclass import OneClassLayer
7
+ from ..utils.utils import suppress_print
8
+ import torch
9
+ from sklearn.neighbors import NearestNeighbors
10
+ from sdmetrics.reports.single_table import QualityReport
11
+
12
+
13
class ClassifierTest:
    """
    AUC score of XGB classifier which aims to distinguish synthetic from real data.

    The classifier is fitted on the real training rows (label 0) plus the
    first ``len(train)`` synthetic rows (label 1) and scored on the real test
    rows plus the last ``len(test)`` synthetic rows.

    Args:
        discrete_features: Names of the discrete columns; all other columns
            are cast to float. ``None`` (the default) means no discrete columns.
        random_state: Seed for the XGBoost classifier.
    """

    data_requirement = "train_and_test"
    needs_discrete_features = True
    needs_random_state = True

    def __init__(
        self,
        discrete_features: list = None,
        random_state: int = 0,
    ):
        super().__init__()
        self.random_state = random_state
        # A fresh list per instance instead of a shared mutable default.
        self.discrete_features = [] if discrete_features is None else discrete_features

    def evaluate(
        self,
        train: pd.DataFrame,
        test: pd.DataFrame,
        sd: pd.DataFrame,
    ):
        """Fit the real-vs-synthetic classifier and return its test AUC.

        Args:
            train: Real training data.
            test: Real holdout data.
            sd: Synthetic data.

        Returns:
            ``{"classifiertest.auc": <float>}``.
        """
        numerical_features = [
            col for col in train.columns if col not in self.discrete_features
        ]

        # One category dtype per discrete column, shared by the fit and the
        # predict frame. Casting each frame on its own can give the same value
        # a different integer code in each frame, and XGBoost versions that
        # consume the raw codes would then score mismatched categories.
        category_dtypes = {
            col: pd.CategoricalDtype(
                pd.concat((train[col], test[col], sd[col]), ignore_index=True)
                .astype("category")
                .cat.categories
            )
            for col in self.discrete_features
        }

        def _stack(real_part, synthetic_part):
            """Concatenate real and synthetic rows, cast dtypes, build 0/1 labels."""
            frame = pd.concat((real_part, synthetic_part)).reset_index(drop=True)
            if numerical_features:
                frame[numerical_features] = frame[numerical_features].astype(float)
            for col, dtype in category_dtypes.items():
                frame[col] = frame[col].astype(dtype)
            # Labels follow the actual row counts, so a short synthetic slice
            # cannot desynchronise features and labels.
            labels = np.concatenate(
                (
                    np.zeros(len(real_part), dtype=int),
                    np.ones(len(synthetic_part), dtype=int),
                )
            )
            return frame, labels

        X, y = _stack(train, sd.head(len(train)))
        X_te, y_te = _stack(test, sd.tail(len(test)))

        model = XGBClassifier(
            tree_method=get_xgb_tree_method(),
            enable_categorical=True,
            random_state=self.random_state,
            max_depth=3,
        )
        model.fit(X, y)

        preds = model.predict_proba(X_te)
        score = roc_auc_score(y_te, preds[:, 1])

        return {"classifiertest.auc": float(score)}
78
+
79
+
80
class AlphaPrecisionBetaRecallAuthenticity:
    """
    alpha-Precision, Beta-Recall, Authenticity score from the Alaa et al. paper.

    Real and synthetic rows are embedded with a one-class network
    (``OneClassLayer``) fitted on the real data; all scores are computed from
    Euclidean distances in that embedding.

    Args:
        discrete_features: Stored on the instance but not used by ``evaluate``.
            ``None`` (the default) means an empty list.
        random_state: Stored on the instance but not used by ``evaluate``.
    """

    data_requirement = "train_preprocessed"
    needs_random_state = True

    def __init__(
        self,
        discrete_features: list = None,
        random_state: int = 0,
    ):
        super().__init__()
        self.random_state = random_state
        # A fresh list per instance instead of a shared mutable default.
        self.discrete_features = [] if discrete_features is None else discrete_features

    def evaluate(
        self,
        rd: pd.DataFrame,
        sd: pd.DataFrame,
    ):
        """Compute the three scores for real data ``rd`` and synthetic data ``sd``.

        Both frames are passed to the network via ``.values``.
        NOTE(review): presumably they must be fully numeric (the class asks
        for ``train_preprocessed`` data) -- confirm against ``scale``.

        Returns:
            Dict with keys ``"alphaprecision.oc.score"``,
            ``"betacoverage.oc.score"`` and ``"authenticity.oc.score"``.
        """
        OC_params = {
            "input_dim": rd.shape[1],
            "rep_dim": rd.shape[1],
            "num_layers": 4,
            "num_hidden": 32,
            "activation": "ReLU",
            "dropout_prob": 0.2,
            "dropout_active": False,
            "LossFn": "SoftBoundary",
            "lr": 2e-3,
            "epochs": 1000,
            "warm_up_epochs": 20,
            "train_prop": 1.0,
            "weight_decay": 2e-3,
        }
        OC_hyperparams = {"Radius": 1, "nu": 1e-2}
        OC_hyperparams["center"] = (
            torch.ones(OC_params["rep_dim"]) * 10
        )  # *10 is what is used in synthcity
        OC_model = OneClassLayer(params=OC_params, hyperparams=OC_hyperparams)
        OC_model.fit(rd.values, verbosity=True)
        # Embeddings of real and synthetic rows plus the embedding centre.
        real = OC_model.predict(rd.values)
        syn = OC_model.predict(sd.values)
        emb_center = OC_model.c.detach().cpu().numpy()

        n_steps = 30
        alphas = np.linspace(0, 1, n_steps)

        # alpha-quantiles of the real points' distance to the embedding centre
        Radii = np.quantile(np.sqrt(np.sum((real - emb_center) ** 2, axis=1)), alphas)

        synth_center = np.mean(syn, axis=0)
        synth_to_center = np.sqrt(np.sum((syn - emb_center) ** 2, axis=1))

        # Two neighbours for real-to-real: the first hit is the point itself.
        nbrs_real = NearestNeighbors(n_neighbors=2, n_jobs=-1, p=2).fit(real)
        real_to_real, _ = nbrs_real.kneighbors(real)

        nbrs_synth = NearestNeighbors(n_neighbors=1, n_jobs=-1, p=2).fit(syn)
        real_to_synth, real_to_synth_args = nbrs_synth.kneighbors(real)

        real_to_real = real_to_real[:, 1].squeeze()
        real_to_synth = real_to_synth.squeeze()
        real_to_synth_args = real_to_synth_args.squeeze()

        # Closest synthetic point of every real point, and its distance to the
        # synthetic centre.
        real_synth_closest = syn[real_to_synth_args]
        real_synth_closest_d = np.sqrt(
            np.sum((real_synth_closest - synth_center) ** 2, axis=1)
        )
        closest_synth_Radii = np.quantile(real_synth_closest_d, alphas)

        alpha_precision_curve = []
        beta_coverage_curve = []
        for radius, closest_radius in zip(Radii, closest_synth_Radii):
            # share of synthetic points inside the current real-data radius
            alpha_precision_curve.append(np.mean(synth_to_center <= radius))
            # share of real points whose nearest synthetic point is at least as
            # close as their nearest real neighbour and lies within the
            # current synthetic radius
            beta_coverage_curve.append(
                np.mean(
                    (real_to_synth <= real_to_real)
                    * (real_synth_closest_d <= closest_radius)
                )
            )

        authen = real_to_real[real_to_synth_args] < real_to_synth
        authenticity = np.mean(authen)

        # One minus the summed absolute deviation of each curve from the
        # diagonal, normalised by the sum of the alphas.
        Delta_precision_alpha = 1 - np.sum(
            np.abs(alphas - np.array(alpha_precision_curve))
        ) / np.sum(alphas)

        Delta_coverage_beta = 1 - np.sum(
            np.abs(alphas - np.array(beta_coverage_curve))
        ) / np.sum(alphas)

        return {
            "alphaprecision.oc.score": float(Delta_precision_alpha),
            "betacoverage.oc.score": float(Delta_coverage_beta),
            "authenticity.oc.score": float(authenticity),
        }
187
+
188
+
189
class Similarity:
    """
    Column Shapes and Column Pair Trends from the SDMetrics library.
    Indicates quality of marginal distributions and correlations in synthetic data, respectively.

    Args:
        discrete_features: Names of the columns declared ``categorical`` in
            the SDMetrics metadata; every other column is declared
            ``numerical``. ``None`` (the default) means no discrete columns.
    """

    data_requirement = "train"
    needs_discrete_features = True

    def __init__(
        self,
        discrete_features: list = None,
    ):
        super().__init__()
        # A fresh list per instance instead of a shared mutable default.
        self.discrete_features = [] if discrete_features is None else discrete_features

    @suppress_print
    def evaluate(
        self,
        rd: pd.DataFrame,
        sd: pd.DataFrame,
    ):
        """Generate an SDMetrics ``QualityReport`` and extract its two property scores.

        Args:
            rd: Real data.
            sd: Synthetic data.

        Returns:
            Dict with keys ``"similarity.shape"`` and ``"similarity.trend"``.
        """
        metadata = {
            "columns": {
                column: {
                    "sdtype": (
                        "categorical"
                        if column in self.discrete_features
                        else "numerical"
                    )
                }
                for column in rd.columns
            }
        }
        metadata["primary_key"] = "index"

        report = QualityReport()
        report.generate(rd, sd, metadata)
        scores = report.get_properties()

        def _property_score(property_name):
            # ``.iloc[0]`` picks the single matching score explicitly; calling
            # float() on a one-element Series is deprecated in pandas.
            matches = scores.loc[scores["Property"] == property_name, "Score"]
            return float(matches.iloc[0])

        return {
            "similarity.shape": _property_score("Column Shapes"),
            "similarity.trend": _property_score("Column Pair Trends"),
        }
@@ -0,0 +1,88 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.metrics import pairwise_distances_argmin_min
4
+
5
+
6
class DCR:
    """
    Distance to Closest Record scores.
    Indicates closeness of synthetic data to the training data, and an independent holdout set.

    Args:
        estimates: Summary statistics to report; each entry is either the
            string ``"mean"`` or a quantile level for ``np.quantile``.
            ``None`` (the default) means
            ``["mean", 0.01, 0.05, 0.1, 0.25, 0.5]``.
        batch_size: Number of query rows per distance computation, or
            ``None`` to process all query rows in a single call.
    """

    data_requirement = "train_and_test_preprocessed"

    def __init__(
        self,
        estimates: list = None,
        batch_size: int = 16000,
    ):
        super().__init__()
        # A fresh list per instance instead of a shared mutable default.
        self.estimates = (
            ["mean", 0.01, 0.05, 0.1, 0.25, 0.5] if estimates is None else estimates
        )
        self.batch_size = batch_size

    def _compute_min_distances_batch(
        self, query_data: pd.DataFrame, reference_data: pd.DataFrame
    ) -> np.ndarray:
        """
        Compute minimum distances between query_data and reference_data in batches.

        Args:
            query_data: DataFrame containing query points
            reference_data: DataFrame containing reference points

        Returns:
            Array of minimum Euclidean distances, one per query point
        """
        if self.batch_size is None:
            # No batching requested: one call over the whole query set.
            _, min_distances = pairwise_distances_argmin_min(
                query_data, reference_data, metric="euclidean"
            )
            return min_distances

        n_query = len(query_data)
        chunks = []
        for start in range(0, n_query, self.batch_size):
            batch_query = query_data.iloc[start : start + self.batch_size]
            _, batch_min_distances = pairwise_distances_argmin_min(
                batch_query, reference_data, metric="euclidean"
            )
            chunks.append(batch_min_distances)

        # Join the per-batch arrays once instead of extending a Python list
        # element by element; an empty query yields an empty array.
        return np.concatenate(chunks) if chunks else np.array([])

    def evaluate(self, train: pd.DataFrame, test: pd.DataFrame, sd: pd.DataFrame):
        """Summarise the distance to the closest training record.

        Distances are computed from the first ``len(train)`` synthetic rows to
        ``train`` and, for comparison, from ``test`` to ``train``.

        Returns:
            Dict with ``dcr.train.<estimate>`` (synthetic-to-train distances)
            and ``dcr.test.<estimate>`` (test-to-train distances) for every
            configured estimate.
        """
        sd = sd[: len(train)]

        # Use batch processing if batch_size is specified
        min_distances_syn = self._compute_min_distances_batch(sd, train)
        min_distances_test = self._compute_min_distances_batch(test, train)

        dictionary = {}
        for estimate in self.estimates:
            if estimate == "mean":
                score_train = min_distances_syn.mean()
                score_test = min_distances_test.mean()
            else:
                score_train = np.quantile(min_distances_syn, estimate)
                score_test = np.quantile(min_distances_test, estimate)
            dictionary[f"dcr.train.{estimate}"] = float(score_train)
            dictionary[f"dcr.test.{estimate}"] = float(score_test)

        return dictionary
@@ -0,0 +1,115 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.metrics import roc_auc_score, r2_score
4
+ from xgboost import XGBClassifier, XGBRegressor
5
+ from sklearn.preprocessing import LabelEncoder
6
+ from ..utils.xgb_utils import get_xgb_tree_method
7
+
8
+
9
class MLE:
    """
    Machine Learning Efficacy from a XGB classifier.
    AUC score for discrete target columns, R^2 score for continuous target columns.

    Args:
        target_column: Name of the column to predict.
        discrete_features: Names of the discrete columns. A target listed here
            is treated as a classification target, otherwise as a regression
            target. ``None`` (the default) means no discrete columns.
        random_state: Seed for the XGBoost models.
        train_set: ``"synthetic"`` trains on synthetic data and tests on the
            real test set; any other value trains on the real training data
            and tests on synthetic data.
    """

    data_requirement = "train_and_test"
    needs_discrete_features = True
    needs_target_column = True
    needs_random_state = True

    def __init__(
        self,
        target_column: str = "target",
        discrete_features: list = None,
        random_state: int = 0,
        train_set: str = "synthetic",
    ):
        super().__init__()
        self.random_state = random_state
        # A fresh list per instance instead of a shared mutable default.
        self.discrete_features = [] if discrete_features is None else discrete_features
        self.target_column = target_column
        self.train_set = train_set

    def evaluate(
        self,
        train: pd.DataFrame,
        test: pd.DataFrame,
        sd: pd.DataFrame,
    ):
        """Score a model under the configured train/test regime, plus a real/real baseline.

        Args:
            train: Real training data.
            test: Real holdout data.
            sd: Synthetic data.

        Returns:
            Dict with two entries:
            ``mle.train-<train_set>-test-<other>.<auc|r2>`` and the baseline
            ``mle.train-real-test-real.<auc|r2>``.
        """
        target = self.target_column
        is_classification = target in self.discrete_features

        y_tr, y_te, y_s = train[target], test[target], sd[target]
        x_tr = train.drop(columns=[target])
        x_te = test.drop(columns=[target])
        x_s = sd.drop(columns=[target])

        # Take the numerical columns from the feature frame, not from
        # ``train``: a continuous target is not a discrete feature, so it
        # would otherwise be selected here and raise KeyError because the
        # feature frames no longer contain it.
        numerical_features = [
            col for col in x_tr.columns if col not in self.discrete_features
        ]
        discrete_features = [
            col for col in self.discrete_features if col != target
        ]

        if numerical_features:
            x_tr[numerical_features] = x_tr[numerical_features].astype(float)
            x_te[numerical_features] = x_te[numerical_features].astype(float)
            x_s[numerical_features] = x_s[numerical_features].astype(float)

        # One category dtype per column, shared by all three frames. Casting
        # each frame on its own can give the same value a different integer
        # code in each frame, and XGBoost versions that consume the raw codes
        # would then predict on mismatched categories.
        for col in discrete_features:
            shared_dtype = pd.CategoricalDtype(
                pd.concat((x_tr[col], x_te[col], x_s[col]), ignore_index=True)
                .astype("category")
                .cat.categories
            )
            x_tr[col] = x_tr[col].astype(shared_dtype)
            x_te[col] = x_te[col].astype(shared_dtype)
            x_s[col] = x_s[col].astype(shared_dtype)

        model_kwargs = {
            "tree_method": get_xgb_tree_method(),
            "enable_categorical": True,
            "random_state": self.random_state,
            "max_depth": 3,
        }
        if is_classification:
            # Encode labels over all three sets so every class gets one code.
            le = LabelEncoder()
            le.fit(pd.concat((y_tr, y_te, y_s)))
            y_tr = le.transform(y_tr)
            y_te = le.transform(y_te)
            y_s = le.transform(y_s)
            model = XGBClassifier(**model_kwargs)
        else:
            model = XGBRegressor(**model_kwargs)

        if self.train_set == "synthetic":
            # train on the first len(train) synthetic rows, test on real data
            model.fit(x_s[: len(x_tr)], y_s[: len(x_tr)])
            score = self._get_score(y_te, x_te, model)
            tested_on = "real"
        else:
            # train on real data, test on the last len(test) synthetic rows
            model.fit(x_tr, y_tr)
            score = self._get_score(y_s[-len(x_te) :], x_s[-len(x_te) :], model)
            tested_on = "synthetic"

        # also add trtr score
        model.fit(x_tr, y_tr)
        score_trtr = self._get_score(y_te, x_te, model)

        score_name = "auc" if is_classification else "r2"
        return {
            f"mle.train-{self.train_set}-test-{tested_on}.{score_name}": float(score),
            f"mle.train-real-test-real.{score_name}": float(score_trtr),
        }

    def _get_score(self, y_te, X_te, model):
        """Return ROC AUC (discrete target) or R^2 (continuous target) of ``model``."""
        if self.target_column not in self.discrete_features:
            return r2_score(y_te, model.predict(X_te))

        preds = model.predict_proba(X_te)
        # Decide binary vs. multi-class from the model's own class count: an
        # evaluation set that happens to contain only two of the classes must
        # not be scored against probability column 1 of a multi-class model.
        n_classes = preds.shape[1]
        if n_classes > 2:
            return roc_auc_score(
                y_te,
                preds,
                multi_class="ovr",
                average="micro",
                labels=np.arange(n_classes),
            )
        return roc_auc_score(y_te, preds[:, 1])
@@ -0,0 +1,23 @@
1
+ from .base import BaseGenerator
2
+
3
# Each concrete generator is imported defensively: if its module cannot be
# imported, the class name is bound to None instead of failing the package
# import. NOTE(review): presumably this covers optional dependencies that are
# not installed -- confirm against the packaging extras.
try:
    from .arf_generator import ARFGenerator
except ImportError:
    ARFGenerator = None

try:
    from .bn_generator import BNGenerator
except ImportError:
    BNGenerator = None

try:
    from .ctgan_generator import CTGANGenerator
except ImportError:
    CTGANGenerator = None

try:
    from .tvae_generator import TVAEGenerator
except ImportError:
    TVAEGenerator = None

# All known generator classes; an entry is None when its import failed above.
all_generators = [ARFGenerator, BNGenerator, CTGANGenerator, TVAEGenerator]
@@ -0,0 +1 @@
1
+ from .arf import ARFGenerator