synthyverse 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {synthyverse-0.2.0/synthyverse.egg-info → synthyverse-0.2.1}/PKG-INFO +13 -13
- {synthyverse-0.2.0 → synthyverse-0.2.1}/setup.py +1 -1
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/privacy.py +107 -83
- {synthyverse-0.2.0 → synthyverse-0.2.1/synthyverse.egg-info}/PKG-INFO +13 -13
- {synthyverse-0.2.0 → synthyverse-0.2.1}/LICENSE +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/MANIFEST.in +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/README.md +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/logo/logo.png +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/pyproject.toml +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/dev/docs.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/evaluation/eval.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/arf.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/base.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/bn.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/cdtd.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/ctgan.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/smote.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/tabargn.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/tabddpm.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/tabsyn.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/tvae.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/univariate.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/scripts/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/scripts/setup_utils.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/scripts/update_templates.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/setup.cfg +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/benchmark/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/benchmark/synthesis.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/eval.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/fidelity.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/ml.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/preprocessing.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/utility.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/arf_generator/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/arf_generator/arf.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/base.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/bn_generator/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/bn_generator/bn.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/cdtd_generator/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/cdtd_generator/cdtd.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/cdtd_generator/layers.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/cdtd_generator/utils.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/ctgan_generator/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/ctgan_generator/ct_gan.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/persistence.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/smote_generator/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/smote_generator/smote.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabargn_generator/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabargn_generator/tabargn.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabddpm_generator/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabddpm_generator/model.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabddpm_generator/plugin.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabddpm_generator/tabddpm.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabsyn_generator/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabsyn_generator/diffusion.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabsyn_generator/tabsyn.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabsyn_generator/vae.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tvae_generator/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tvae_generator/tvae.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/univariate_generator/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/univariate_generator/univariate.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/utils/__init__.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/utils/reproducibility.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/utils/utils.py +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse.egg-info/SOURCES.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse.egg-info/dependency_links.txt +0 -0
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse.egg-info/requires.txt +12 -12
- {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: synthyverse
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Synthetic data generation and evaluation library
|
|
5
5
|
Home-page: https://github.com/synthyverse/synthyverse
|
|
6
6
|
Author: Jim Achterberg, Saif Ul Islam, Zia Ur Rehman
|
|
@@ -106,24 +106,24 @@ Requires-Dist: xgboost; extra == "eval"
|
|
|
106
106
|
Requires-Dist: optuna; extra == "eval"
|
|
107
107
|
Requires-Dist: seaborn; extra == "eval"
|
|
108
108
|
Provides-Extra: full
|
|
109
|
-
Requires-Dist: ctgan==0.10.0; extra == "full"
|
|
110
109
|
Requires-Dist: imbalanced-learn; extra == "full"
|
|
111
|
-
Requires-Dist: pandas; extra == "full"
|
|
112
110
|
Requires-Dist: pykeops; extra == "full"
|
|
113
|
-
Requires-Dist:
|
|
114
|
-
Requires-Dist: scikit-learn; extra == "full"
|
|
115
|
-
Requires-Dist: optuna; extra == "full"
|
|
116
|
-
Requires-Dist: einops; extra == "full"
|
|
117
|
-
Requires-Dist: seaborn; extra == "full"
|
|
118
|
-
Requires-Dist: numpy; extra == "full"
|
|
119
|
-
Requires-Dist: scipy; extra == "full"
|
|
111
|
+
Requires-Dist: geomloss; extra == "full"
|
|
120
112
|
Requires-Dist: tqdm; extra == "full"
|
|
121
|
-
Requires-Dist:
|
|
113
|
+
Requires-Dist: seaborn; extra == "full"
|
|
114
|
+
Requires-Dist: torch-ema; extra == "full"
|
|
115
|
+
Requires-Dist: optuna; extra == "full"
|
|
122
116
|
Requires-Dist: arfpy; extra == "full"
|
|
117
|
+
Requires-Dist: pandas; extra == "full"
|
|
123
118
|
Requires-Dist: mostlyai-engine; extra == "full"
|
|
124
|
-
Requires-Dist:
|
|
125
|
-
Requires-Dist: geomloss; extra == "full"
|
|
119
|
+
Requires-Dist: opacus==1.5.3; extra == "full"
|
|
126
120
|
Requires-Dist: xgboost; extra == "full"
|
|
121
|
+
Requires-Dist: scipy; extra == "full"
|
|
122
|
+
Requires-Dist: einops; extra == "full"
|
|
123
|
+
Requires-Dist: ctgan==0.10.0; extra == "full"
|
|
124
|
+
Requires-Dist: numpy; extra == "full"
|
|
125
|
+
Requires-Dist: torch; extra == "full"
|
|
126
|
+
Requires-Dist: scikit-learn; extra == "full"
|
|
127
127
|
Requires-Dist: synthcity==0.2.12; extra == "full"
|
|
128
128
|
|
|
129
129
|
<table align="center" border="0">
|
|
@@ -13,6 +13,7 @@ from sklearn.metrics import (
|
|
|
13
13
|
r2_score,
|
|
14
14
|
root_mean_squared_error,
|
|
15
15
|
roc_auc_score,
|
|
16
|
+
roc_curve,
|
|
16
17
|
)
|
|
17
18
|
from sklearn.neighbors import KDTree, NearestNeighbors
|
|
18
19
|
from scipy.stats import gaussian_kde, rankdata
|
|
@@ -54,6 +55,32 @@ def lift_at_k(y_true, y_score, k=0.1):
|
|
|
54
55
|
return float(precision_at_k / prevalence)
|
|
55
56
|
|
|
56
57
|
|
|
58
|
+
def tpr_at_fpr(y_true, y_score, max_fpr=0.1):
|
|
59
|
+
"""
|
|
60
|
+
Return the highest true positive rate achievable at or below max_fpr.
|
|
61
|
+
|
|
62
|
+
This is a thresholded attack metric: it measures member recall while
|
|
63
|
+
constraining the fraction of non-members incorrectly flagged as members.
|
|
64
|
+
"""
|
|
65
|
+
if not 0 <= max_fpr <= 1:
|
|
66
|
+
raise ValueError("max_fpr must be in the interval [0, 1].")
|
|
67
|
+
|
|
68
|
+
y_true = np.asarray(y_true, dtype=int)
|
|
69
|
+
y_score = np.nan_to_num(np.asarray(y_score, dtype=float))
|
|
70
|
+
if len(y_true) != len(y_score):
|
|
71
|
+
raise ValueError("y_true and y_score must have the same length.")
|
|
72
|
+
if len(y_true) == 0:
|
|
73
|
+
return 0.0
|
|
74
|
+
if not np.any(y_true == 1) or not np.any(y_true == 0):
|
|
75
|
+
return 0.0
|
|
76
|
+
|
|
77
|
+
fpr, tpr, _ = roc_curve(y_true, y_score)
|
|
78
|
+
valid = fpr <= max_fpr
|
|
79
|
+
if not np.any(valid):
|
|
80
|
+
return 0.0
|
|
81
|
+
return float(np.max(tpr[valid]))
|
|
82
|
+
|
|
83
|
+
|
|
57
84
|
class DCR:
|
|
58
85
|
"""Distance to Closest Record (DCR) privacy metrics.
|
|
59
86
|
|
|
@@ -114,17 +141,17 @@ class DCR:
|
|
|
114
141
|
- "dcr.score": DCR score such that higher scores indicate better privacy
|
|
115
142
|
- "dcr.train": Proportion closer to train
|
|
116
143
|
- "dcr.test": Proportion closer to test
|
|
117
|
-
- "dcr.quantile_002": Proportion closer to train than the 2% train
|
|
118
|
-
- "dcr.quantile_005": Proportion closer to train than the 5% train
|
|
144
|
+
- "dcr.quantile_002": Proportion closer to train than the 2% test-to-train distance quantile
|
|
145
|
+
- "dcr.quantile_005": Proportion closer to train than the 5% test-to-train distance quantile
|
|
119
146
|
- "dcr.nndr_train": Mean NNDR score from synthetic records to train records
|
|
120
147
|
- "dcr.nndr_train_002": 2% NNDR quantile from synthetic records to train records
|
|
121
148
|
- "dcr.nndr_train_005": 5% NNDR quantile from synthetic records to train records
|
|
122
|
-
- "dcr.nndr_test": Mean NNDR
|
|
123
|
-
- "dcr.nndr_test_002": 2% NNDR
|
|
124
|
-
- "dcr.nndr_test_005": 5% NNDR
|
|
125
|
-
- "dcr.nndr_ratio": Mean pointwise ratio of each synthetic row's train NNDR score to
|
|
126
|
-
- "dcr.nndr_ratio_002": 2% quantile of pointwise
|
|
127
|
-
- "dcr.nndr_ratio_005": 5% quantile of pointwise
|
|
149
|
+
- "dcr.nndr_test": Mean NNDR score from synthetic records to test records
|
|
150
|
+
- "dcr.nndr_test_002": 2% NNDR quantile from synthetic records to test records
|
|
151
|
+
- "dcr.nndr_test_005": 5% NNDR quantile from synthetic records to test records
|
|
152
|
+
- "dcr.nndr_ratio": Mean pointwise ratio of each synthetic row's train NNDR score to its test NNDR score
|
|
153
|
+
- "dcr.nndr_ratio_002": 2% quantile of pointwise synthetic train/test NNDR ratios
|
|
154
|
+
- "dcr.nndr_ratio_005": 5% quantile of pointwise synthetic train/test NNDR ratios
|
|
128
155
|
|
|
129
156
|
Raises:
|
|
130
157
|
AssertionError: If test set is larger than train set.
|
|
@@ -136,37 +163,29 @@ class DCR:
|
|
|
136
163
|
train
|
|
137
164
|
), "Test set must be smaller than or equal to train size to compute DCR"
|
|
138
165
|
|
|
166
|
+
num_rows_subsample = len(test) if self.subsample_test_size else len(train)
|
|
167
|
+
if len(sd) < num_rows_subsample:
|
|
168
|
+
raise ValueError(
|
|
169
|
+
"Synthetic data must contain at least "
|
|
170
|
+
f"{num_rows_subsample} rows to compute DCR with the current "
|
|
171
|
+
"subsampling settings."
|
|
172
|
+
)
|
|
173
|
+
|
|
139
174
|
data = gower_like_transform(
|
|
140
|
-
{"train": train, "test": test, "syn": sd[: len(train)]},
|
|
175
|
+
{"train": train, "test": test, "syn": sd.iloc[: len(train)]},
|
|
141
176
|
reference_data=train,
|
|
142
177
|
discrete_features=self.discrete_features,
|
|
143
178
|
categorical_fit_data=[train, test, sd],
|
|
144
179
|
)
|
|
145
180
|
|
|
146
181
|
# Optionally subsample train and synthetic data to match the test size.
|
|
147
|
-
num_rows_subsample = (
|
|
148
|
-
len(data["test"]) if self.subsample_test_size else len(data["train"])
|
|
149
|
-
)
|
|
150
182
|
if len(data["train"]) < 2 or len(data["test"]) < 2 or num_rows_subsample < 2:
|
|
151
183
|
raise ValueError(
|
|
152
184
|
"DCR with NNDR requires at least two train rows and two test rows "
|
|
153
185
|
"after applying subsampling."
|
|
154
186
|
)
|
|
155
187
|
num_iterations = int(np.ceil(len(data["train"]) / num_rows_subsample))
|
|
156
|
-
|
|
157
|
-
closer_to_train = []
|
|
158
|
-
closer_to_test = []
|
|
159
|
-
dcr_002 = []
|
|
160
|
-
dcr_005 = []
|
|
161
|
-
nndr_train = []
|
|
162
|
-
nndr_train_002 = []
|
|
163
|
-
nndr_train_005 = []
|
|
164
|
-
nndr_test = []
|
|
165
|
-
nndr_test_002 = []
|
|
166
|
-
nndr_test_005 = []
|
|
167
|
-
nndr_ratio = []
|
|
168
|
-
nndr_ratio_002 = []
|
|
169
|
-
nndr_ratio_005 = []
|
|
188
|
+
metric_values = {}
|
|
170
189
|
|
|
171
190
|
rng = np.random.default_rng(self.random_state)
|
|
172
191
|
|
|
@@ -177,7 +196,6 @@ class DCR:
|
|
|
177
196
|
query: np.ndarray,
|
|
178
197
|
reference: np.ndarray,
|
|
179
198
|
n_neighbors: int = 1,
|
|
180
|
-
return_indices: bool = False,
|
|
181
199
|
):
|
|
182
200
|
if len(reference) < n_neighbors:
|
|
183
201
|
raise ValueError(
|
|
@@ -189,87 +207,78 @@ class DCR:
|
|
|
189
207
|
metric="cityblock",
|
|
190
208
|
n_jobs=-1,
|
|
191
209
|
).fit(reference)
|
|
192
|
-
distances, indices = nbrs.kneighbors(query)
|
|
210
|
+
distances, _ = nbrs.kneighbors(query)
|
|
193
211
|
if n_neighbors == 1:
|
|
194
212
|
distances = distances.ravel()
|
|
195
|
-
indices = indices.ravel()
|
|
196
|
-
if return_indices:
|
|
197
|
-
return distances, indices
|
|
198
213
|
return distances
|
|
199
214
|
|
|
200
|
-
def
|
|
215
|
+
def safe_ratio(numerator: np.ndarray, denominator: np.ndarray) -> np.ndarray:
|
|
201
216
|
return np.divide(
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
out=np.ones(len(
|
|
205
|
-
where=
|
|
217
|
+
numerator,
|
|
218
|
+
denominator,
|
|
219
|
+
out=np.ones(len(numerator), dtype=float),
|
|
220
|
+
where=denominator > 0,
|
|
206
221
|
)
|
|
207
222
|
|
|
223
|
+
def nearest_distance_ratio(distances: np.ndarray) -> np.ndarray:
|
|
224
|
+
return safe_ratio(distances[:, 0], distances[:, 1])
|
|
225
|
+
|
|
226
|
+
def distribution_metrics(prefix: str, values: np.ndarray) -> dict:
|
|
227
|
+
return {
|
|
228
|
+
prefix: np.mean(values),
|
|
229
|
+
f"{prefix}_002": np.quantile(values, 0.02),
|
|
230
|
+
f"{prefix}_005": np.quantile(values, 0.05),
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
def collect(metrics: dict):
|
|
234
|
+
for name, value in metrics.items():
|
|
235
|
+
metric_values.setdefault(name, []).append(value)
|
|
236
|
+
|
|
208
237
|
for _ in range(num_iterations):
|
|
209
238
|
syn_curr = choose(data["syn"], num_rows_subsample)
|
|
210
239
|
train_curr = choose(data["train"], num_rows_subsample)
|
|
211
240
|
test_curr = choose(data["test"], min(len(data["test"]), num_rows_subsample))
|
|
212
241
|
|
|
213
|
-
d_s_tr_neighbors
|
|
242
|
+
d_s_tr_neighbors = nearest_distances(
|
|
214
243
|
syn_curr,
|
|
215
244
|
train_curr,
|
|
216
245
|
n_neighbors=2,
|
|
217
|
-
return_indices=True,
|
|
218
246
|
)
|
|
219
247
|
d_s_tr = d_s_tr_neighbors[:, 0]
|
|
220
248
|
# align test set size to never exceed subsampled set size
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
249
|
+
d_s_te_neighbors = nearest_distances(syn_curr, test_curr, n_neighbors=2)
|
|
250
|
+
d_s_te = d_s_te_neighbors[:, 0]
|
|
251
|
+
d_te_tr = nearest_distances(test_curr, train_curr)
|
|
224
252
|
|
|
225
253
|
closer_to_train_ = np.mean(d_s_tr < d_s_te)
|
|
226
254
|
closer_to_test_ = 1 - closer_to_train_
|
|
227
|
-
closer_to_train.append(closer_to_train_)
|
|
228
|
-
closer_to_test.append(closer_to_test_)
|
|
229
|
-
score = min(1, closer_to_test_ * 2)
|
|
230
|
-
scores.append(score)
|
|
231
|
-
dcr_002.append(np.mean(d_s_tr < np.quantile(d_tr_te, 0.02)))
|
|
232
|
-
dcr_005.append(np.mean(d_s_tr < np.quantile(d_tr_te, 0.05)))
|
|
233
255
|
|
|
234
256
|
nndr_train_scores = nearest_distance_ratio(d_s_tr_neighbors)
|
|
235
|
-
nndr_test_scores = nearest_distance_ratio(
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
nndr_train_scores,
|
|
251
|
-
nndr_test_scores_matched,
|
|
252
|
-
out=np.ones(len(nndr_train_scores), dtype=float),
|
|
253
|
-
where=nndr_test_scores_matched > 0,
|
|
257
|
+
nndr_test_scores = nearest_distance_ratio(d_s_te_neighbors)
|
|
258
|
+
nndr_ratio_scores = safe_ratio(nndr_train_scores, nndr_test_scores)
|
|
259
|
+
|
|
260
|
+
iteration_metrics = {
|
|
261
|
+
"score": min(1, closer_to_test_ * 2),
|
|
262
|
+
"train": closer_to_train_,
|
|
263
|
+
"test": closer_to_test_,
|
|
264
|
+
"quantile_002": np.mean(d_s_tr < np.quantile(d_te_tr, 0.02)),
|
|
265
|
+
"quantile_005": np.mean(d_s_tr < np.quantile(d_te_tr, 0.05)),
|
|
266
|
+
}
|
|
267
|
+
iteration_metrics.update(
|
|
268
|
+
distribution_metrics("nndr_train", nndr_train_scores)
|
|
269
|
+
)
|
|
270
|
+
iteration_metrics.update(
|
|
271
|
+
distribution_metrics("nndr_test", nndr_test_scores)
|
|
254
272
|
)
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
273
|
+
iteration_metrics.update(
|
|
274
|
+
distribution_metrics("nndr_ratio", nndr_ratio_scores)
|
|
275
|
+
)
|
|
276
|
+
|
|
277
|
+
collect(iteration_metrics)
|
|
278
|
+
|
|
258
279
|
return {
|
|
259
|
-
f"{self.name}.
|
|
260
|
-
|
|
261
|
-
f"{self.name}.test": float(np.mean(closer_to_test)),
|
|
262
|
-
f"{self.name}.quantile_002": float(np.mean(dcr_002)),
|
|
263
|
-
f"{self.name}.quantile_005": float(np.mean(dcr_005)),
|
|
264
|
-
f"{self.name}.nndr_train": float(np.mean(nndr_train)),
|
|
265
|
-
f"{self.name}.nndr_train_002": float(np.mean(nndr_train_002)),
|
|
266
|
-
f"{self.name}.nndr_train_005": float(np.mean(nndr_train_005)),
|
|
267
|
-
f"{self.name}.nndr_test": float(np.mean(nndr_test)),
|
|
268
|
-
f"{self.name}.nndr_test_002": float(np.mean(nndr_test_002)),
|
|
269
|
-
f"{self.name}.nndr_test_005": float(np.mean(nndr_test_005)),
|
|
270
|
-
f"{self.name}.nndr_ratio": float(np.mean(nndr_ratio)),
|
|
271
|
-
f"{self.name}.nndr_ratio_002": float(np.mean(nndr_ratio_002)),
|
|
272
|
-
f"{self.name}.nndr_ratio_005": float(np.mean(nndr_ratio_005)),
|
|
280
|
+
f"{self.name}.{name}": float(np.mean(values))
|
|
281
|
+
for name, values in metric_values.items()
|
|
273
282
|
}
|
|
274
283
|
|
|
275
284
|
|
|
@@ -484,6 +493,9 @@ class MIA(ABC):
|
|
|
484
493
|
"lift_010",
|
|
485
494
|
"lift_005",
|
|
486
495
|
"lift_001",
|
|
496
|
+
"tpr_at_fpr_010",
|
|
497
|
+
"tpr_at_fpr_005",
|
|
498
|
+
"tpr_at_fpr_001",
|
|
487
499
|
)
|
|
488
500
|
|
|
489
501
|
def __init__(
|
|
@@ -535,6 +547,15 @@ class MIA(ABC):
|
|
|
535
547
|
"lift_010": lift_at_k(mia_data.y_eval, membership_scores, 0.10),
|
|
536
548
|
"lift_005": lift_at_k(mia_data.y_eval, membership_scores, 0.05),
|
|
537
549
|
"lift_001": lift_at_k(mia_data.y_eval, membership_scores, 0.01),
|
|
550
|
+
"tpr_at_fpr_010": tpr_at_fpr(
|
|
551
|
+
mia_data.y_eval, membership_scores, 0.10
|
|
552
|
+
),
|
|
553
|
+
"tpr_at_fpr_005": tpr_at_fpr(
|
|
554
|
+
mia_data.y_eval, membership_scores, 0.05
|
|
555
|
+
),
|
|
556
|
+
"tpr_at_fpr_001": tpr_at_fpr(
|
|
557
|
+
mia_data.y_eval, membership_scores, 0.01
|
|
558
|
+
),
|
|
538
559
|
}
|
|
539
560
|
|
|
540
561
|
avg_scores = {
|
|
@@ -1017,6 +1038,9 @@ class EnsembleMIA(MIA):
|
|
|
1017
1038
|
"lift_010": lift_at_k(y_eval, scores, 0.10),
|
|
1018
1039
|
"lift_005": lift_at_k(y_eval, scores, 0.05),
|
|
1019
1040
|
"lift_001": lift_at_k(y_eval, scores, 0.01),
|
|
1041
|
+
"tpr_at_fpr_010": tpr_at_fpr(y_eval, scores, 0.10),
|
|
1042
|
+
"tpr_at_fpr_005": tpr_at_fpr(y_eval, scores, 0.05),
|
|
1043
|
+
"tpr_at_fpr_001": tpr_at_fpr(y_eval, scores, 0.01),
|
|
1020
1044
|
}
|
|
1021
1045
|
|
|
1022
1046
|
def _component_membership_scores(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: synthyverse
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Synthetic data generation and evaluation library
|
|
5
5
|
Home-page: https://github.com/synthyverse/synthyverse
|
|
6
6
|
Author: Jim Achterberg, Saif Ul Islam, Zia Ur Rehman
|
|
@@ -106,24 +106,24 @@ Requires-Dist: xgboost; extra == "eval"
|
|
|
106
106
|
Requires-Dist: optuna; extra == "eval"
|
|
107
107
|
Requires-Dist: seaborn; extra == "eval"
|
|
108
108
|
Provides-Extra: full
|
|
109
|
-
Requires-Dist: ctgan==0.10.0; extra == "full"
|
|
110
109
|
Requires-Dist: imbalanced-learn; extra == "full"
|
|
111
|
-
Requires-Dist: pandas; extra == "full"
|
|
112
110
|
Requires-Dist: pykeops; extra == "full"
|
|
113
|
-
Requires-Dist:
|
|
114
|
-
Requires-Dist: scikit-learn; extra == "full"
|
|
115
|
-
Requires-Dist: optuna; extra == "full"
|
|
116
|
-
Requires-Dist: einops; extra == "full"
|
|
117
|
-
Requires-Dist: seaborn; extra == "full"
|
|
118
|
-
Requires-Dist: numpy; extra == "full"
|
|
119
|
-
Requires-Dist: scipy; extra == "full"
|
|
111
|
+
Requires-Dist: geomloss; extra == "full"
|
|
120
112
|
Requires-Dist: tqdm; extra == "full"
|
|
121
|
-
Requires-Dist:
|
|
113
|
+
Requires-Dist: seaborn; extra == "full"
|
|
114
|
+
Requires-Dist: torch-ema; extra == "full"
|
|
115
|
+
Requires-Dist: optuna; extra == "full"
|
|
122
116
|
Requires-Dist: arfpy; extra == "full"
|
|
117
|
+
Requires-Dist: pandas; extra == "full"
|
|
123
118
|
Requires-Dist: mostlyai-engine; extra == "full"
|
|
124
|
-
Requires-Dist:
|
|
125
|
-
Requires-Dist: geomloss; extra == "full"
|
|
119
|
+
Requires-Dist: opacus==1.5.3; extra == "full"
|
|
126
120
|
Requires-Dist: xgboost; extra == "full"
|
|
121
|
+
Requires-Dist: scipy; extra == "full"
|
|
122
|
+
Requires-Dist: einops; extra == "full"
|
|
123
|
+
Requires-Dist: ctgan==0.10.0; extra == "full"
|
|
124
|
+
Requires-Dist: numpy; extra == "full"
|
|
125
|
+
Requires-Dist: torch; extra == "full"
|
|
126
|
+
Requires-Dist: scikit-learn; extra == "full"
|
|
127
127
|
Requires-Dist: synthcity==0.2.12; extra == "full"
|
|
128
128
|
|
|
129
129
|
<table align="center" border="0">
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabargn_generator/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabddpm_generator/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabsyn_generator/diffusion.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/univariate_generator/__init__.py
RENAMED
|
File without changes
|
{synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/univariate_generator/univariate.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -47,24 +47,24 @@ optuna
|
|
|
47
47
|
seaborn
|
|
48
48
|
|
|
49
49
|
[full]
|
|
50
|
-
ctgan==0.10.0
|
|
51
50
|
imbalanced-learn
|
|
52
|
-
pandas
|
|
53
51
|
pykeops
|
|
54
|
-
|
|
55
|
-
scikit-learn
|
|
56
|
-
optuna
|
|
57
|
-
einops
|
|
58
|
-
seaborn
|
|
59
|
-
numpy
|
|
60
|
-
scipy
|
|
52
|
+
geomloss
|
|
61
53
|
tqdm
|
|
62
|
-
|
|
54
|
+
seaborn
|
|
55
|
+
torch-ema
|
|
56
|
+
optuna
|
|
63
57
|
arfpy
|
|
58
|
+
pandas
|
|
64
59
|
mostlyai-engine
|
|
65
|
-
|
|
66
|
-
geomloss
|
|
60
|
+
opacus==1.5.3
|
|
67
61
|
xgboost
|
|
62
|
+
scipy
|
|
63
|
+
einops
|
|
64
|
+
ctgan==0.10.0
|
|
65
|
+
numpy
|
|
66
|
+
torch
|
|
67
|
+
scikit-learn
|
|
68
68
|
synthcity==0.2.12
|
|
69
69
|
|
|
70
70
|
[smote]
|
|
File without changes
|