synthyverse 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. {synthyverse-0.2.0/synthyverse.egg-info → synthyverse-0.2.1}/PKG-INFO +13 -13
  2. {synthyverse-0.2.0 → synthyverse-0.2.1}/setup.py +1 -1
  3. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/privacy.py +107 -83
  4. {synthyverse-0.2.0 → synthyverse-0.2.1/synthyverse.egg-info}/PKG-INFO +13 -13
  5. {synthyverse-0.2.0 → synthyverse-0.2.1}/LICENSE +0 -0
  6. {synthyverse-0.2.0 → synthyverse-0.2.1}/MANIFEST.in +0 -0
  7. {synthyverse-0.2.0 → synthyverse-0.2.1}/README.md +0 -0
  8. {synthyverse-0.2.0 → synthyverse-0.2.1}/logo/logo.png +0 -0
  9. {synthyverse-0.2.0 → synthyverse-0.2.1}/pyproject.toml +0 -0
  10. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/dev/docs.txt +0 -0
  11. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/evaluation/eval.txt +0 -0
  12. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/arf.txt +0 -0
  13. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/base.txt +0 -0
  14. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/bn.txt +0 -0
  15. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/cdtd.txt +0 -0
  16. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/ctgan.txt +0 -0
  17. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/smote.txt +0 -0
  18. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/tabargn.txt +0 -0
  19. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/tabddpm.txt +0 -0
  20. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/tabsyn.txt +0 -0
  21. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/tvae.txt +0 -0
  22. {synthyverse-0.2.0 → synthyverse-0.2.1}/requirements/generators/univariate.txt +0 -0
  23. {synthyverse-0.2.0 → synthyverse-0.2.1}/scripts/__init__.py +0 -0
  24. {synthyverse-0.2.0 → synthyverse-0.2.1}/scripts/setup_utils.py +0 -0
  25. {synthyverse-0.2.0 → synthyverse-0.2.1}/scripts/update_templates.py +0 -0
  26. {synthyverse-0.2.0 → synthyverse-0.2.1}/setup.cfg +0 -0
  27. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/__init__.py +0 -0
  28. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/benchmark/__init__.py +0 -0
  29. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/benchmark/synthesis.py +0 -0
  30. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/__init__.py +0 -0
  31. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/eval.py +0 -0
  32. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/fidelity.py +0 -0
  33. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/ml.py +0 -0
  34. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/preprocessing.py +0 -0
  35. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/evaluation/utility.py +0 -0
  36. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/__init__.py +0 -0
  37. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/arf_generator/__init__.py +0 -0
  38. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/arf_generator/arf.py +0 -0
  39. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/base.py +0 -0
  40. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/bn_generator/__init__.py +0 -0
  41. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/bn_generator/bn.py +0 -0
  42. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/cdtd_generator/__init__.py +0 -0
  43. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/cdtd_generator/cdtd.py +0 -0
  44. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/cdtd_generator/layers.py +0 -0
  45. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/cdtd_generator/utils.py +0 -0
  46. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/ctgan_generator/__init__.py +0 -0
  47. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/ctgan_generator/ct_gan.py +0 -0
  48. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/persistence.py +0 -0
  49. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/smote_generator/__init__.py +0 -0
  50. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/smote_generator/smote.py +0 -0
  51. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabargn_generator/__init__.py +0 -0
  52. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabargn_generator/tabargn.py +0 -0
  53. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabddpm_generator/__init__.py +0 -0
  54. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabddpm_generator/model.py +0 -0
  55. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabddpm_generator/plugin.py +0 -0
  56. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabddpm_generator/tabddpm.py +0 -0
  57. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabsyn_generator/__init__.py +0 -0
  58. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabsyn_generator/diffusion.py +0 -0
  59. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabsyn_generator/tabsyn.py +0 -0
  60. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tabsyn_generator/vae.py +0 -0
  61. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tvae_generator/__init__.py +0 -0
  62. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/tvae_generator/tvae.py +0 -0
  63. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/univariate_generator/__init__.py +0 -0
  64. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/generators/univariate_generator/univariate.py +0 -0
  65. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/utils/__init__.py +0 -0
  66. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/utils/reproducibility.py +0 -0
  67. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse/utils/utils.py +0 -0
  68. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse.egg-info/SOURCES.txt +0 -0
  69. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse.egg-info/dependency_links.txt +0 -0
  70. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse.egg-info/requires.txt +12 -12
  71. {synthyverse-0.2.0 → synthyverse-0.2.1}/synthyverse.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: synthyverse
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Synthetic data generation and evaluation library
5
5
  Home-page: https://github.com/synthyverse/synthyverse
6
6
  Author: Jim Achterberg, Saif Ul Islam, Zia Ur Rehman
@@ -106,24 +106,24 @@ Requires-Dist: xgboost; extra == "eval"
106
106
  Requires-Dist: optuna; extra == "eval"
107
107
  Requires-Dist: seaborn; extra == "eval"
108
108
  Provides-Extra: full
109
- Requires-Dist: ctgan==0.10.0; extra == "full"
110
109
  Requires-Dist: imbalanced-learn; extra == "full"
111
- Requires-Dist: pandas; extra == "full"
112
110
  Requires-Dist: pykeops; extra == "full"
113
- Requires-Dist: torch; extra == "full"
114
- Requires-Dist: scikit-learn; extra == "full"
115
- Requires-Dist: optuna; extra == "full"
116
- Requires-Dist: einops; extra == "full"
117
- Requires-Dist: seaborn; extra == "full"
118
- Requires-Dist: numpy; extra == "full"
119
- Requires-Dist: scipy; extra == "full"
111
+ Requires-Dist: geomloss; extra == "full"
120
112
  Requires-Dist: tqdm; extra == "full"
121
- Requires-Dist: opacus==1.5.3; extra == "full"
113
+ Requires-Dist: seaborn; extra == "full"
114
+ Requires-Dist: torch-ema; extra == "full"
115
+ Requires-Dist: optuna; extra == "full"
122
116
  Requires-Dist: arfpy; extra == "full"
117
+ Requires-Dist: pandas; extra == "full"
123
118
  Requires-Dist: mostlyai-engine; extra == "full"
124
- Requires-Dist: torch-ema; extra == "full"
125
- Requires-Dist: geomloss; extra == "full"
119
+ Requires-Dist: opacus==1.5.3; extra == "full"
126
120
  Requires-Dist: xgboost; extra == "full"
121
+ Requires-Dist: scipy; extra == "full"
122
+ Requires-Dist: einops; extra == "full"
123
+ Requires-Dist: ctgan==0.10.0; extra == "full"
124
+ Requires-Dist: numpy; extra == "full"
125
+ Requires-Dist: torch; extra == "full"
126
+ Requires-Dist: scikit-learn; extra == "full"
127
127
  Requires-Dist: synthcity==0.2.12; extra == "full"
128
128
 
129
129
  <table align="center" border="0">
@@ -9,7 +9,7 @@ if scripts_dir not in sys.path:
9
9
 
10
10
  from scripts.setup_utils import get_extras, read_requirements
11
11
 
12
- version = "0.2.0"
12
+ version = "0.2.1"
13
13
 
14
14
  # Define extras dynamically from requirements folder
15
15
  extras = get_extras()
@@ -13,6 +13,7 @@ from sklearn.metrics import (
13
13
  r2_score,
14
14
  root_mean_squared_error,
15
15
  roc_auc_score,
16
+ roc_curve,
16
17
  )
17
18
  from sklearn.neighbors import KDTree, NearestNeighbors
18
19
  from scipy.stats import gaussian_kde, rankdata
@@ -54,6 +55,32 @@ def lift_at_k(y_true, y_score, k=0.1):
54
55
  return float(precision_at_k / prevalence)
55
56
 
56
57
 
58
+ def tpr_at_fpr(y_true, y_score, max_fpr=0.1):
59
+ """
60
+ Return the highest true positive rate achievable at or below max_fpr.
61
+
62
+ This is a thresholded attack metric: it measures member recall while
63
+ constraining the fraction of non-members incorrectly flagged as members.
64
+ """
65
+ if not 0 <= max_fpr <= 1:
66
+ raise ValueError("max_fpr must be in the interval [0, 1].")
67
+
68
+ y_true = np.asarray(y_true, dtype=int)
69
+ y_score = np.nan_to_num(np.asarray(y_score, dtype=float))
70
+ if len(y_true) != len(y_score):
71
+ raise ValueError("y_true and y_score must have the same length.")
72
+ if len(y_true) == 0:
73
+ return 0.0
74
+ if not np.any(y_true == 1) or not np.any(y_true == 0):
75
+ return 0.0
76
+
77
+ fpr, tpr, _ = roc_curve(y_true, y_score)
78
+ valid = fpr <= max_fpr
79
+ if not np.any(valid):
80
+ return 0.0
81
+ return float(np.max(tpr[valid]))
82
+
83
+
57
84
  class DCR:
58
85
  """Distance to Closest Record (DCR) privacy metrics.
59
86
 
@@ -114,17 +141,17 @@ class DCR:
114
141
  - "dcr.score": DCR score such that higher scores indicate better privacy
115
142
  - "dcr.train": Proportion closer to train
116
143
  - "dcr.test": Proportion closer to test
117
- - "dcr.quantile_002": Proportion closer to train than the 2% train-test distance quantile
118
- - "dcr.quantile_005": Proportion closer to train than the 5% train-test distance quantile
144
+ - "dcr.quantile_002": Proportion closer to train than the 2% test-to-train distance quantile
145
+ - "dcr.quantile_005": Proportion closer to train than the 5% test-to-train distance quantile
119
146
  - "dcr.nndr_train": Mean NNDR score from synthetic records to train records
120
147
  - "dcr.nndr_train_002": 2% NNDR quantile from synthetic records to train records
121
148
  - "dcr.nndr_train_005": 5% NNDR quantile from synthetic records to train records
122
- - "dcr.nndr_test": Mean NNDR baseline score from train records to test records
123
- - "dcr.nndr_test_002": 2% NNDR baseline quantile from train records to test records
124
- - "dcr.nndr_test_005": 5% NNDR baseline quantile from train records to test records
125
- - "dcr.nndr_ratio": Mean pointwise ratio of each synthetic row's train NNDR score to the train-test NNDR baseline of its nearest training row
126
- - "dcr.nndr_ratio_002": 2% quantile of pointwise nearest-training-row normalized NNDR ratios
127
- - "dcr.nndr_ratio_005": 5% quantile of pointwise nearest-training-row normalized NNDR ratios
149
+ - "dcr.nndr_test": Mean NNDR score from synthetic records to test records
150
+ - "dcr.nndr_test_002": 2% NNDR quantile from synthetic records to test records
151
+ - "dcr.nndr_test_005": 5% NNDR quantile from synthetic records to test records
152
+ - "dcr.nndr_ratio": Mean pointwise ratio of each synthetic row's train NNDR score to its test NNDR score
153
+ - "dcr.nndr_ratio_002": 2% quantile of pointwise synthetic train/test NNDR ratios
154
+ - "dcr.nndr_ratio_005": 5% quantile of pointwise synthetic train/test NNDR ratios
128
155
 
129
156
  Raises:
130
157
  AssertionError: If test set is larger than train set.
@@ -136,37 +163,29 @@ class DCR:
136
163
  train
137
164
  ), "Test set must be smaller than or equal to train size to compute DCR"
138
165
 
166
+ num_rows_subsample = len(test) if self.subsample_test_size else len(train)
167
+ if len(sd) < num_rows_subsample:
168
+ raise ValueError(
169
+ "Synthetic data must contain at least "
170
+ f"{num_rows_subsample} rows to compute DCR with the current "
171
+ "subsampling settings."
172
+ )
173
+
139
174
  data = gower_like_transform(
140
- {"train": train, "test": test, "syn": sd[: len(train)]},
175
+ {"train": train, "test": test, "syn": sd.iloc[: len(train)]},
141
176
  reference_data=train,
142
177
  discrete_features=self.discrete_features,
143
178
  categorical_fit_data=[train, test, sd],
144
179
  )
145
180
 
146
181
  # Optionally subsample train and synthetic data to match the test size.
147
- num_rows_subsample = (
148
- len(data["test"]) if self.subsample_test_size else len(data["train"])
149
- )
150
182
  if len(data["train"]) < 2 or len(data["test"]) < 2 or num_rows_subsample < 2:
151
183
  raise ValueError(
152
184
  "DCR with NNDR requires at least two train rows and two test rows "
153
185
  "after applying subsampling."
154
186
  )
155
187
  num_iterations = int(np.ceil(len(data["train"]) / num_rows_subsample))
156
- scores = []
157
- closer_to_train = []
158
- closer_to_test = []
159
- dcr_002 = []
160
- dcr_005 = []
161
- nndr_train = []
162
- nndr_train_002 = []
163
- nndr_train_005 = []
164
- nndr_test = []
165
- nndr_test_002 = []
166
- nndr_test_005 = []
167
- nndr_ratio = []
168
- nndr_ratio_002 = []
169
- nndr_ratio_005 = []
188
+ metric_values = {}
170
189
 
171
190
  rng = np.random.default_rng(self.random_state)
172
191
 
@@ -177,7 +196,6 @@ class DCR:
177
196
  query: np.ndarray,
178
197
  reference: np.ndarray,
179
198
  n_neighbors: int = 1,
180
- return_indices: bool = False,
181
199
  ):
182
200
  if len(reference) < n_neighbors:
183
201
  raise ValueError(
@@ -189,87 +207,78 @@ class DCR:
189
207
  metric="cityblock",
190
208
  n_jobs=-1,
191
209
  ).fit(reference)
192
- distances, indices = nbrs.kneighbors(query)
210
+ distances, _ = nbrs.kneighbors(query)
193
211
  if n_neighbors == 1:
194
212
  distances = distances.ravel()
195
- indices = indices.ravel()
196
- if return_indices:
197
- return distances, indices
198
213
  return distances
199
214
 
200
- def nearest_distance_ratio(distances: np.ndarray) -> np.ndarray:
215
+ def safe_ratio(numerator: np.ndarray, denominator: np.ndarray) -> np.ndarray:
201
216
  return np.divide(
202
- distances[:, 0],
203
- distances[:, 1],
204
- out=np.ones(len(distances), dtype=float),
205
- where=distances[:, 1] > 0,
217
+ numerator,
218
+ denominator,
219
+ out=np.ones(len(numerator), dtype=float),
220
+ where=denominator > 0,
206
221
  )
207
222
 
223
+ def nearest_distance_ratio(distances: np.ndarray) -> np.ndarray:
224
+ return safe_ratio(distances[:, 0], distances[:, 1])
225
+
226
+ def distribution_metrics(prefix: str, values: np.ndarray) -> dict:
227
+ return {
228
+ prefix: np.mean(values),
229
+ f"{prefix}_002": np.quantile(values, 0.02),
230
+ f"{prefix}_005": np.quantile(values, 0.05),
231
+ }
232
+
233
+ def collect(metrics: dict):
234
+ for name, value in metrics.items():
235
+ metric_values.setdefault(name, []).append(value)
236
+
208
237
  for _ in range(num_iterations):
209
238
  syn_curr = choose(data["syn"], num_rows_subsample)
210
239
  train_curr = choose(data["train"], num_rows_subsample)
211
240
  test_curr = choose(data["test"], min(len(data["test"]), num_rows_subsample))
212
241
 
213
- d_s_tr_neighbors, d_s_tr_indices = nearest_distances(
242
+ d_s_tr_neighbors = nearest_distances(
214
243
  syn_curr,
215
244
  train_curr,
216
245
  n_neighbors=2,
217
- return_indices=True,
218
246
  )
219
247
  d_s_tr = d_s_tr_neighbors[:, 0]
220
248
  # align test set size to never exceed subsampled set size
221
- d_s_te = nearest_distances(syn_curr, test_curr)
222
- d_tr_te_neighbors = nearest_distances(train_curr, test_curr, n_neighbors=2)
223
- d_tr_te = d_tr_te_neighbors[:, 0]
249
+ d_s_te_neighbors = nearest_distances(syn_curr, test_curr, n_neighbors=2)
250
+ d_s_te = d_s_te_neighbors[:, 0]
251
+ d_te_tr = nearest_distances(test_curr, train_curr)
224
252
 
225
253
  closer_to_train_ = np.mean(d_s_tr < d_s_te)
226
254
  closer_to_test_ = 1 - closer_to_train_
227
- closer_to_train.append(closer_to_train_)
228
- closer_to_test.append(closer_to_test_)
229
- score = min(1, closer_to_test_ * 2)
230
- scores.append(score)
231
- dcr_002.append(np.mean(d_s_tr < np.quantile(d_tr_te, 0.02)))
232
- dcr_005.append(np.mean(d_s_tr < np.quantile(d_tr_te, 0.05)))
233
255
 
234
256
  nndr_train_scores = nearest_distance_ratio(d_s_tr_neighbors)
235
- nndr_test_scores = nearest_distance_ratio(d_tr_te_neighbors)
236
- nndr_train_ = np.mean(nndr_train_scores)
237
- nndr_test_ = np.mean(nndr_test_scores)
238
- nndr_train_q002 = np.quantile(nndr_train_scores, 0.02)
239
- nndr_train_q005 = np.quantile(nndr_train_scores, 0.05)
240
- nndr_test_q002 = np.quantile(nndr_test_scores, 0.02)
241
- nndr_test_q005 = np.quantile(nndr_test_scores, 0.05)
242
- nndr_train.append(nndr_train_)
243
- nndr_train_002.append(nndr_train_q002)
244
- nndr_train_005.append(nndr_train_q005)
245
- nndr_test.append(nndr_test_)
246
- nndr_test_002.append(nndr_test_q002)
247
- nndr_test_005.append(nndr_test_q005)
248
- nndr_test_scores_matched = nndr_test_scores[d_s_tr_indices[:, 0]]
249
- nndr_ratio_scores = np.divide(
250
- nndr_train_scores,
251
- nndr_test_scores_matched,
252
- out=np.ones(len(nndr_train_scores), dtype=float),
253
- where=nndr_test_scores_matched > 0,
257
+ nndr_test_scores = nearest_distance_ratio(d_s_te_neighbors)
258
+ nndr_ratio_scores = safe_ratio(nndr_train_scores, nndr_test_scores)
259
+
260
+ iteration_metrics = {
261
+ "score": min(1, closer_to_test_ * 2),
262
+ "train": closer_to_train_,
263
+ "test": closer_to_test_,
264
+ "quantile_002": np.mean(d_s_tr < np.quantile(d_te_tr, 0.02)),
265
+ "quantile_005": np.mean(d_s_tr < np.quantile(d_te_tr, 0.05)),
266
+ }
267
+ iteration_metrics.update(
268
+ distribution_metrics("nndr_train", nndr_train_scores)
269
+ )
270
+ iteration_metrics.update(
271
+ distribution_metrics("nndr_test", nndr_test_scores)
254
272
  )
255
- nndr_ratio.append(np.mean(nndr_ratio_scores))
256
- nndr_ratio_002.append(np.quantile(nndr_ratio_scores, 0.02))
257
- nndr_ratio_005.append(np.quantile(nndr_ratio_scores, 0.05))
273
+ iteration_metrics.update(
274
+ distribution_metrics("nndr_ratio", nndr_ratio_scores)
275
+ )
276
+
277
+ collect(iteration_metrics)
278
+
258
279
  return {
259
- f"{self.name}.score": float(np.mean(scores)),
260
- f"{self.name}.train": float(np.mean(closer_to_train)),
261
- f"{self.name}.test": float(np.mean(closer_to_test)),
262
- f"{self.name}.quantile_002": float(np.mean(dcr_002)),
263
- f"{self.name}.quantile_005": float(np.mean(dcr_005)),
264
- f"{self.name}.nndr_train": float(np.mean(nndr_train)),
265
- f"{self.name}.nndr_train_002": float(np.mean(nndr_train_002)),
266
- f"{self.name}.nndr_train_005": float(np.mean(nndr_train_005)),
267
- f"{self.name}.nndr_test": float(np.mean(nndr_test)),
268
- f"{self.name}.nndr_test_002": float(np.mean(nndr_test_002)),
269
- f"{self.name}.nndr_test_005": float(np.mean(nndr_test_005)),
270
- f"{self.name}.nndr_ratio": float(np.mean(nndr_ratio)),
271
- f"{self.name}.nndr_ratio_002": float(np.mean(nndr_ratio_002)),
272
- f"{self.name}.nndr_ratio_005": float(np.mean(nndr_ratio_005)),
280
+ f"{self.name}.{name}": float(np.mean(values))
281
+ for name, values in metric_values.items()
273
282
  }
274
283
 
275
284
 
@@ -484,6 +493,9 @@ class MIA(ABC):
484
493
  "lift_010",
485
494
  "lift_005",
486
495
  "lift_001",
496
+ "tpr_at_fpr_010",
497
+ "tpr_at_fpr_005",
498
+ "tpr_at_fpr_001",
487
499
  )
488
500
 
489
501
  def __init__(
@@ -535,6 +547,15 @@ class MIA(ABC):
535
547
  "lift_010": lift_at_k(mia_data.y_eval, membership_scores, 0.10),
536
548
  "lift_005": lift_at_k(mia_data.y_eval, membership_scores, 0.05),
537
549
  "lift_001": lift_at_k(mia_data.y_eval, membership_scores, 0.01),
550
+ "tpr_at_fpr_010": tpr_at_fpr(
551
+ mia_data.y_eval, membership_scores, 0.10
552
+ ),
553
+ "tpr_at_fpr_005": tpr_at_fpr(
554
+ mia_data.y_eval, membership_scores, 0.05
555
+ ),
556
+ "tpr_at_fpr_001": tpr_at_fpr(
557
+ mia_data.y_eval, membership_scores, 0.01
558
+ ),
538
559
  }
539
560
 
540
561
  avg_scores = {
@@ -1017,6 +1038,9 @@ class EnsembleMIA(MIA):
1017
1038
  "lift_010": lift_at_k(y_eval, scores, 0.10),
1018
1039
  "lift_005": lift_at_k(y_eval, scores, 0.05),
1019
1040
  "lift_001": lift_at_k(y_eval, scores, 0.01),
1041
+ "tpr_at_fpr_010": tpr_at_fpr(y_eval, scores, 0.10),
1042
+ "tpr_at_fpr_005": tpr_at_fpr(y_eval, scores, 0.05),
1043
+ "tpr_at_fpr_001": tpr_at_fpr(y_eval, scores, 0.01),
1020
1044
  }
1021
1045
 
1022
1046
  def _component_membership_scores(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: synthyverse
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Synthetic data generation and evaluation library
5
5
  Home-page: https://github.com/synthyverse/synthyverse
6
6
  Author: Jim Achterberg, Saif Ul Islam, Zia Ur Rehman
@@ -106,24 +106,24 @@ Requires-Dist: xgboost; extra == "eval"
106
106
  Requires-Dist: optuna; extra == "eval"
107
107
  Requires-Dist: seaborn; extra == "eval"
108
108
  Provides-Extra: full
109
- Requires-Dist: ctgan==0.10.0; extra == "full"
110
109
  Requires-Dist: imbalanced-learn; extra == "full"
111
- Requires-Dist: pandas; extra == "full"
112
110
  Requires-Dist: pykeops; extra == "full"
113
- Requires-Dist: torch; extra == "full"
114
- Requires-Dist: scikit-learn; extra == "full"
115
- Requires-Dist: optuna; extra == "full"
116
- Requires-Dist: einops; extra == "full"
117
- Requires-Dist: seaborn; extra == "full"
118
- Requires-Dist: numpy; extra == "full"
119
- Requires-Dist: scipy; extra == "full"
111
+ Requires-Dist: geomloss; extra == "full"
120
112
  Requires-Dist: tqdm; extra == "full"
121
- Requires-Dist: opacus==1.5.3; extra == "full"
113
+ Requires-Dist: seaborn; extra == "full"
114
+ Requires-Dist: torch-ema; extra == "full"
115
+ Requires-Dist: optuna; extra == "full"
122
116
  Requires-Dist: arfpy; extra == "full"
117
+ Requires-Dist: pandas; extra == "full"
123
118
  Requires-Dist: mostlyai-engine; extra == "full"
124
- Requires-Dist: torch-ema; extra == "full"
125
- Requires-Dist: geomloss; extra == "full"
119
+ Requires-Dist: opacus==1.5.3; extra == "full"
126
120
  Requires-Dist: xgboost; extra == "full"
121
+ Requires-Dist: scipy; extra == "full"
122
+ Requires-Dist: einops; extra == "full"
123
+ Requires-Dist: ctgan==0.10.0; extra == "full"
124
+ Requires-Dist: numpy; extra == "full"
125
+ Requires-Dist: torch; extra == "full"
126
+ Requires-Dist: scikit-learn; extra == "full"
127
127
  Requires-Dist: synthcity==0.2.12; extra == "full"
128
128
 
129
129
  <table align="center" border="0">
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -47,24 +47,24 @@ optuna
47
47
  seaborn
48
48
 
49
49
  [full]
50
- ctgan==0.10.0
51
50
  imbalanced-learn
52
- pandas
53
51
  pykeops
54
- torch
55
- scikit-learn
56
- optuna
57
- einops
58
- seaborn
59
- numpy
60
- scipy
52
+ geomloss
61
53
  tqdm
62
- opacus==1.5.3
54
+ seaborn
55
+ torch-ema
56
+ optuna
63
57
  arfpy
58
+ pandas
64
59
  mostlyai-engine
65
- torch-ema
66
- geomloss
60
+ opacus==1.5.3
67
61
  xgboost
62
+ scipy
63
+ einops
64
+ ctgan==0.10.0
65
+ numpy
66
+ torch
67
+ scikit-learn
68
68
  synthcity==0.2.12
69
69
 
70
70
  [smote]