superquantx 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- superquantx/__init__.py +24 -12
- superquantx/algorithms/__init__.py +1 -1
- superquantx/algorithms/base_algorithm.py +36 -36
- superquantx/algorithms/hybrid_classifier.py +22 -22
- superquantx/algorithms/qaoa.py +29 -28
- superquantx/algorithms/quantum_agents.py +57 -56
- superquantx/algorithms/quantum_kmeans.py +17 -17
- superquantx/algorithms/quantum_nn.py +18 -18
- superquantx/algorithms/quantum_pca.py +26 -26
- superquantx/algorithms/quantum_svm.py +26 -25
- superquantx/algorithms/vqe.py +40 -39
- superquantx/algorithms.py +56 -55
- superquantx/backends/__init__.py +12 -12
- superquantx/backends/base_backend.py +25 -24
- superquantx/backends/braket_backend.py +21 -21
- superquantx/backends/cirq_backend.py +26 -26
- superquantx/backends/ocean_backend.py +38 -38
- superquantx/backends/pennylane_backend.py +12 -11
- superquantx/backends/qiskit_backend.py +12 -12
- superquantx/backends/simulator_backend.py +31 -17
- superquantx/backends/tket_backend.py +23 -23
- superquantx/circuits.py +25 -25
- superquantx/cli/commands.py +6 -7
- superquantx/cli/main.py +5 -6
- superquantx/client.py +42 -42
- superquantx/config.py +14 -14
- superquantx/datasets/__init__.py +58 -0
- superquantx/datasets/molecular.py +307 -0
- superquantx/datasets/preprocessing.py +279 -0
- superquantx/datasets/quantum_datasets.py +277 -0
- superquantx/datasets/synthetic.py +300 -0
- superquantx/exceptions.py +29 -29
- superquantx/gates.py +26 -26
- superquantx/logging_config.py +29 -29
- superquantx/measurements.py +53 -54
- superquantx/ml.py +51 -52
- superquantx/noise.py +49 -49
- superquantx/utils/benchmarking.py +41 -36
- superquantx/utils/classical_utils.py +32 -32
- superquantx/utils/feature_mapping.py +40 -35
- superquantx/utils/optimization.py +28 -26
- superquantx/utils/quantum_utils.py +47 -48
- superquantx/utils/visualization.py +49 -49
- superquantx/version.py +3 -3
- {superquantx-0.1.0.dist-info → superquantx-0.1.1.dist-info}/METADATA +18 -16
- superquantx-0.1.1.dist-info/RECORD +51 -0
- superquantx-0.1.1.dist-info/licenses/LICENSE +180 -0
- superquantx-0.1.0.dist-info/RECORD +0 -46
- superquantx-0.1.0.dist-info/licenses/LICENSE +0 -21
- {superquantx-0.1.0.dist-info → superquantx-0.1.1.dist-info}/WHEEL +0 -0
- {superquantx-0.1.0.dist-info → superquantx-0.1.1.dist-info}/entry_points.txt +0 -0
--- /dev/null
+++ superquantx/datasets/quantum_datasets.py
@@ -0,0 +1,277 @@
+"""Quantum-adapted classical datasets for machine learning.
+
+This module provides classical datasets adapted for quantum machine learning,
+with proper preprocessing and encoding for quantum circuits.
+"""
+
+from typing import Any, Dict, Optional, Tuple
+
+import numpy as np
+from sklearn import datasets
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+
+from .preprocessing import normalize_quantum_data
+
+
+def load_iris_quantum(
+    n_features: Optional[int] = None,
+    encoding: str = 'amplitude',
+    normalize: bool = True,
+    test_size: float = 0.2,
+    random_state: Optional[int] = 42
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
+    """Load and preprocess the Iris dataset for quantum machine learning.
+
+    Args:
+        n_features: Number of features to keep (default: all 4)
+        encoding: Type of quantum encoding ('amplitude', 'angle', 'basis')
+        normalize: Whether to normalize features
+        test_size: Proportion of dataset for testing
+        random_state: Random seed for reproducibility
+
+    Returns:
+        Tuple of (X_train, X_test, y_train, y_test, metadata)
+
+    """
+    # Load dataset
+    iris = datasets.load_iris()
+    X, y = iris.data, iris.target
+
+    # Feature selection
+    if n_features is not None and n_features < X.shape[1]:
+        # Select features with highest variance
+        feature_vars = np.var(X, axis=0)
+        selected_features = np.argsort(feature_vars)[-n_features:]
+        X = X[:, selected_features]
+        feature_names = [iris.feature_names[i] for i in selected_features]
+    else:
+        feature_names = iris.feature_names
+        n_features = X.shape[1]
+
+    # Normalization
+    if normalize:
+        if encoding == 'amplitude':
+            X = normalize_quantum_data(X, method='l2')
+        elif encoding == 'angle':
+            scaler = MinMaxScaler(feature_range=(0, 2*np.pi))
+            X = scaler.fit_transform(X)
+        else:
+            scaler = StandardScaler()
+            X = scaler.fit_transform(X)
+
+    # Train-test split
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=test_size, random_state=random_state, stratify=y
+    )
+
+    # Metadata
+    metadata = {
+        'dataset_name': 'iris',
+        'n_samples': len(X),
+        'n_features': n_features,
+        'n_classes': len(np.unique(y)),
+        'class_names': iris.target_names.tolist(),
+        'feature_names': feature_names,
+        'encoding': encoding,
+        'normalized': normalize
+    }
+
+    return X_train, X_test, y_train, y_test, metadata
+
+
+def load_wine_quantum(
+    n_features: Optional[int] = 8,
+    encoding: str = 'amplitude',
+    normalize: bool = True,
+    test_size: float = 0.2,
+    random_state: Optional[int] = 42
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
+    """Load and preprocess the Wine dataset for quantum machine learning.
+
+    Args:
+        n_features: Number of top features to keep (default: 8)
+        encoding: Type of quantum encoding
+        normalize: Whether to normalize features
+        test_size: Proportion of dataset for testing
+        random_state: Random seed for reproducibility
+
+    Returns:
+        Tuple of (X_train, X_test, y_train, y_test, metadata)
+
+    """
+    # Load dataset
+    wine = datasets.load_wine()
+    X, y = wine.data, wine.target
+
+    # Feature selection based on variance
+    if n_features is not None and n_features < X.shape[1]:
+        feature_vars = np.var(X, axis=0)
+        selected_features = np.argsort(feature_vars)[-n_features:]
+        X = X[:, selected_features]
+        feature_names = [wine.feature_names[i] for i in selected_features]
+    else:
+        feature_names = wine.feature_names
+        n_features = X.shape[1]
+
+    # Normalization
+    if normalize:
+        if encoding == 'amplitude':
+            X = normalize_quantum_data(X, method='l2')
+        elif encoding == 'angle':
+            scaler = MinMaxScaler(feature_range=(0, 2*np.pi))
+            X = scaler.fit_transform(X)
+        else:
+            scaler = StandardScaler()
+            X = scaler.fit_transform(X)
+
+    # Train-test split
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=test_size, random_state=random_state, stratify=y
+    )
+
+    metadata = {
+        'dataset_name': 'wine',
+        'n_samples': len(X),
+        'n_features': n_features,
+        'n_classes': len(np.unique(y)),
+        'class_names': wine.target_names.tolist(),
+        'feature_names': feature_names,
+        'encoding': encoding,
+        'normalized': normalize
+    }
+
+    return X_train, X_test, y_train, y_test, metadata
+
+
+def load_digits_quantum(
+    n_classes: int = 10,
+    n_pixels: Optional[int] = 32,
+    encoding: str = 'amplitude',
+    normalize: bool = True,
+    test_size: float = 0.2,
+    random_state: Optional[int] = 42
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
+    """Load and preprocess the Digits dataset for quantum machine learning.
+
+    Args:
+        n_classes: Number of digit classes to include (2-10)
+        n_pixels: Number of pixels to keep (reduces from 64)
+        encoding: Type of quantum encoding
+        normalize: Whether to normalize features
+        test_size: Proportion of dataset for testing
+        random_state: Random seed for reproducibility
+
+    Returns:
+        Tuple of (X_train, X_test, y_train, y_test, metadata)
+
+    """
+    # Load dataset
+    digits = datasets.load_digits()
+    X, y = digits.data, digits.target
+
+    # Class filtering
+    if n_classes < 10:
+        mask = y < n_classes
+        X, y = X[mask], y[mask]
+
+    # Feature selection (pixel reduction)
+    if n_pixels is not None and n_pixels < X.shape[1]:
+        # Select pixels with highest variance
+        pixel_vars = np.var(X, axis=0)
+        selected_pixels = np.argsort(pixel_vars)[-n_pixels:]
+        X = X[:, selected_pixels]
+    else:
+        n_pixels = X.shape[1]
+
+    # Normalization
+    if normalize:
+        if encoding == 'amplitude':
+            X = normalize_quantum_data(X, method='l2')
+        elif encoding == 'angle':
+            scaler = MinMaxScaler(feature_range=(0, 2*np.pi))
+            X = scaler.fit_transform(X)
+        else:
+            scaler = StandardScaler()
+            X = scaler.fit_transform(X)
+
+    # Train-test split
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=test_size, random_state=random_state, stratify=y
+    )
+
+    metadata = {
+        'dataset_name': 'digits',
+        'n_samples': len(X),
+        'n_features': n_pixels,
+        'n_classes': n_classes,
+        'original_shape': (8, 8),
+        'encoding': encoding,
+        'normalized': normalize
+    }
+
+    return X_train, X_test, y_train, y_test, metadata
+
+
+def load_breast_cancer_quantum(
+    n_features: Optional[int] = 16,
+    encoding: str = 'amplitude',
+    normalize: bool = True,
+    test_size: float = 0.2,
+    random_state: Optional[int] = 42
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
+    """Load and preprocess the Breast Cancer dataset for quantum machine learning.
+
+    Args:
+        n_features: Number of top features to keep
+        encoding: Type of quantum encoding
+        normalize: Whether to normalize features
+        test_size: Proportion of dataset for testing
+        random_state: Random seed for reproducibility
+
+    Returns:
+        Tuple of (X_train, X_test, y_train, y_test, metadata)
+
+    """
+    # Load dataset
+    cancer = datasets.load_breast_cancer()
+    X, y = cancer.data, cancer.target
+
+    # Feature selection based on correlation with target
+    if n_features is not None and n_features < X.shape[1]:
+        correlations = np.abs([np.corrcoef(X[:, i], y)[0, 1] for i in range(X.shape[1])])
+        selected_features = np.argsort(correlations)[-n_features:]
+        X = X[:, selected_features]
+        feature_names = [cancer.feature_names[i] for i in selected_features]
+    else:
+        feature_names = cancer.feature_names
+        n_features = X.shape[1]
+
+    # Normalization
+    if normalize:
+        if encoding == 'amplitude':
+            X = normalize_quantum_data(X, method='l2')
+        elif encoding == 'angle':
+            scaler = MinMaxScaler(feature_range=(0, 2*np.pi))
+            X = scaler.fit_transform(X)
+        else:
+            scaler = StandardScaler()
+            X = scaler.fit_transform(X)
+
+    # Train-test split
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=test_size, random_state=random_state, stratify=y
+    )
+
+    metadata = {
+        'dataset_name': 'breast_cancer',
+        'n_samples': len(X),
+        'n_features': n_features,
+        'n_classes': 2,
+        'class_names': cancer.target_names.tolist(),
+        'feature_names': feature_names,
+        'encoding': encoding,
+        'normalized': normalize
+    }
+
+    return X_train, X_test, y_train, y_test, metadata
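All four loaders share the same signature and return convention, so a single example covers the family. A minimal usage sketch, assuming this hunk is the new `superquantx/datasets/quantum_datasets.py` (its +277 line count matches the file list) and that the function is imported from that module path:

```python
# Usage sketch only; the import path is an assumption based on the
# new-file list above (superquantx/datasets/quantum_datasets.py).
from superquantx.datasets.quantum_datasets import load_iris_quantum

# Keep the 2 highest-variance features, scaled into [0, 2*pi] for angle encoding.
X_train, X_test, y_train, y_test, meta = load_iris_quantum(
    n_features=2,
    encoding='angle',
)

print(X_train.shape)          # (120, 2): 150 samples, default 0.2 test split
print(meta['feature_names'])  # names of the two retained features
print(meta['n_classes'])      # 3
```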
--- /dev/null
+++ superquantx/datasets/synthetic.py
@@ -0,0 +1,300 @@
+"""Synthetic data generators for quantum machine learning.
+
+This module provides functions to generate synthetic datasets specifically
+designed for testing quantum algorithms and benchmarking performance.
+"""
+
+from typing import Any, Dict, Optional, Tuple
+
+import numpy as np
+from sklearn.datasets import make_blobs, make_classification, make_regression
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+
+from .preprocessing import normalize_quantum_data
+
+
+def generate_classification_data(
+    n_samples: int = 200,
+    n_features: int = 4,
+    n_classes: int = 2,
+    n_redundant: int = 0,
+    n_informative: Optional[int] = None,
+    class_sep: float = 1.0,
+    encoding: str = 'amplitude',
+    normalize: bool = True,
+    test_size: float = 0.2,
+    random_state: Optional[int] = 42
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
+    """Generate synthetic classification data for quantum machine learning.
+
+    Args:
+        n_samples: Number of samples to generate
+        n_features: Number of features (should be power of 2 for quantum efficiency)
+        n_classes: Number of classes
+        n_redundant: Number of redundant features
+        n_informative: Number of informative features (default: n_features)
+        class_sep: Class separation factor
+        encoding: Type of quantum encoding
+        normalize: Whether to normalize features
+        test_size: Proportion for test split
+        random_state: Random seed
+
+    Returns:
+        Tuple of (X_train, X_test, y_train, y_test, metadata)
+
+    """
+    if n_informative is None:
+        n_informative = n_features
+
+    # Generate synthetic data
+    X, y = make_classification(
+        n_samples=n_samples,
+        n_features=n_features,
+        n_informative=n_informative,
+        n_redundant=n_redundant,
+        n_classes=n_classes,
+        class_sep=class_sep,
+        random_state=random_state
+    )
+
+    # Normalization for quantum encoding
+    if normalize:
+        if encoding == 'amplitude':
+            X = normalize_quantum_data(X, method='l2')
+        elif encoding == 'angle':
+            scaler = MinMaxScaler(feature_range=(0, 2*np.pi))
+            X = scaler.fit_transform(X)
+        else:
+            scaler = StandardScaler()
+            X = scaler.fit_transform(X)
+
+    # Train-test split
+    n_train = int(n_samples * (1 - test_size))
+    indices = np.random.RandomState(random_state).permutation(n_samples)
+
+    train_idx = indices[:n_train]
+    test_idx = indices[n_train:]
+
+    X_train, X_test = X[train_idx], X[test_idx]
+    y_train, y_test = y[train_idx], y[test_idx]
+
+    metadata = {
+        'dataset_type': 'synthetic_classification',
+        'n_samples': n_samples,
+        'n_features': n_features,
+        'n_classes': n_classes,
+        'n_informative': n_informative,
+        'n_redundant': n_redundant,
+        'class_sep': class_sep,
+        'encoding': encoding,
+        'normalized': normalize
+    }
+
+    return X_train, X_test, y_train, y_test, metadata
+
+
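A quick sketch exercising the generator above; note that, unlike the loaders in `quantum_datasets.py`, the split here is a plain permutation rather than a stratified `train_test_split`. The unit-norm check assumes `normalize_quantum_data(..., method='l2')` rescales each sample to unit L2 norm, as amplitude encoding requires; that helper lives in the new `preprocessing.py` and is not shown in this hunk:

```python
import numpy as np

# Sketch only; the module path follows the new-file list
# (superquantx/datasets/synthetic.py).
from superquantx.datasets.synthetic import generate_classification_data

# 8 features = 2**3, so each sample fits the amplitudes of a 3-qubit state.
X_train, X_test, y_train, y_test, meta = generate_classification_data(
    n_samples=200,
    n_features=8,
    encoding='amplitude',
)

print(meta['dataset_type'])        # 'synthetic_classification'
print(np.linalg.norm(X_train[0]))  # ~1.0, assuming rows are L2-normalized
```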
+def generate_regression_data(
+    n_samples: int = 200,
+    n_features: int = 4,
+    n_informative: Optional[int] = None,
+    noise: float = 0.1,
+    encoding: str = 'amplitude',
+    normalize: bool = True,
+    test_size: float = 0.2,
+    random_state: Optional[int] = 42
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
+    """Generate synthetic regression data for quantum machine learning.
+
+    Args:
+        n_samples: Number of samples to generate
+        n_features: Number of features
+        n_informative: Number of informative features
+        noise: Noise level in target
+        encoding: Type of quantum encoding
+        normalize: Whether to normalize features
+        test_size: Proportion for test split
+        random_state: Random seed
+
+    Returns:
+        Tuple of (X_train, X_test, y_train, y_test, metadata)
+
+    """
+    if n_informative is None:
+        n_informative = n_features
+
+    # Generate synthetic regression data
+    X, y = make_regression(
+        n_samples=n_samples,
+        n_features=n_features,
+        n_informative=n_informative,
+        noise=noise,
+        random_state=random_state
+    )
+
+    # Normalization
+    if normalize:
+        if encoding == 'amplitude':
+            X = normalize_quantum_data(X, method='l2')
+        elif encoding == 'angle':
+            scaler = MinMaxScaler(feature_range=(0, 2*np.pi))
+            X = scaler.fit_transform(X)
+        else:
+            scaler = StandardScaler()
+            X = scaler.fit_transform(X)
+
+    # Normalize targets
+    y = (y - np.mean(y)) / np.std(y)
+
+    # Train-test split
+    n_train = int(n_samples * (1 - test_size))
+    indices = np.random.RandomState(random_state).permutation(n_samples)
+
+    train_idx = indices[:n_train]
+    test_idx = indices[n_train:]
+
+    X_train, X_test = X[train_idx], X[test_idx]
+    y_train, y_test = y[train_idx], y[test_idx]
+
+    metadata = {
+        'dataset_type': 'synthetic_regression',
+        'n_samples': n_samples,
+        'n_features': n_features,
+        'n_informative': n_informative,
+        'noise': noise,
+        'encoding': encoding,
+        'normalized': normalize
+    }
+
+    return X_train, X_test, y_train, y_test, metadata
+
+
+def generate_clustering_data(
+    n_samples: int = 200,
+    n_features: int = 4,
+    n_clusters: int = 3,
+    cluster_std: float = 1.0,
+    center_box: Tuple[float, float] = (-10., 10.),
+    encoding: str = 'amplitude',
+    normalize: bool = True,
+    random_state: Optional[int] = 42
+) -> Tuple[np.ndarray, np.ndarray, Dict[str, Any]]:
+    """Generate synthetic clustering data for quantum machine learning.
+
+    Args:
+        n_samples: Number of samples to generate
+        n_features: Number of features
+        n_clusters: Number of clusters
+        cluster_std: Standard deviation of clusters
+        center_box: Bounding box for cluster centers
+        encoding: Type of quantum encoding
+        normalize: Whether to normalize features
+        random_state: Random seed
+
+    Returns:
+        Tuple of (X, y_true, metadata)
+
+    """
+    # Generate clustering data
+    X, y_true = make_blobs(
+        n_samples=n_samples,
+        n_features=n_features,
+        centers=n_clusters,
+        cluster_std=cluster_std,
+        center_box=center_box,
+        random_state=random_state
+    )
+
+    # Normalization
+    if normalize:
+        if encoding == 'amplitude':
+            X = normalize_quantum_data(X, method='l2')
+        elif encoding == 'angle':
+            scaler = MinMaxScaler(feature_range=(0, 2*np.pi))
+            X = scaler.fit_transform(X)
+        else:
+            scaler = StandardScaler()
+            X = scaler.fit_transform(X)
+
+    metadata = {
+        'dataset_type': 'synthetic_clustering',
+        'n_samples': n_samples,
+        'n_features': n_features,
+        'n_clusters': n_clusters,
+        'cluster_std': cluster_std,
+        'encoding': encoding,
+        'normalized': normalize
+    }
+
+    return X, y_true, metadata
+
+
+def generate_portfolio_data(
+    n_assets: int = 8,
+    n_scenarios: int = 100,
+    risk_level: float = 0.2,
+    correlation: float = 0.3,
+    normalize: bool = True,
+    random_state: Optional[int] = 42
+) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict[str, Any]]:
+    """Generate synthetic portfolio optimization data for quantum finance algorithms.
+
+    Args:
+        n_assets: Number of assets in portfolio
+        n_scenarios: Number of return scenarios
+        risk_level: Overall risk level (volatility)
+        correlation: Average correlation between assets
+        normalize: Whether to normalize returns
+        random_state: Random seed
+
+    Returns:
+        Tuple of (returns, covariance_matrix, expected_returns, metadata)
+
+    """
+    np.random.seed(random_state)
+
+    # Generate expected returns
+    expected_returns = np.random.uniform(0.05, 0.20, n_assets)
+
+    # Generate correlation matrix
+    correlations = np.full((n_assets, n_assets), correlation)
+    np.fill_diagonal(correlations, 1.0)
+
+    # Add some randomness to correlations
+    noise = np.random.uniform(-0.1, 0.1, (n_assets, n_assets))
+    correlations += (noise + noise.T) / 2
+    np.fill_diagonal(correlations, 1.0)
+
+    # Clip correlations to a valid range (does not guarantee positive definiteness)
+    correlations = np.maximum(correlations, -0.99)
+    correlations = np.minimum(correlations, 0.99)
+
+    # Generate volatilities
+    volatilities = np.random.uniform(
+        risk_level * 0.5,
+        risk_level * 1.5,
+        n_assets
+    )
+
+    # Create covariance matrix
+    covariance_matrix = np.outer(volatilities, volatilities) * correlations
+
+    # Generate return scenarios
+    returns = np.random.multivariate_normal(
+        expected_returns,
+        covariance_matrix,
+        n_scenarios
+    )
+
+    if normalize:
+        returns = normalize_quantum_data(returns, method='l2')
+
+    metadata = {
+        'dataset_type': 'portfolio_optimization',
+        'n_assets': n_assets,
+        'n_scenarios': n_scenarios,
+        'risk_level': risk_level,
+        'avg_correlation': correlation,
+        'normalized': normalize
+    }
+
+    return returns, covariance_matrix, expected_returns, metadata
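Unlike the other generators, `generate_portfolio_data` returns scenario matrices rather than a train/test split. A short sketch of consuming its output in a classical mean-variance baseline (the QUBO/QAOA formulation that would typically follow is out of scope for this diff):

```python
import numpy as np

from superquantx.datasets.synthetic import generate_portfolio_data

returns, cov, mu, meta = generate_portfolio_data(n_assets=8, n_scenarios=100)

# Equal-weight baseline: expected return and variance computed directly
# from the generated expected-returns vector and covariance matrix.
w = np.full(meta['n_assets'], 1.0 / meta['n_assets'])
print(f"E[r] = {w @ mu:.4f}, var = {w @ cov @ w:.6f}")
```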