spotoptim-0.0.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- spotoptim/.DS_Store +0 -0
- spotoptim/SpotOptim.py +382 -0
- spotoptim/__init__.py +11 -0
- spotoptim/py.typed +0 -0
- spotoptim/surrogate/README.md +149 -0
- spotoptim/surrogate/__init__.py +5 -0
- spotoptim/surrogate/kriging.py +360 -0
- spotoptim-0.0.5.dist-info/METADATA +150 -0
- spotoptim-0.0.5.dist-info/RECORD +10 -0
- spotoptim-0.0.5.dist-info/WHEEL +4 -0
spotoptim/.DS_Store
ADDED
Binary file
spotoptim/SpotOptim.py
ADDED
@@ -0,0 +1,382 @@

```python
import numpy as np
from typing import Callable, Optional, Tuple, List
from scipy.optimize import OptimizeResult, differential_evolution
from scipy.stats import norm
from scipy.stats.qmc import LatinHypercube
from sklearn.base import BaseEstimator
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, ConstantKernel
import warnings


class SpotOptim(BaseEstimator):
    """
    SPOT optimizer compatible with the scipy.optimize interface.

    Parameters
    ----------
    fun : callable
        Objective function to minimize. Must accept an array of shape
        (n_samples, n_features) and return values of shape (n_samples,).
    bounds : list of tuple
        Bounds for each dimension as [(low, high), ...].
    max_iter : int, default=20
        Maximum number of optimization iterations.
    n_initial : int, default=10
        Number of initial design points.
    surrogate : object, optional
        Surrogate model (default: Gaussian Process with Matern kernel).
    acquisition : str, default='ei'
        Acquisition function ('ei', 'y', 'pi').
    var_type : list of str, optional
        Variable types for each dimension ('num', 'int', 'float', 'factor').
    tolerance_x : float, optional
        Minimum distance between points. If None, uses sqrt(machine epsilon).
    seed : int, optional
        Random seed for reproducibility.
    verbose : bool, default=False
        Print progress information.
    warnings_filter : str, default="ignore"
        Filter for warnings. One of "error", "ignore", "always", "all",
        "default", "module", or "once".

    Attributes
    ----------
    X_ : ndarray of shape (n_samples, n_features)
        All evaluated points.
    y_ : ndarray of shape (n_samples,)
        Function values at X_.
    best_x_ : ndarray of shape (n_features,)
        Best point found.
    best_y_ : float
        Best function value found.
    n_iter_ : int
        Number of iterations performed.
    warnings_filter : str
        Filter for warnings during optimization.

    Examples
    --------
    >>> def objective(X):
    ...     return np.sum(X**2, axis=1)
    ...
    >>> bounds = [(-5, 5), (-5, 5)]
    >>> optimizer = SpotOptim(fun=objective, bounds=bounds, max_iter=10, n_initial=5, verbose=True)
    >>> result = optimizer.optimize()
    >>> print("Best x:", result.x)
    >>> print("Best f(x):", result.fun)
    """

    def __init__(
        self,
        fun: Callable,
        bounds: list,
        max_iter: int = 20,
        n_initial: int = 10,
        surrogate: Optional[object] = None,
        acquisition: str = "ei",
        var_type: Optional[list] = None,
        tolerance_x: Optional[float] = None,
        seed: Optional[int] = None,
        verbose: bool = False,
        warnings_filter: str = "ignore",
    ):
        warnings.filterwarnings(warnings_filter)
        self.warnings_filter = warnings_filter

        # Small positive value, used as the default tolerance
        self.eps = np.sqrt(np.spacing(1))

        if tolerance_x is None:
            self.tolerance_x = self.eps
        else:
            self.tolerance_x = tolerance_x

        self.fun = fun
        self.bounds = bounds
        self.max_iter = max_iter
        self.n_initial = n_initial
        self.surrogate = surrogate
        self.acquisition = acquisition
        self.var_type = var_type
        self.seed = seed
        self.verbose = verbose

        # Derived attributes
        self.n_dim = len(bounds)
        self.lower = np.array([b[0] for b in bounds])
        self.upper = np.array([b[1] for b in bounds])

        # Default variable types
        if self.var_type is None:
            self.var_type = ["num"] * self.n_dim

        # Initialize surrogate if not provided
        if self.surrogate is None:
            kernel = ConstantKernel(1.0, (1e-3, 1e3)) * Matern(
                length_scale=1.0, length_scale_bounds=(1e-2, 1e2), nu=2.5
            )
            self.surrogate = GaussianProcessRegressor(
                kernel=kernel,
                n_restarts_optimizer=10,
                normalize_y=True,
                random_state=self.seed,
            )

        # Design generator
        self.lhs_sampler = LatinHypercube(d=self.n_dim, seed=self.seed)

        # Storage for results
        self.X_ = None
        self.y_ = None
        self.best_x_ = None
        self.best_y_ = None
        self.n_iter_ = 0

    def _evaluate_function(self, X: np.ndarray) -> np.ndarray:
        """Evaluate the objective function at points X."""
        # Ensure X is 2D
        X = np.atleast_2d(X)

        # Evaluate function
        y = self.fun(X)

        # Ensure y is 1D
        if isinstance(y, np.ndarray) and y.ndim > 1:
            y = y.ravel()
        elif not isinstance(y, np.ndarray):
            y = np.array([y])

        return y

    def _generate_initial_design(self) -> np.ndarray:
        """Generate an initial space-filling design using Latin Hypercube Sampling."""
        # Generate samples in [0, 1]^d
        X0_unit = self.lhs_sampler.random(n=self.n_initial)

        # Scale to [lower, upper]
        X0 = self.lower + X0_unit * (self.upper - self.lower)

        return self._repair_non_numeric(X0, self.var_type)

    def _fit_surrogate(self, X: np.ndarray, y: np.ndarray) -> None:
        """Fit the surrogate model to the data."""
        self.surrogate.fit(X, y)

    def _select_new(
        self, A: np.ndarray, X: np.ndarray, tolerance: float = 0
    ) -> Tuple[np.ndarray, np.ndarray]:
        """
        Select rows from A that are not already in X.

        Parameters
        ----------
        A : ndarray
            Array with new values.
        X : ndarray
            Array with known values.
        tolerance : float, default=0
            Tolerance value for the comparison.

        Returns
        -------
        ndarray
            Array with the unknown (new) rows of A.
        ndarray
            Boolean mask, True where the corresponding row of A is new.
        """
        B = np.abs(A[:, None] - X)
        ind = np.any(np.all(B <= tolerance, axis=2), axis=1)
        return A[~ind], ~ind

    def _repair_non_numeric(self, X: np.ndarray, var_type: List[str]) -> np.ndarray:
        """
        Round non-numeric values to integers.
        This applies to all variables except those of type "num" and "float".

        Parameters
        ----------
        X : ndarray
            X array.
        var_type : list of str
            List with type information.

        Returns
        -------
        ndarray
            X array with non-numeric values rounded to integers.
        """
        mask = np.isin(var_type, ["num", "float"], invert=True)
        X[:, mask] = np.around(X[:, mask])
        return X

    def _acquisition_function(self, x: np.ndarray) -> float:
        """
        Compute the acquisition function value.

        Parameters
        ----------
        x : ndarray of shape (n_features,)
            Point to evaluate.

        Returns
        -------
        float
            Acquisition function value (to be minimized).
        """
        x = x.reshape(1, -1)

        if self.acquisition == "y":
            # Predicted mean
            return self.surrogate.predict(x)[0]

        elif self.acquisition == "ei":
            # Expected Improvement
            mu, sigma = self.surrogate.predict(x, return_std=True)
            mu = mu[0]
            sigma = sigma[0]

            if sigma < 1e-10:
                return 0.0

            y_best = np.min(self.y_)
            improvement = y_best - mu
            Z = improvement / sigma

            ei = improvement * norm.cdf(Z) + sigma * norm.pdf(Z)
            return -ei  # Minimize negative EI

        elif self.acquisition == "pi":
            # Probability of Improvement
            mu, sigma = self.surrogate.predict(x, return_std=True)
            mu = mu[0]
            sigma = sigma[0]

            if sigma < 1e-10:
                return 0.0

            y_best = np.min(self.y_)
            Z = (y_best - mu) / sigma

            pi = norm.cdf(Z)
            return -pi  # Minimize negative PI

        else:
            raise ValueError(f"Unknown acquisition function: {self.acquisition}")

    def _suggest_next_point(self) -> np.ndarray:
        """
        Suggest the next point to evaluate by optimizing the acquisition function.

        Returns
        -------
        ndarray of shape (n_features,)
            Next point to evaluate.
        """
        result = differential_evolution(
            func=self._acquisition_function,
            bounds=self.bounds,
            seed=self.seed,
            maxiter=1000,
        )

        x_next = result.x

        # Ensure minimum distance to existing points
        x_next_2d = x_next.reshape(1, -1)
        x_new, _ = self._select_new(A=x_next_2d, X=self.X_, tolerance=self.tolerance_x)

        if x_new.shape[0] == 0:
            # If too close to a known point, fall back to a random point
            if self.verbose:
                print("Proposed point too close to existing points, generating random point")
            # Generate a random point using LHS
            x_next_unit = self.lhs_sampler.random(n=1)[0]
            x_next = self.lower + x_next_unit * (self.upper - self.lower)

        return self._repair_non_numeric(x_next.reshape(1, -1), self.var_type)[0]

    def optimize(self, X0: Optional[np.ndarray] = None) -> OptimizeResult:
        """
        Run the optimization process.

        Parameters
        ----------
        X0 : ndarray of shape (n_initial, n_features), optional
            Initial design points. If None, generates a space-filling design.

        Returns
        -------
        OptimizeResult
            Optimization result with fields:

            - x : best point found
            - fun : best function value
            - nfev : number of function evaluations
            - nit : number of iterations performed
            - success : whether optimization succeeded
            - message : termination message
            - X : all evaluated points
            - y : all function values
        """
        # Generate or use the provided initial design
        if X0 is None:
            X0 = self._generate_initial_design()
        else:
            X0 = np.atleast_2d(X0)
            X0 = self._repair_non_numeric(X0, self.var_type)

        # Evaluate initial design
        y0 = self._evaluate_function(X0)

        # Initialize storage
        self.X_ = X0.copy()
        self.y_ = y0.copy()
        self.n_iter_ = 0

        # Initial best
        best_idx = np.argmin(self.y_)
        self.best_x_ = self.X_[best_idx].copy()
        self.best_y_ = self.y_[best_idx]

        if self.verbose:
            print(f"Initial best: f(x) = {self.best_y_:.6f}")

        # Main optimization loop
        for iteration in range(self.max_iter):
            self.n_iter_ = iteration + 1

            # Fit surrogate
            self._fit_surrogate(self.X_, self.y_)

            # Suggest next point
            x_next = self._suggest_next_point()

            # Evaluate next point
            y_next = self._evaluate_function(x_next.reshape(1, -1))

            # Update storage
            self.X_ = np.vstack([self.X_, x_next])
            self.y_ = np.append(self.y_, y_next)

            # Update best
            if y_next[0] < self.best_y_:
                self.best_x_ = x_next.copy()
                self.best_y_ = y_next[0]

                if self.verbose:
                    print(f"Iteration {iteration + 1}: New best f(x) = {self.best_y_:.6f}")
            elif self.verbose:
                print(f"Iteration {iteration + 1}: f(x) = {y_next[0]:.6f}")

        # Return scipy-style result
        return OptimizeResult(
            x=self.best_x_,
            fun=self.best_y_,
            nfev=len(self.y_),
            nit=self.n_iter_,
            success=True,
            message="Optimization finished successfully",
            X=self.X_,
            y=self.y_,
        )
```
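
For orientation: the `'ei'` branch above is the standard closed-form expected improvement, EI(x) = (y_best − μ(x))·Φ(Z) + σ(x)·φ(Z) with Z = (y_best − μ(x))/σ(x), returned negated so that `differential_evolution` can minimize it. A small usage sketch of the class follows; the mixed-type toy objective and settings here are illustrative, not part of the package:

```python
import numpy as np
from spotoptim import SpotOptim

# Hypothetical objective: first dimension continuous, second integer-valued.
def objective(X):
    X = np.atleast_2d(X)
    return (X[:, 0] - 0.5) ** 2 + (X[:, 1] - 3) ** 2

opt = SpotOptim(
    fun=objective,
    bounds=[(-2.0, 2.0), (0, 10)],
    var_type=["num", "int"],  # second dimension is rounded by _repair_non_numeric
    max_iter=15,
    n_initial=8,
    acquisition="ei",
    seed=0,
)
result = opt.optimize()
print(result.x, result.fun)  # result.x[1] lies on the integer grid
print(result.nfev)           # 8 initial points + 15 sequential evaluations = 23
```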
spotoptim/__init__.py
ADDED
spotoptim/py.typed
ADDED
File without changes
spotoptim/surrogate/README.md
ADDED
@@ -0,0 +1,149 @@

# SpotOptim Surrogate Models

This module provides surrogate models for use with the SpotOptim optimizer.

## Kriging Surrogate

The `Kriging` class provides a simplified Gaussian Process (Kriging) surrogate model that can be used as an alternative to scikit-learn's `GaussianProcessRegressor`.

### Features

- **Scikit-learn compatible interface**: Implements `fit()` and `predict()` methods
- **Automatic hyperparameter optimization**: Uses maximum likelihood estimation
- **Gaussian (RBF) kernel**: Squared-exponential correlation function
- **Prediction uncertainty**: Supports `return_std=True` for standard deviations
- **Reproducible**: Supports a random seed for consistent results

### Basic Usage

```python
import numpy as np
from spotoptim import SpotOptim, Kriging

# Define objective function
def sphere(X):
    X = np.atleast_2d(X)
    return np.sum(X**2, axis=1)

# Create Kriging surrogate
kriging = Kriging(
    noise=1e-6,
    min_theta=-3.0,
    max_theta=2.0,
    seed=42
)

# Use with SpotOptim
optimizer = SpotOptim(
    fun=sphere,
    bounds=[(-5, 5), (-5, 5)],
    max_iter=20,
    n_initial=10,
    surrogate=kriging,  # Use Kriging surrogate
    seed=42
)

result = optimizer.optimize()
```

### Parameters

- **noise** (float, optional): Regularization parameter (nugget effect). If None, uses sqrt(machine epsilon).
- **kernel** (str, default='gauss'): Kernel type. Currently only 'gauss' (Gaussian/RBF) is supported.
- **n_theta** (int, optional): Number of theta parameters. If None, uses k (the number of dimensions).
- **min_theta** (float, default=-3.0): Minimum log10(theta) bound for optimization.
- **max_theta** (float, default=2.0): Maximum log10(theta) bound for optimization.
- **seed** (int, optional): Random seed for reproducibility.

### Methods

#### fit(X, y)

Fit the Kriging model to training data.

**Parameters:**
- `X`: ndarray of shape (n_samples, n_features) - Training input data
- `y`: ndarray of shape (n_samples,) - Training target values

**Returns:**
- `self`: Fitted estimator

#### predict(X, return_std=False)

Predict using the Kriging model.

**Parameters:**
- `X`: ndarray of shape (n_samples, n_features) - Points to predict at
- `return_std`: bool, default=False - If True, return standard deviations as well

**Returns:**
- `y_pred`: ndarray of shape (n_samples,) - Predicted values
- `y_std`: ndarray of shape (n_samples,) - Standard deviations (only if return_std=True)

### Comparison with scikit-learn's GaussianProcessRegressor

| Feature | Kriging | GaussianProcessRegressor |
|---------|---------|--------------------------|
| Interface | scikit-learn compatible | Native scikit-learn |
| Kernel | Gaussian (RBF) | Multiple kernel types |
| Hyperparameters | Theta (length scales) | Flexible kernel parameters |
| Dependencies | NumPy, SciPy, scikit-learn (base classes only) | NumPy, SciPy, scikit-learn |
| Optimization | Differential evolution | L-BFGS-B with restarts |
| Code complexity | Simplified, ~350 lines | Full-featured, complex |

### When to Use Kriging

- You want a **small, readable** surrogate (scikit-learn is used only for its estimator base classes)
- You need a **simple, interpretable** Gaussian kernel
- You want **explicit control** over hyperparameter bounds
- You're working with **moderate-dimensional** problems (< 20 dimensions)

### When to Use the Default GP

- You need **multiple kernel types** (Matern, RationalQuadratic, etc.)
- You want **advanced features** like gradient-based predictions
- You're working with **very high-dimensional** problems
- You need **production-tested** robustness

## Example: Comparison

```python
import numpy as np
from spotoptim import SpotOptim, Kriging

def rosenbrock(X):
    X = np.atleast_2d(X)
    x, y = X[:, 0], X[:, 1]
    return (1 - x)**2 + 100 * (y - x**2)**2

bounds = [(-2, 2), (-2, 2)]

# With Kriging
optimizer_kriging = SpotOptim(
    fun=rosenbrock,
    bounds=bounds,
    surrogate=Kriging(seed=42),
    seed=42
)
result_kriging = optimizer_kriging.optimize()

# With default GP (no surrogate argument)
optimizer_gp = SpotOptim(
    fun=rosenbrock,
    bounds=bounds,
    seed=42
)
result_gp = optimizer_gp.optimize()

print(f"Kriging result: {result_kriging.fun:.6f}")
print(f"GP result: {result_gp.fun:.6f}")
```

## Future Extensions

Planned features for future releases:

- Additional kernel types (Matern, Exponential, etc.)
- Anisotropic hyperparameters
- Gradient information
- Batch predictions
- Parallel hyperparameter optimization
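
The `surrogate` argument is duck-typed: `SpotOptim` only ever calls `fit(X, y)`, `predict(X)`, and `predict(X, return_std=True)` on it. As a sketch of that minimal contract, here is a deliberately crude stand-in; the `NearestNeighborSurrogate` name and its constant-uncertainty shortcut are illustrative, not part of the package:

```python
import numpy as np


class NearestNeighborSurrogate:
    """Toy surrogate: predicts the value of the nearest training point.

    Illustrates the minimal duck-typed contract SpotOptim relies on;
    not a useful model, just a stand-in.
    """

    def fit(self, X, y):
        self.X_ = np.asarray(X, dtype=float)
        self.y_ = np.asarray(y, dtype=float)
        return self

    def predict(self, X, return_std=False):
        X = np.atleast_2d(X)
        # Nearest training point for each query row
        dists = np.linalg.norm(X[:, None, :] - self.X_[None, :, :], axis=2)
        y_pred = self.y_[np.argmin(dists, axis=1)]
        if return_std:
            # Constant placeholder uncertainty: adequate for acquisition='y',
            # far too crude for 'ei' or 'pi'
            return y_pred, np.full(X.shape[0], np.std(self.y_))
        return y_pred
```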
spotoptim/surrogate/kriging.py
ADDED
@@ -0,0 +1,360 @@

```python
"""
Simplified Kriging surrogate model for SpotOptim.

This is a streamlined version adapted from spotpython.surrogate.kriging
for use with the SpotOptim optimizer.
"""

import numpy as np
from typing import Optional, Tuple, Union
from numpy.linalg import LinAlgError, cholesky, solve
from scipy.optimize import differential_evolution
from sklearn.base import BaseEstimator, RegressorMixin


class Kriging(BaseEstimator, RegressorMixin):
    """
    A simplified Kriging (Gaussian Process) surrogate model for SpotOptim.

    This class provides a scikit-learn compatible interface with fit() and predict()
    methods, making it suitable for use as a surrogate in SpotOptim.

    Parameters
    ----------
    noise : float, optional
        Regularization parameter (nugget effect). If None, uses sqrt(eps).
    kernel : str, default='gauss'
        Kernel type. Currently only 'gauss' (Gaussian/RBF) is supported.
    n_theta : int, optional
        Number of theta parameters. If None, uses k (the number of dimensions).
    min_theta : float, default=-3.0
        Minimum log10(theta) bound for optimization.
    max_theta : float, default=2.0
        Maximum log10(theta) bound for optimization.
    seed : int, optional
        Random seed for reproducibility.

    Attributes
    ----------
    X_ : ndarray of shape (n_samples, n_features)
        Training data.
    y_ : ndarray of shape (n_samples,)
        Training targets.
    theta_ : ndarray
        Optimized theta parameters (log10 scale).
    mu_ : float
        Mean of the Kriging predictor.
    sigma2_ : float
        Variance of the Kriging predictor.

    Examples
    --------
    >>> import numpy as np
    >>> from spotoptim.surrogate import Kriging
    >>> X = np.array([[0.0], [0.5], [1.0]])
    >>> y = np.array([0.0, 0.25, 1.0])
    >>> model = Kriging()
    >>> model.fit(X, y)
    >>> predictions = model.predict(np.array([[0.25], [0.75]]))
    """

    def __init__(
        self,
        noise: Optional[float] = None,
        kernel: str = "gauss",
        n_theta: Optional[int] = None,
        min_theta: float = -3.0,
        max_theta: float = 2.0,
        seed: Optional[int] = None,
    ):
        self.noise = noise
        self.kernel = kernel
        self.n_theta = n_theta
        self.min_theta = min_theta
        self.max_theta = max_theta
        self.seed = seed

        # Fitted attributes
        self.X_ = None
        self.y_ = None
        self.theta_ = None
        self.mu_ = None
        self.sigma2_ = None
        self.U_ = None  # Cholesky factor (lower triangular)
        self.Rinv_one_ = None
        self.Rinv_r_ = None

    def _get_noise(self) -> float:
        """Get the noise/regularization parameter."""
        if self.noise is None:
            return np.sqrt(np.finfo(float).eps)
        return self.noise

    def _correlation(self, D: np.ndarray) -> np.ndarray:
        """
        Compute correlations from weighted squared distances using the Gaussian kernel.

        Parameters
        ----------
        D : ndarray
            Weighted squared distance matrix.

        Returns
        -------
        ndarray
            Correlation matrix.
        """
        if self.kernel == "gauss":
            return np.exp(-D)
        else:
            raise ValueError(f"Unsupported kernel: {self.kernel}")

    def _build_correlation_matrix(self, X: np.ndarray, theta: np.ndarray) -> np.ndarray:
        """
        Build the correlation matrix R for the training data.

        Parameters
        ----------
        X : ndarray of shape (n, k)
            Input data.
        theta : ndarray of shape (k,)
            Theta parameters (10**theta used as weights).

        Returns
        -------
        ndarray of shape (n, n)
            Correlation matrix with noise on the diagonal.
        """
        n = X.shape[0]
        theta10 = 10.0**theta

        # Compute weighted squared distances
        R = np.zeros((n, n))
        for i in range(n):
            for j in range(i + 1, n):
                diff = X[i] - X[j]
                dist = np.sum(theta10 * diff**2)
                R[i, j] = dist
                R[j, i] = dist

        # Apply correlation function
        R = self._correlation(R)

        # Add noise to the diagonal
        noise_val = self._get_noise()
        np.fill_diagonal(R, 1.0 + noise_val)

        return R

    def _build_correlation_vector(
        self, x: np.ndarray, X: np.ndarray, theta: np.ndarray
    ) -> np.ndarray:
        """
        Build the correlation vector between a new point x and the training data X.

        Parameters
        ----------
        x : ndarray of shape (k,)
            New point.
        X : ndarray of shape (n, k)
            Training data.
        theta : ndarray of shape (k,)
            Theta parameters.

        Returns
        -------
        ndarray of shape (n,)
            Correlation vector.
        """
        theta10 = 10.0**theta
        diff = X - x.reshape(1, -1)
        D = np.sum(theta10 * diff**2, axis=1)
        return self._correlation(D)

    def _neg_log_likelihood(self, log_theta: np.ndarray) -> float:
        """
        Compute the negative concentrated log-likelihood.

        Parameters
        ----------
        log_theta : ndarray
            Log10(theta) parameters.

        Returns
        -------
        float
            Negative log-likelihood (to be minimized).
        """
        try:
            n = self.X_.shape[0]
            y = self.y_.flatten()
            one = np.ones(n)

            # Build correlation matrix
            R = self._build_correlation_matrix(self.X_, log_theta)

            # Cholesky decomposition
            try:
                U = cholesky(R)
            except LinAlgError:
                return 1e10  # Penalty for an ill-conditioned matrix

            # Solve for mean and variance
            Uy = solve(U, y)
            Uone = solve(U, one)

            Rinv_y = solve(U.T, Uy)
            Rinv_one = solve(U.T, Uone)

            mu = (one @ Rinv_y) / (one @ Rinv_one)
            r = y - one * mu

            Ur = solve(U, r)
            Rinv_r = solve(U.T, Ur)

            sigma2 = (r @ Rinv_r) / n

            if sigma2 <= 0:
                return 1e10

            # Concentrated log-likelihood: (n/2) log(sigma2) + (1/2) log det(R)
            log_det_R = 2.0 * np.sum(np.log(np.abs(np.diag(U))))
            neg_log_like = (n / 2.0) * np.log(sigma2) + 0.5 * log_det_R

            return neg_log_like

        except (LinAlgError, ValueError):
            return 1e10

    def fit(self, X: np.ndarray, y: np.ndarray) -> "Kriging":
        """
        Fit the Kriging model to training data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training input data.
        y : ndarray of shape (n_samples,)
            Training target values.

        Returns
        -------
        self
            Fitted estimator.
        """
        X = np.atleast_2d(X)
        y = np.asarray(y).flatten()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
        if y.ndim != 1:
            raise ValueError(f"y must be 1-dimensional, got shape {y.shape}")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {X.shape[0]} and {y.shape[0]}"
            )

        self.X_ = X
        self.y_ = y
        n, k = X.shape

        # Set the number of theta parameters
        if self.n_theta is None:
            self.n_theta = k

        # Optimize theta via maximum likelihood
        bounds = [(self.min_theta, self.max_theta)] * self.n_theta

        result = differential_evolution(
            func=self._neg_log_likelihood,
            bounds=bounds,
            seed=self.seed,
            maxiter=100,
            atol=1e-6,
            tol=0.01,
        )

        self.theta_ = result.x

        # Compute final model parameters
        one = np.ones(n)
        R = self._build_correlation_matrix(X, self.theta_)

        try:
            self.U_ = cholesky(R)
        except LinAlgError:
            # Add more regularization if needed
            R = self._build_correlation_matrix(X, self.theta_)
            R += np.eye(n) * 1e-8
            self.U_ = cholesky(R)

        Uy = solve(self.U_, y)
        Uone = solve(self.U_, one)

        Rinv_y = solve(self.U_.T, Uy)
        Rinv_one = solve(self.U_.T, Uone)

        self.mu_ = float((one @ Rinv_y) / (one @ Rinv_one))

        r = y - one * self.mu_
        Ur = solve(self.U_, r)
        Rinv_r = solve(self.U_.T, Ur)

        self.sigma2_ = float((r @ Rinv_r) / n)

        # Store for prediction
        self.Rinv_one_ = Rinv_one
        self.Rinv_r_ = Rinv_r

        return self

    def predict(
        self, X: np.ndarray, return_std: bool = False
    ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
        """
        Predict using the Kriging model.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Points to predict at.
        return_std : bool, default=False
            If True, return standard deviations as well.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted values.
        y_std : ndarray of shape (n_samples,), optional
            Standard deviations (only if return_std=True).
        """
        X = np.atleast_2d(X)

        if X.shape[1] != self.X_.shape[1]:
            raise ValueError(
                f"X has {X.shape[1]} features, expected {self.X_.shape[1]}"
            )

        n_pred = X.shape[0]
        predictions = np.zeros(n_pred)

        if return_std:
            std_devs = np.zeros(n_pred)

        for i, x in enumerate(X):
            # Build correlation vector
            psi = self._build_correlation_vector(x, self.X_, self.theta_)

            # Predict mean
            predictions[i] = self.mu_ + psi @ self.Rinv_r_

            if return_std:
                # Predict variance
                Upsi = solve(self.U_, psi)
                psi_Rinv_psi = psi @ solve(self.U_.T, Upsi)

                variance = self.sigma2_ * (1.0 + self._get_noise() - psi_Rinv_psi)
                std_devs[i] = np.sqrt(max(0.0, variance))

        if return_std:
            return predictions, std_devs
        return predictions
```
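
As a quick sanity check of the predictor above: at the training inputs the Kriging mean should reproduce `y` almost exactly, and the predictive standard deviation should be near zero (up to the nugget), while both error and uncertainty grow away from the data. A small sketch, assuming only the `fit`/`predict` API shown in the file:

```python
import numpy as np
from spotoptim.surrogate import Kriging

rng = np.random.default_rng(1)
X = rng.uniform(-2, 2, size=(12, 2))
y = np.sum(X**2, axis=1)

model = Kriging(seed=1).fit(X, y)

# At the training points: near-exact interpolation, near-zero uncertainty
y_hat, y_std = model.predict(X, return_std=True)
print(np.max(np.abs(y_hat - y)))  # ~0
print(np.max(y_std))              # ~0 (up to the nugget)

# Away from the data the uncertainty grows
_, far_std = model.predict(np.array([[5.0, 5.0]]), return_std=True)
print(far_std)
```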
spotoptim-0.0.5.dist-info/METADATA
ADDED
@@ -0,0 +1,150 @@

Metadata-Version: 2.3
Name: spotoptim
Version: 0.0.5
Summary: Sequential Parameter Optimization with Bayesian Optimization
Author: bartzbeielstein
Author-email: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
Requires-Dist: numpy>=1.24.3
Requires-Dist: scipy>=1.10.1
Requires-Dist: scikit-learn>=1.3.0
Requires-Python: >=3.10
Description-Content-Type: text/markdown

# SpotOptim

Sequential Parameter Optimization with Bayesian Optimization.

## Features

- **Bayesian Optimization**: Uses surrogate models to efficiently optimize expensive black-box functions
- **Multiple Acquisition Functions**: Expected Improvement (EI), Predicted Mean (y), Probability of Improvement (PI)
- **Flexible Surrogates**: Default Gaussian Process or custom Kriging surrogate
- **Variable Types**: Support for continuous, integer, and mixed variable types
- **scipy-compatible**: Returns OptimizeResult objects compatible with scipy.optimize

## Installation

```bash
pip install spotoptim
```

## Quick Start

```python
import numpy as np
from spotoptim import SpotOptim

# Define objective function
def rosenbrock(X):
    X = np.atleast_2d(X)
    x, y = X[:, 0], X[:, 1]
    return (1 - x)**2 + 100 * (y - x**2)**2

# Set up optimization
bounds = [(-2, 2), (-2, 2)]

optimizer = SpotOptim(
    fun=rosenbrock,
    bounds=bounds,
    max_iter=50,
    n_initial=10,
    seed=42
)

# Run optimization
result = optimizer.optimize()

print(f"Best point: {result.x}")
print(f"Best value: {result.fun}")
```

## Using Kriging Surrogate

SpotOptim includes a simplified Kriging (Gaussian Process) surrogate as an alternative to scikit-learn's GaussianProcessRegressor:

```python
from spotoptim import SpotOptim, Kriging

# Create Kriging surrogate
kriging = Kriging(
    noise=1e-6,
    min_theta=-3.0,
    max_theta=2.0,
    seed=42
)

# Use with SpotOptim (rosenbrock and bounds as defined in the Quick Start)
optimizer = SpotOptim(
    fun=rosenbrock,
    bounds=bounds,
    surrogate=kriging,  # Use Kriging instead of the default GP
    seed=42
)

result = optimizer.optimize()
```

## API Reference

### SpotOptim

**Parameters:**
- `fun` (callable): Objective function to minimize
- `bounds` (list of tuples): Bounds for each dimension as [(low, high), ...]
- `max_iter` (int, default=20): Maximum number of optimization iterations
- `n_initial` (int, default=10): Number of initial design points
- `surrogate` (object, optional): Surrogate model (default: GaussianProcessRegressor)
- `acquisition` (str, default='ei'): Acquisition function ('ei', 'y', 'pi')
- `var_type` (list of str, optional): Variable types for each dimension
- `tolerance_x` (float, optional): Minimum distance between points
- `seed` (int, optional): Random seed for reproducibility
- `verbose` (bool, default=False): Print progress information

**Methods:**
- `optimize(X0=None)`: Run optimization, optionally with initial design points

### Kriging

**Parameters:**
- `noise` (float, optional): Regularization parameter
- `kernel` (str, default='gauss'): Kernel type
- `n_theta` (int, optional): Number of theta parameters
- `min_theta` (float, default=-3.0): Minimum log10(theta) bound
- `max_theta` (float, default=2.0): Maximum log10(theta) bound
- `seed` (int, optional): Random seed

**Methods:**
- `fit(X, y)`: Fit the model to training data
- `predict(X, return_std=False)`: Predict at new points

## Examples

See `notebooks/demos.ipynb` for comprehensive examples, including:

1. 2D Rosenbrock function optimization
2. 6D Rosenbrock with budget constraints
3. Using the Kriging surrogate vs. the default GP

## Development

```bash
# Clone repository
git clone https://github.com/sequential-parameter-optimization/spotoptim.git
cd spotoptim

# Install with uv
uv pip install -e .

# Run tests
uv run pytest tests/

# Build package
uv build
```

## License

See the LICENSE file.

## References

Based on the SPOT (Sequential Parameter Optimization Toolbox) methodology.
spotoptim-0.0.5.dist-info/RECORD
ADDED
@@ -0,0 +1,10 @@

```
spotoptim/.DS_Store,sha256=NALR4k7q4RK-hxhN2yt2Ct3OqovMkT4s6EfHz5uLQGI,6148
spotoptim/SpotOptim.py,sha256=b2rcrATNBZ4_gld3Ymp0mrBaZ4bTy-5xBo4lxWPsA34,11882
spotoptim/__init__.py,sha256=pj08U2Sa3KPPm5FBjhcSvKEsfJVPt1TQ4x9bCh5m6Vw,233
spotoptim/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
spotoptim/surrogate/README.md,sha256=Lt0Dp6ZqdegFh0zLbmfzyfeUkoRiE71evOfg9iMhCRc,4417
spotoptim/surrogate/__init__.py,sha256=1s4mM9dfOMMFAvqUrsjzc8NT3TpduZgiLDIPsaXpqMg,91
spotoptim/surrogate/kriging.py,sha256=YDzab4bn2gsdSv1V_XouwYFigTv8---BDZflJALWWHo,10255
spotoptim-0.0.5.dist-info/WHEEL,sha256=DpNsHFUm_gffZe1FgzmqwuqiuPC6Y-uBCzibcJcdupM,78
spotoptim-0.0.5.dist-info/METADATA,sha256=YSlFWVLb-8pERjPQPbX3FUkXJZtNVly7y0pwzMcNE8A,3854
spotoptim-0.0.5.dist-info/RECORD,,
```