spotoptim 0.0.3__tar.gz → 0.0.5__tar.gz
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
- spotoptim-0.0.5/PKG-INFO +150 -0
- spotoptim-0.0.5/README.md +138 -0
- {spotoptim-0.0.3 → spotoptim-0.0.5}/pyproject.toml +2 -5
- spotoptim-0.0.5/src/spotoptim/.DS_Store +0 -0
- {spotoptim-0.0.3 → spotoptim-0.0.5}/src/spotoptim/SpotOptim.py +24 -12
- spotoptim-0.0.5/src/spotoptim/__init__.py +11 -0
- spotoptim-0.0.5/src/spotoptim/surrogate/README.md +149 -0
- spotoptim-0.0.5/src/spotoptim/surrogate/__init__.py +5 -0
- spotoptim-0.0.5/src/spotoptim/surrogate/kriging.py +360 -0
- spotoptim-0.0.3/PKG-INFO +0 -15
- spotoptim-0.0.3/README.md +0 -0
- spotoptim-0.0.3/src/spotoptim/__init__.py +0 -2
- {spotoptim-0.0.3 → spotoptim-0.0.5}/src/spotoptim/py.typed +0 -0
spotoptim-0.0.5/PKG-INFO
ADDED
@@ -0,0 +1,150 @@

Metadata-Version: 2.3
Name: spotoptim
Version: 0.0.5
Summary: Add your description here
Author: bartzbeielstein
Author-email: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
Requires-Dist: numpy>=1.24.3
Requires-Dist: scipy>=1.10.1
Requires-Dist: scikit-learn>=1.3.0
Requires-Python: >=3.10
Description-Content-Type: text/markdown

# SpotOptim

Sequential Parameter Optimization with Bayesian Optimization.

## Features

- **Bayesian Optimization**: Uses surrogate models to efficiently optimize expensive black-box functions
- **Multiple Acquisition Functions**: Expected Improvement (EI), Predicted Mean (y), Probability of Improvement (PI)
- **Flexible Surrogates**: Default Gaussian Process or custom Kriging surrogate
- **Variable Types**: Support for continuous, integer, and mixed variable types
- **scipy-compatible**: Returns OptimizeResult objects compatible with scipy.optimize

## Installation

```bash
pip install spotoptim
```

## Quick Start

```python
import numpy as np
from spotoptim import SpotOptim

# Define objective function
def rosenbrock(X):
    X = np.atleast_2d(X)
    x, y = X[:, 0], X[:, 1]
    return (1 - x)**2 + 100 * (y - x**2)**2

# Set up optimization
bounds = [(-2, 2), (-2, 2)]

optimizer = SpotOptim(
    fun=rosenbrock,
    bounds=bounds,
    max_iter=50,
    n_initial=10,
    seed=42
)

# Run optimization
result = optimizer.optimize()

print(f"Best point: {result.x}")
print(f"Best value: {result.fun}")
```

## Using Kriging Surrogate

SpotOptim includes a simplified Kriging (Gaussian Process) surrogate as an alternative to scikit-learn's GaussianProcessRegressor:

```python
from spotoptim import SpotOptim, Kriging

# Create Kriging surrogate
kriging = Kriging(
    noise=1e-6,
    min_theta=-3.0,
    max_theta=2.0,
    seed=42
)

# Use with SpotOptim
optimizer = SpotOptim(
    fun=rosenbrock,
    bounds=bounds,
    surrogate=kriging,  # Use Kriging instead of default GP
    seed=42
)

result = optimizer.optimize()
```

## API Reference

### SpotOptim

**Parameters:**
- `fun` (callable): Objective function to minimize
- `bounds` (list of tuples): Bounds for each dimension as [(low, high), ...]
- `max_iter` (int, default=20): Maximum number of optimization iterations
- `n_initial` (int, default=10): Number of initial design points
- `surrogate` (object, optional): Surrogate model (default: GaussianProcessRegressor)
- `acquisition` (str, default='ei'): Acquisition function ('ei', 'y', 'pi')
- `var_type` (list of str, optional): Variable types for each dimension
- `tolerance_x` (float, optional): Minimum distance between points
- `seed` (int, optional): Random seed for reproducibility
- `verbose` (bool, default=False): Print progress information

**Methods:**
- `optimize(X0=None)`: Run optimization, optionally with initial design points

### Kriging

**Parameters:**
- `noise` (float, optional): Regularization parameter
- `kernel` (str, default='gauss'): Kernel type
- `n_theta` (int, optional): Number of theta parameters
- `min_theta` (float, default=-3.0): Minimum log10(theta) bound
- `max_theta` (float, default=2.0): Maximum log10(theta) bound
- `seed` (int, optional): Random seed

**Methods:**
- `fit(X, y)`: Fit the model to training data
- `predict(X, return_std=False)`: Predict at new points

## Examples

See `notebooks/demos.ipynb` for comprehensive examples including:
1. 2D Rosenbrock function optimization
2. 6D Rosenbrock with budget constraints
3. Using Kriging surrogate vs default GP

## Development

```bash
# Clone repository
git clone https://github.com/sequential-parameter-optimization/spotoptim.git
cd spotoptim

# Install with uv
uv pip install -e .

# Run tests
uv run pytest tests/

# Build package
uv build
```

## License

See LICENSE file.

## References

Based on the SPOT (Sequential Parameter Optimization Toolbox) methodology.
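The API reference above documents three acquisition options ('ei', 'y', 'pi'). A minimal sketch switching to Probability of Improvement; the objective and bounds here are illustrative, not from the package:

```python
import numpy as np
from spotoptim import SpotOptim

def sphere(X):
    X = np.atleast_2d(X)
    return np.sum(X**2, axis=1)

# Same setup as the Quick Start, but rank candidates by Probability of Improvement
optimizer = SpotOptim(
    fun=sphere,
    bounds=[(-5, 5), (-5, 5)],
    max_iter=30,
    n_initial=10,
    acquisition="pi",  # 'ei' (default), 'y', or 'pi'
    seed=42,
)
result = optimizer.optimize()
print(result.x, result.fun)
```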
spotoptim-0.0.5/README.md
ADDED
@@ -0,0 +1,138 @@

(The 138 added lines are identical to the markdown body of spotoptim-0.0.5/PKG-INFO above, from "# SpotOptim" through the References section.)
{spotoptim-0.0.3 → spotoptim-0.0.5}/pyproject.toml

```diff
@@ -1,6 +1,6 @@
 [project]
 name = "spotoptim"
-version = "0.0.3"
+version = "0.0.5"
 description = "Add your description here"
 readme = "README.md"
 authors = [
@@ -8,12 +8,9 @@ authors = [
 ]
 requires-python = ">=3.10"
 dependencies = [
-    "fastapi>=0.121.1",
     "numpy>=1.24.3",
     "scipy>=1.10.1",
-    "uvicorn>=0.22.0",
-    "scikit-learn",
-    "spotpython"
+    "scikit-learn>=1.3.0",
 ]

 [dependency-groups]
```
spotoptim-0.0.5/src/spotoptim/.DS_Store
ADDED
Binary file (no textual diff shown)
{spotoptim-0.0.3 → spotoptim-0.0.5}/src/spotoptim/SpotOptim.py
(a few removed lines below are truncated or rendered blank in the source view and are kept as-is)

```diff
@@ -1,10 +1,10 @@
 import numpy as np
 from typing import Callable, Optional, Tuple, List
 from scipy.optimize import OptimizeResult, differential_evolution
+from scipy.stats.qmc import LatinHypercube
 from sklearn.base import BaseEstimator
 from sklearn.gaussian_process import GaussianProcessRegressor
 from sklearn.gaussian_process.kernels import Matern, ConstantKernel
-from spotpython.design.spacefilling import SpaceFilling
 import warnings
 
 
```
```diff
@@ -72,7 +72,7 @@ class SpotOptim(BaseEstimator):
         surrogate: Optional[object] = None,
         acquisition: str = "ei",
         var_type: Optional[list] = None,
-        tolerance_x: float =
+        tolerance_x: Optional[float] = None,
         seed: Optional[int] = None,
         verbose: bool = False,
         warnings_filter: str = "ignore",
@@ -80,6 +80,14 @@ class SpotOptim(BaseEstimator):
 
         warnings.filterwarnings(warnings_filter)
 
+        # small value, converted to float
+        self.eps = np.sqrt(np.spacing(1))
+
+        if tolerance_x is None:
+            self.tolerance_x = self.eps
+        else:
+            self.tolerance_x = tolerance_x
+
         self.fun = fun
         self.bounds = bounds
         self.max_iter = max_iter
```
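The new default for `tolerance_x` is the square root of machine epsilon. A quick standalone check of that value, for illustration:

```python
import numpy as np

# np.spacing(1) is the gap between 1.0 and the next float, i.e. machine epsilon
eps = np.sqrt(np.spacing(1))
print(eps)  # 1.4901161193847656e-08
```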
```diff
@@ -87,7 +95,6 @@ class SpotOptim(BaseEstimator):
         self.surrogate = surrogate
         self.acquisition = acquisition
         self.var_type = var_type
-        self.tolerance_x = tolerance_x
         self.seed = seed
         self.verbose = verbose
 
@@ -113,7 +120,7 @@ class SpotOptim(BaseEstimator):
         )
 
         # Design generator
-        self.
+        self.lhs_sampler = LatinHypercube(d=self.n_dim, seed=self.seed)
 
         # Storage for results
         self.X_ = None
@@ -139,17 +146,22 @@ class SpotOptim(BaseEstimator):
         return y
 
     def _generate_initial_design(self) -> np.ndarray:
-        """Generate initial space-filling design."""
-
-
-
+        """Generate initial space-filling design using Latin Hypercube Sampling."""
+        # Generate samples in [0, 1]^d
+        X0_unit = self.lhs_sampler.random(n=self.n_initial)
+
+        # Scale to [lower, upper]
+        X0 = self.lower + X0_unit * (self.upper - self.lower)
+
         return self._repair_non_numeric(X0, self.var_type)
 
     def _fit_surrogate(self, X: np.ndarray, y: np.ndarray) -> None:
         """Fit surrogate model to data."""
         self.surrogate.fit(X, y)
 
-    def _select_new(
+    def _select_new(
+        self, A: np.ndarray, X: np.ndarray, tolerance: float = 0
+    ) -> Tuple[np.ndarray, np.ndarray]:
         """
         Select rows from A that are not in X.
 
@@ -278,9 +290,9 @@ class SpotOptim(BaseEstimator):
             # If too close, generate random point
             if self.verbose:
                 print("Proposed point too close, generating random point")
-
-
-            )
+            # Generate a random point using LHS
+            x_next_unit = self.lhs_sampler.random(n=1)[0]
+            x_next = self.lower + x_next_unit * (self.upper - self.lower)
 
         return self._repair_non_numeric(x_next.reshape(1, -1), self.var_type)[0]
 
```
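These hunks replace spotpython's `SpaceFilling` design generator with scipy's `LatinHypercube`, removing the spotpython dependency. A standalone sketch of the new sampling logic; the bounds values are illustrative:

```python
import numpy as np
from scipy.stats.qmc import LatinHypercube

lower = np.array([-2.0, -2.0])
upper = np.array([2.0, 2.0])

sampler = LatinHypercube(d=2, seed=42)
X0_unit = sampler.random(n=10)          # 10 points in the unit square [0, 1]^2
X0 = lower + X0_unit * (upper - lower)  # scaled to the search box
```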
spotoptim-0.0.5/src/spotoptim/surrogate/README.md
ADDED
@@ -0,0 +1,149 @@

# SpotOptim Surrogate Models

This module provides surrogate models for use with the SpotOptim optimizer.

## Kriging Surrogate

The `Kriging` class provides a simplified Gaussian Process (Kriging) surrogate model that can be used as an alternative to scikit-learn's `GaussianProcessRegressor`.

### Features

- **Scikit-learn compatible interface**: Implements `fit()` and `predict()` methods
- **Automatic hyperparameter optimization**: Uses maximum likelihood estimation
- **Gaussian (RBF) kernel**: Exponential correlation function
- **Prediction uncertainty**: Supports `return_std=True` for standard deviations
- **Reproducible**: Supports random seed for consistent results

### Basic Usage

```python
import numpy as np
from spotoptim import SpotOptim, Kriging

# Define objective function
def sphere(X):
    X = np.atleast_2d(X)
    return np.sum(X**2, axis=1)

# Create Kriging surrogate
kriging = Kriging(
    noise=1e-6,
    min_theta=-3.0,
    max_theta=2.0,
    seed=42
)

# Use with SpotOptim
optimizer = SpotOptim(
    fun=sphere,
    bounds=[(-5, 5), (-5, 5)],
    max_iter=20,
    n_initial=10,
    surrogate=kriging,  # Use Kriging surrogate
    seed=42
)

result = optimizer.optimize()
```

### Parameters

- **noise** (float, optional): Regularization parameter (nugget effect). If None, uses sqrt(machine epsilon).
- **kernel** (str, default='gauss'): Kernel type. Currently only 'gauss' (Gaussian/RBF) is supported.
- **n_theta** (int, optional): Number of theta parameters. If None, uses k (number of dimensions).
- **min_theta** (float, default=-3.0): Minimum log10(theta) bound for optimization.
- **max_theta** (float, default=2.0): Maximum log10(theta) bound for optimization.
- **seed** (int, optional): Random seed for reproducibility.

### Methods

#### fit(X, y)

Fit the Kriging model to training data.

**Parameters:**
- `X`: ndarray of shape (n_samples, n_features) - Training input data
- `y`: ndarray of shape (n_samples,) - Training target values

**Returns:**
- `self`: Fitted estimator

#### predict(X, return_std=False)

Predict using the Kriging model.

**Parameters:**
- `X`: ndarray of shape (n_samples, n_features) - Points to predict at
- `return_std`: bool, default=False - If True, return standard deviations as well

**Returns:**
- `y_pred`: ndarray of shape (n_samples,) - Predicted values
- `y_std`: ndarray of shape (n_samples,) - Standard deviations (only if return_std=True)

### Comparison with scikit-learn's GaussianProcessRegressor

| Feature | Kriging | GaussianProcessRegressor |
|---------|---------|--------------------------|
| Interface | scikit-learn compatible | Native scikit-learn |
| Kernel | Gaussian (RBF) | Multiple kernel types |
| Hyperparameters | Theta (length scales) | Flexible kernel parameters |
| Dependencies | NumPy, SciPy | NumPy, SciPy, scikit-learn |
| Optimization | Differential evolution | L-BFGS-B with restarts |
| Code complexity | Simplified, ~350 lines | Full-featured, complex |

### When to Use Kriging

- You want a **self-contained** surrogate without heavy scikit-learn dependency
- You need a **simple, interpretable** Gaussian kernel
- You want **explicit control** over hyperparameter bounds
- You're working with **moderate-dimensional** problems (< 20 dimensions)

### When to Use Default GP

- You need **multiple kernel types** (Matern, RationalQuadratic, etc.)
- You want **advanced features** like gradient-based predictions
- You're working with **very high-dimensional** problems
- You need **production-tested** robustness

## Example: Comparison

```python
from spotoptim import SpotOptim, Kriging

def rosenbrock(X):
    X = np.atleast_2d(X)
    x, y = X[:, 0], X[:, 1]
    return (1 - x)**2 + 100 * (y - x**2)**2

bounds = [(-2, 2), (-2, 2)]

# With Kriging
optimizer_kriging = SpotOptim(
    fun=rosenbrock,
    bounds=bounds,
    surrogate=Kriging(seed=42),
    seed=42
)
result_kriging = optimizer_kriging.optimize()

# With default GP (no surrogate argument)
optimizer_gp = SpotOptim(
    fun=rosenbrock,
    bounds=bounds,
    seed=42
)
result_gp = optimizer_gp.optimize()

print(f"Kriging result: {result_kriging.fun:.6f}")
print(f"GP result: {result_gp.fun:.6f}")
```

## Future Extensions

Planned features for future releases:

- Additional kernel types (Matern, Exponential, etc.)
- Anisotropic hyperparameters
- Gradient information
- Batch predictions
- Parallel hyperparameter optimization
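Because `Kriging` implements the documented `fit`/`predict` interface, it can also be used standalone, outside the optimizer. A minimal sketch with illustrative toy data:

```python
import numpy as np
from spotoptim.surrogate import Kriging

# Toy 1D training data (illustrative)
X_train = np.linspace(0.0, 1.0, 6).reshape(-1, 1)
y_train = np.sin(2.0 * np.pi * X_train).ravel()

model = Kriging(seed=42)
model.fit(X_train, y_train)

# Mean prediction plus uncertainty at new points
X_new = np.array([[0.15], [0.55]])
y_pred, y_std = model.predict(X_new, return_std=True)
print(y_pred, y_std)
```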
spotoptim-0.0.5/src/spotoptim/surrogate/kriging.py
ADDED
@@ -0,0 +1,360 @@

```python
"""
Simplified Kriging surrogate model for SpotOptim.

This is a streamlined version adapted from spotpython.surrogate.kriging
for use with the SpotOptim optimizer.
"""

import numpy as np
from typing import Optional, Tuple, List
from numpy.linalg import LinAlgError, cholesky, solve
from scipy.optimize import differential_evolution
from sklearn.base import BaseEstimator, RegressorMixin


class Kriging(BaseEstimator, RegressorMixin):
    """
    A simplified Kriging (Gaussian Process) surrogate model for SpotOptim.

    This class provides a scikit-learn compatible interface with fit() and predict()
    methods, making it suitable for use as a surrogate in SpotOptim.

    Parameters
    ----------
    noise : float, optional
        Regularization parameter (nugget effect). If None, uses sqrt(eps).
    kernel : str, default='gauss'
        Kernel type. Currently only 'gauss' (Gaussian/RBF) is supported.
    n_theta : int, optional
        Number of theta parameters. If None, uses k (number of dimensions).
    min_theta : float, default=-3.0
        Minimum log10(theta) bound for optimization.
    max_theta : float, default=2.0
        Maximum log10(theta) bound for optimization.
    seed : int, optional
        Random seed for reproducibility.

    Attributes
    ----------
    X_ : ndarray of shape (n_samples, n_features)
        Training data.
    y_ : ndarray of shape (n_samples,)
        Training targets.
    theta_ : ndarray
        Optimized theta parameters (log10 scale).
    mu_ : float
        Mean of the Kriging predictor.
    sigma2_ : float
        Variance of the Kriging predictor.

    Examples
    --------
    >>> import numpy as np
    >>> from spotoptim.surrogate import Kriging
    >>> X = np.array([[0.0], [0.5], [1.0]])
    >>> y = np.array([0.0, 0.25, 1.0])
    >>> model = Kriging()
    >>> model.fit(X, y)
    >>> predictions = model.predict(np.array([[0.25], [0.75]]))
    """

    def __init__(
        self,
        noise: Optional[float] = None,
        kernel: str = "gauss",
        n_theta: Optional[int] = None,
        min_theta: float = -3.0,
        max_theta: float = 2.0,
        seed: Optional[int] = None,
    ):
        self.noise = noise
        self.kernel = kernel
        self.n_theta = n_theta
        self.min_theta = min_theta
        self.max_theta = max_theta
        self.seed = seed

        # Fitted attributes
        self.X_ = None
        self.y_ = None
        self.theta_ = None
        self.mu_ = None
        self.sigma2_ = None
        self.U_ = None  # Cholesky factor
        self.Rinv_one_ = None
        self.Rinv_r_ = None

    def _get_noise(self) -> float:
        """Get the noise/regularization parameter."""
        if self.noise is None:
            return np.sqrt(np.finfo(float).eps)
        return self.noise

    def _correlation(self, D: np.ndarray) -> np.ndarray:
        """
        Compute correlation from distance matrix using Gaussian kernel.

        Parameters
        ----------
        D : ndarray
            Squared distance matrix.

        Returns
        -------
        ndarray
            Correlation matrix.
        """
        if self.kernel == "gauss":
            return np.exp(-D)
        else:
            raise ValueError(f"Unsupported kernel: {self.kernel}")

    def _build_correlation_matrix(self, X: np.ndarray, theta: np.ndarray) -> np.ndarray:
        """
        Build correlation matrix R for training data.

        Parameters
        ----------
        X : ndarray of shape (n, k)
            Input data.
        theta : ndarray of shape (k,)
            Theta parameters (10^theta used as weights).

        Returns
        -------
        ndarray of shape (n, n)
            Correlation matrix with noise on diagonal.
        """
        n = X.shape[0]
        theta10 = 10.0**theta

        # Compute weighted squared distances
        R = np.zeros((n, n))
        for i in range(n):
            for j in range(i + 1, n):
                diff = X[i] - X[j]
                dist = np.sum(theta10 * diff**2)
                R[i, j] = dist
                R[j, i] = dist

        # Apply correlation function
        R = self._correlation(R)

        # Add noise to diagonal
        noise_val = self._get_noise()
        np.fill_diagonal(R, 1.0 + noise_val)

        return R

    def _build_correlation_vector(
        self, x: np.ndarray, X: np.ndarray, theta: np.ndarray
    ) -> np.ndarray:
        """
        Build correlation vector between new point x and training data X.

        Parameters
        ----------
        x : ndarray of shape (k,)
            New point.
        X : ndarray of shape (n, k)
            Training data.
        theta : ndarray of shape (k,)
            Theta parameters.

        Returns
        -------
        ndarray of shape (n,)
            Correlation vector.
        """
        theta10 = 10.0**theta
        diff = X - x.reshape(1, -1)
        D = np.sum(theta10 * diff**2, axis=1)
        return self._correlation(D)

    def _neg_log_likelihood(self, log_theta: np.ndarray) -> float:
        """
        Compute negative concentrated log-likelihood.

        Parameters
        ----------
        log_theta : ndarray
            Log10(theta) parameters.

        Returns
        -------
        float
            Negative log-likelihood (to be minimized).
        """
        try:
            n = self.X_.shape[0]
            y = self.y_.flatten()
            one = np.ones(n)

            # Build correlation matrix
            R = self._build_correlation_matrix(self.X_, log_theta)

            # Cholesky decomposition
            try:
                U = cholesky(R)
            except LinAlgError:
                return 1e10  # Penalty for ill-conditioned matrix

            # Solve for mean and variance
            Uy = solve(U, y)
            Uone = solve(U, one)

            Rinv_y = solve(U.T, Uy)
            Rinv_one = solve(U.T, Uone)

            mu = (one @ Rinv_y) / (one @ Rinv_one)
            r = y - one * mu

            Ur = solve(U, r)
            Rinv_r = solve(U.T, Ur)

            sigma2 = (r @ Rinv_r) / n

            if sigma2 <= 0:
                return 1e10

            # Concentrated log-likelihood
            log_det_R = 2.0 * np.sum(np.log(np.abs(np.diag(U))))
            neg_log_like = (n / 2.0) * np.log(sigma2) + 0.5 * log_det_R

            return neg_log_like

        except (LinAlgError, ValueError):
            return 1e10

    def fit(self, X: np.ndarray, y: np.ndarray) -> "Kriging":
        """
        Fit the Kriging model to training data.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training input data.
        y : ndarray of shape (n_samples,)
            Training target values.

        Returns
        -------
        self
            Fitted estimator.
        """
        X = np.atleast_2d(X)
        y = np.asarray(y).flatten()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
        if y.ndim != 1:
            raise ValueError(f"y must be 1-dimensional, got shape {y.shape}")
        if X.shape[0] != y.shape[0]:
            raise ValueError(f"X and y must have same number of samples")

        self.X_ = X
        self.y_ = y
        n, k = X.shape

        # Set number of theta parameters
        if self.n_theta is None:
            self.n_theta = k

        # Optimize theta via maximum likelihood
        bounds = [(self.min_theta, self.max_theta)] * self.n_theta

        result = differential_evolution(
            func=self._neg_log_likelihood,
            bounds=bounds,
            seed=self.seed,
            maxiter=100,
            atol=1e-6,
            tol=0.01,
        )

        self.theta_ = result.x

        # Compute final model parameters
        one = np.ones(n)
        R = self._build_correlation_matrix(X, self.theta_)

        try:
            self.U_ = cholesky(R)
        except LinAlgError:
            # Add more regularization if needed
            R = self._build_correlation_matrix(X, self.theta_)
            R += np.eye(n) * 1e-8
            self.U_ = cholesky(R)

        Uy = solve(self.U_, y)
        Uone = solve(self.U_, one)

        Rinv_y = solve(self.U_.T, Uy)
        Rinv_one = solve(self.U_.T, Uone)

        self.mu_ = float((one @ Rinv_y) / (one @ Rinv_one))

        r = y - one * self.mu_
        Ur = solve(self.U_, r)
        Rinv_r = solve(self.U_.T, Ur)

        self.sigma2_ = float((r @ Rinv_r) / n)

        # Store for prediction
        self.Rinv_one_ = Rinv_one
        self.Rinv_r_ = Rinv_r

        return self

    def predict(self, X: np.ndarray, return_std: bool = False) -> np.ndarray:
        """
        Predict using the Kriging model.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Points to predict at.
        return_std : bool, default=False
            If True, return standard deviations as well.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted values.
        y_std : ndarray of shape (n_samples,), optional
            Standard deviations (only if return_std=True).
        """
        X = np.atleast_2d(X)

        if X.ndim == 1:
            X = X.reshape(1, -1)

        if X.shape[1] != self.X_.shape[1]:
            raise ValueError(
                f"X has {X.shape[1]} features, expected {self.X_.shape[1]}"
            )

        n_pred = X.shape[0]
        predictions = np.zeros(n_pred)

        if return_std:
            std_devs = np.zeros(n_pred)

        for i, x in enumerate(X):
            # Build correlation vector
            psi = self._build_correlation_vector(x, self.X_, self.theta_)

            # Predict mean
            predictions[i] = self.mu_ + psi @ self.Rinv_r_

            if return_std:
                # Predict variance
                Upsi = solve(self.U_, psi)
                psi_Rinv_psi = psi @ solve(self.U_.T, Upsi)

                variance = self.sigma2_ * (1.0 + self._get_noise() - psi_Rinv_psi)
                std_devs[i] = np.sqrt(max(0.0, variance))

        if return_std:
            return predictions, std_devs
        return predictions
```
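For reference, `_neg_log_likelihood`, `fit`, and `predict` implement the standard ordinary-Kriging equations. With correlation matrix $R$ (nugget $\lambda$ added on the diagonal), ones vector $\mathbf{1}$, and correlation vector $\psi$ between a new point $x$ and the training data:

$$
\hat{\mu} = \frac{\mathbf{1}^\top R^{-1} y}{\mathbf{1}^\top R^{-1} \mathbf{1}},
\qquad
\hat{\sigma}^2 = \frac{(y - \mathbf{1}\hat{\mu})^\top R^{-1} (y - \mathbf{1}\hat{\mu})}{n},
$$

the negative concentrated log-likelihood minimized over $\log_{10}\theta$ (up to an additive constant):

$$
-\ell(\theta) = \frac{n}{2}\log\hat{\sigma}^2 + \frac{1}{2}\log\det R,
$$

and the predictor mean and variance:

$$
\hat{y}(x) = \hat{\mu} + \psi^\top R^{-1}\left(y - \mathbf{1}\hat{\mu}\right),
\qquad
s^2(x) = \hat{\sigma}^2\left(1 + \lambda - \psi^\top R^{-1}\psi\right).
$$

The Cholesky factor $U$ of $R$ is reused throughout, with $\log\det R = 2\sum_i \log U_{ii}$.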
spotoptim-0.0.3/PKG-INFO
DELETED

```diff
@@ -1,15 +0,0 @@
-Metadata-Version: 2.3
-Name: spotoptim
-Version: 0.0.3
-Summary: Add your description here
-Author: bartzbeielstein
-Author-email: bartzbeielstein <32470350+bartzbeielstein@users.noreply.github.com>
-Requires-Dist: fastapi>=0.121.1
-Requires-Dist: numpy>=1.24.3
-Requires-Dist: scipy>=1.10.1
-Requires-Dist: uvicorn>=0.22.0
-Requires-Dist: scikit-learn
-Requires-Dist: spotpython
-Requires-Python: >=3.10
-Description-Content-Type: text/markdown
-
```
spotoptim-0.0.3/README.md
DELETED
File without changes (empty file)

{spotoptim-0.0.3 → spotoptim-0.0.5}/src/spotoptim/py.typed
File without changes