tequila_basic-1.9.9-py3-none-any.whl → tequila_basic-1.9.10-py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- tequila/__init__.py +29 -14
- tequila/apps/__init__.py +14 -5
- tequila/apps/_unary_state_prep_impl.py +145 -112
- tequila/apps/adapt/__init__.py +9 -1
- tequila/apps/adapt/adapt.py +154 -113
- tequila/apps/krylov/__init__.py +1 -1
- tequila/apps/krylov/krylov.py +23 -21
- tequila/apps/robustness/helpers.py +10 -6
- tequila/apps/robustness/interval.py +238 -156
- tequila/apps/unary_state_prep.py +29 -23
- tequila/autograd_imports.py +8 -5
- tequila/circuit/__init__.py +2 -1
- tequila/circuit/_gates_impl.py +135 -67
- tequila/circuit/circuit.py +163 -79
- tequila/circuit/compiler.py +114 -105
- tequila/circuit/gates.py +288 -120
- tequila/circuit/gradient.py +35 -23
- tequila/circuit/noise.py +83 -74
- tequila/circuit/postselection.py +120 -0
- tequila/circuit/pyzx.py +10 -6
- tequila/circuit/qasm.py +201 -83
- tequila/circuit/qpic.py +63 -61
- tequila/grouping/binary_rep.py +148 -146
- tequila/grouping/binary_utils.py +84 -75
- tequila/grouping/compile_groups.py +334 -230
- tequila/grouping/ev_utils.py +77 -41
- tequila/grouping/fermionic_functions.py +383 -308
- tequila/grouping/fermionic_methods.py +170 -123
- tequila/grouping/overlapping_methods.py +69 -52
- tequila/hamiltonian/paulis.py +12 -13
- tequila/hamiltonian/paulistring.py +1 -1
- tequila/hamiltonian/qubit_hamiltonian.py +45 -35
- tequila/ml/__init__.py +1 -0
- tequila/ml/interface_torch.py +19 -16
- tequila/ml/ml_api.py +11 -10
- tequila/ml/utils_ml.py +12 -11
- tequila/objective/__init__.py +8 -3
- tequila/objective/braket.py +55 -47
- tequila/objective/objective.py +87 -55
- tequila/objective/qtensor.py +36 -27
- tequila/optimizers/__init__.py +31 -23
- tequila/optimizers/_containers.py +11 -7
- tequila/optimizers/optimizer_base.py +111 -83
- tequila/optimizers/optimizer_gd.py +258 -231
- tequila/optimizers/optimizer_gpyopt.py +56 -42
- tequila/optimizers/optimizer_scipy.py +157 -112
- tequila/quantumchemistry/__init__.py +66 -38
- tequila/quantumchemistry/chemistry_tools.py +393 -209
- tequila/quantumchemistry/encodings.py +121 -13
- tequila/quantumchemistry/madness_interface.py +170 -96
- tequila/quantumchemistry/orbital_optimizer.py +86 -41
- tequila/quantumchemistry/psi4_interface.py +166 -97
- tequila/quantumchemistry/pyscf_interface.py +70 -23
- tequila/quantumchemistry/qc_base.py +866 -414
- tequila/simulators/__init__.py +0 -3
- tequila/simulators/simulator_api.py +247 -105
- tequila/simulators/simulator_aqt.py +102 -0
- tequila/simulators/simulator_base.py +147 -53
- tequila/simulators/simulator_cirq.py +58 -42
- tequila/simulators/simulator_cudaq.py +600 -0
- tequila/simulators/simulator_ddsim.py +390 -0
- tequila/simulators/simulator_mqp.py +30 -0
- tequila/simulators/simulator_pyquil.py +190 -171
- tequila/simulators/simulator_qibo.py +95 -87
- tequila/simulators/simulator_qiskit.py +119 -107
- tequila/simulators/simulator_qlm.py +52 -26
- tequila/simulators/simulator_qulacs.py +74 -52
- tequila/simulators/simulator_spex.py +95 -60
- tequila/simulators/simulator_symbolic.py +6 -5
- tequila/simulators/test_spex_simulator.py +8 -11
- tequila/tools/convenience.py +4 -4
- tequila/tools/qng.py +72 -64
- tequila/tools/random_generators.py +38 -34
- tequila/utils/bitstrings.py +7 -7
- tequila/utils/exceptions.py +19 -5
- tequila/utils/joined_transformation.py +8 -10
- tequila/utils/keymap.py +0 -5
- tequila/utils/misc.py +6 -4
- tequila/version.py +1 -1
- tequila/wavefunction/qubit_wavefunction.py +47 -28
- {tequila_basic-1.9.9.dist-info → tequila_basic-1.9.10.dist-info}/METADATA +13 -16
- tequila_basic-1.9.10.dist-info/RECORD +93 -0
- {tequila_basic-1.9.9.dist-info → tequila_basic-1.9.10.dist-info}/WHEEL +1 -1
- tequila_basic-1.9.9.dist-info/RECORD +0 -88
- {tequila_basic-1.9.9.dist-info → tequila_basic-1.9.10.dist-info}/licenses/LICENSE +0 -0
- {tequila_basic-1.9.9.dist-info → tequila_basic-1.9.10.dist-info}/top_level.txt +0 -0
tequila/optimizers/optimizer_gd.py

@@ -1,4 +1,6 @@
-import numpy
+import numpy
+import typing
+import numbers
 from tequila.objective import Objective
 from tequila.objective.objective import Variable, format_variable_dictionary
 from .optimizer_base import Optimizer, OptimizerResults, dataclass
@@ -6,12 +8,13 @@ from tequila.circuit.noise import NoiseModel
 from tequila.tools.qng import get_qng_combos, CallableVector, QNGVector
 from tequila.utils import TequilaException

+
 @dataclass
 class GDResults(OptimizerResults):
-
     moments: dict = None
     num_iteration: int = 0
-
+
+
 class OptimizerGD(Optimizer):
     """
     The gradient descent optimizer for tequila.
@@ -85,36 +88,49 @@ class OptimizerGD(Optimizer):


     """
+
     @classmethod
     def available_methods(cls):
         """:return: All tested available methods"""
-        return [
-
+        return [
+            "adam",
+            "adagrad",
+            "adamax",
+            "nadam",
+            "sgd",
+            "momentum",
+            "nesterov",
+            "rmsprop",
+            "rmsprop-nesterov",
+            "spsa",
+        ]

     @classmethod
     def available_diis(cls):
         """:return: All tested methods that can be diis accelerated"""
-        return [
-
-    def __init__(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        return ["sgd"]
+
+    def __init__(
+        self,
+        maxiter=100,
+        method="sgd",
+        tol: numbers.Real = None,
+        lr: typing.Union[numbers.Real, typing.List[numbers.Real]] = 0.1,
+        alpha: numbers.Real = None,
+        gamma: numbers.Real = None,
+        beta: numbers.Real = 0.9,
+        rho: numbers.Real = 0.999,
+        c: typing.Union[numbers.Real, typing.List[numbers.Real]] = 0.2,
+        epsilon: numbers.Real = 1.0 * 10 ** (-7),
+        diis: typing.Optional[dict] = None,
+        backend=None,
+        samples=None,
+        device=None,
+        noise=None,
+        silent=True,
+        calibrate_lr: bool = False,
+        **kwargs,
+    ):
         """

         Parameters
@@ -169,22 +185,21 @@ class OptimizerGD(Optimizer):
         kwargs
         """

-        super().__init__(
-
-
-            **kwargs)
+        super().__init__(
+            maxiter=maxiter, samples=samples, device=device, backend=backend, silent=silent, noise=noise, **kwargs
+        )
         method_dict = {
-
-
-
-
-
-
-
-
-
-
-
+            "adam": self._adam,
+            "adagrad": self._adagrad,
+            "adamax": self._adamax,
+            "nadam": self._nadam,
+            "sgd": self._sgd,
+            "momentum": self._momentum,
+            "nesterov": self._nesterov,
+            "rmsprop": self._rms,
+            "rmsprop-nesterov": self._rms_nesterov,
+            "spsa": self._spsa,
+        }

         self.f = method_dict[method.lower()]
         self.gradient_lookup = {}
@@ -219,28 +234,31 @@ class OptimizerGD(Optimizer):
         else:
             raise TypeError("Type of DIIS is not dict")

-        if
+        if isinstance(lr, list):
             self.nextLRIndex = 0
             for i in lr:
-                assert
+                assert i > 0.0
         else:
             self.nextLRIndex = -1
-            assert
+            assert lr > 0.0

-        assert all([k > .0 for k in [beta, rho, epsilon]])
+        assert all([k > 0.0 for k in [beta, rho, epsilon]])
         self.tol = tol
         if self.tol is not None:
             self.tol = abs(float(tol))

-    def __call__(
-
-
-
-
-
-
-
-
+    def __call__(
+        self,
+        objective: Objective,
+        maxiter: int = None,
+        initial_values: typing.Dict[Variable, numbers.Real] = None,
+        variables: typing.List[Variable] = None,
+        reset_history: bool = True,
+        method_options: dict = None,
+        gradient=None,
+        *args,
+        **kwargs,
+    ) -> GDResults:
         """
         perform a gradient descent optimization of an objective.

@@ -273,7 +291,6 @@ class OptimizerGD(Optimizer):
         all the results of optimization.
         """

-
         if self.save_history and reset_history:
             self.reset_history()

@@ -310,10 +327,10 @@ class OptimizerGD(Optimizer):
                 best = e
                 best_angles = v

-            if self.tol
+            if self.tol is not None:
                 if numpy.abs(e - last) <= self.tol:
                     if not self.silent:
-                        print(
+                        print("delta f smaller than tolerance {}. Stopping optimization.".format(str(self.tol)))
                     break

             ### get new parameters with self.step!
@@ -322,37 +339,43 @@ class OptimizerGD(Optimizer):
             # From http://vergil.chemistry.gatech.edu/notes/diis/node3.html
             if self.__diis:
                 self.__diis.push(
-                    numpy.array([vn[k] for k in active_angles]),
-
+                    numpy.array([vn[k] for k in active_angles]), numpy.array([vn[k] - v[k] for k in active_angles])
+                )

                 new = self.__diis.update()
                 if new is not None:
                     self.reset_momenta()
                     comment = "DIIS"
-                    for i,k in enumerate(active_angles):
+                    for i, k in enumerate(active_angles):
                         vn[k] = new[i]

-
             if not self.silent:
                 self.__dx = numpy.asarray(self.__dx)
-                print(
-
-
-
-
-
-
-
+                print(
+                    "%3i %+15.8f %+7.2e %7.3e %7.3e %s"
+                    % (
+                        step,
+                        e,
+                        e - last,
+                        numpy.max([abs(x) for x in self.__dx]),
+                        numpy.sqrt(numpy.average(self.__dx**2)),
+                        comment,
+                    )
+                )

             last = e
             v = vn
             self.iteration += 1
         E_final, angles_final = best, best_angles
-        return GDResults(
-
-
-
-
+        return GDResults(
+            energy=E_final,
+            variables=format_variable_dictionary(angles_final),
+            history=self.history,
+            moments=self.moments_trajectory[id(comp)],
+            num_iteration=self.iteration,
+        )
+
+    def prepare(self, objective: Objective, initial_values: dict = None, variables: list = None, gradient=None):
         """
         perform all initialization for an objective, register it with lookup tables, and return it compiled.
         MUST be called before step is used.
@@ -380,45 +403,60 @@ class OptimizerGD(Optimizer):
         active_angles, passive_angles, variables = self.initialize_variables(objective, initial_values, variables)
         comp = self.compile_objective(objective=objective)
         for arg in comp.args:
-            if hasattr(arg,
+            if hasattr(arg, "U"):
                 if arg.U.device is not None:
                     # don't retrieve computer 100 times; pyquil errors out if this happens!
                     self.device = arg.U.device
                     break

-        if
+        if self.f == self._spsa:
             gradient = {"method": "standard_spsa", "stepsize": self.c, "gamma": self.gamma}

         compile_gradient = True
         dE = None
         if isinstance(gradient, str):
-            if gradient.lower() ==
+            if gradient.lower() == "qng":
                 compile_gradient = False

-                combos = get_qng_combos(
-
-
-
+                combos = get_qng_combos(
+                    objective,
+                    initial_values=initial_values,
+                    backend=self.backend,
+                    device=self.device,
+                    samples=self.samples,
+                    noise=self.noise,
+                )
                 dE = QNGVector(combos)
             else:
-                gradient = {"method": gradient, "stepsize": 1.
+                gradient = {"method": gradient, "stepsize": 1.0e-4}

-        elif isinstance(gradient,dict):
-            if gradient[
-                func = gradient[
+        elif isinstance(gradient, dict):
+            if gradient["method"] == "qng":
+                func = gradient["function"]
                 compile_gradient = False
-                combos = get_qng_combos(
-
-
+                combos = get_qng_combos(
+                    objective,
+                    func=func,
+                    initial_values=initial_values,
+                    backend=self.backend,
+                    device=self.device,
+                    samples=self.samples,
+                    noise=self.noise,
+                )
                 dE = QNGVector(combos)

         if compile_gradient:
             grad_obj, comp_grad_obj = self.compile_gradient(objective=objective, variables=variables, gradient=gradient)
-            spsa =
+            spsa = (
+                isinstance(gradient, dict)
+                and "method" in gradient
+                and isinstance(gradient["method"], str)
+                and "spsa" in gradient["method"].lower()
+            )
             if spsa:
                 dE = comp_grad_obj
-                if
-                    self.lr = dE.calibrated_lr(self.lr,initial_values, 50, samples=self.samples)
+                if self.calibrate_lr:
+                    self.lr = dE.calibrated_lr(self.lr, initial_values, 50, samples=self.samples)
             else:
                 dE = CallableVector([comp_grad_obj[k] for k in comp_grad_obj.keys()])

@@ -444,8 +482,9 @@ class OptimizerGD(Optimizer):
             self.step_lookup[ostring] = 0
         return comp

-    def step(
-
+    def step(
+        self, objective: Objective, parameters: typing.Dict[Variable, numbers.Real]
+    ) -> typing.Dict[Variable, numbers.Real]:
         """
         perform a single optimization step and return suggested parameters.
         Parameters
@@ -466,15 +505,18 @@ class OptimizerGD(Optimizer):
             active_keys = self.active_key_lookup[s]
             last_moment = self.moments_lookup[s]
             adam_step = self.step_lookup[s]
-        except:
+        except Exception:
             raise TequilaException(
-
-
-
-
-
-
-
+                "Could not retrieve necessary information. Please use the prepare function before optimizing!"
+            )
+        new, moments, grads = self.f(
+            step=adam_step,
+            gradients=gradients,
+            active_keys=active_keys,
+            moments=last_moment,
+            v=parameters,
+            iteration=self.iteration,
+        )
         back = {**parameters}
         for k in new.keys():
             back[k] = new[k]
@@ -486,7 +528,7 @@ class OptimizerGD(Optimizer):
             save_grad[k] = grads[i]
         self.history.gradients.append(save_grad)
         self.step_lookup[s] += 1
-        self.__dx = grads
+        self.__dx = grads  # most recent gradient
         return back

     def reset_stepper(self):
@@ -539,13 +581,10 @@ class OptimizerGD(Optimizer):
             self.moments_lookup[k] = (first, second)
             self.moments_trajectory[k] = [(first, second)]
             self.step_lookup[k] = 0
-        except:
-            print(
-
-    def _adam(self, gradients, step,
-              v, moments, active_keys,
-              **kwargs):
+        except Exception:
+            print("found no compiled objective with id {} in lookup. Did you pass the correct object?".format(k))

+    def _adam(self, gradients, step, v, moments, active_keys, **kwargs):
         learningRate = self.nextLearningRate()
         t = step + 1
         s = moments[0]
@@ -553,11 +592,11 @@ class OptimizerGD(Optimizer):
         grads = gradients(v, samples=self.samples)
         s = self.beta * s + (1 - self.beta) * grads
         r = self.rho * r + (1 - self.rho) * numpy.square(grads)
-        s_hat = s / (1 - self.beta
-        r_hat = r / (1 - self.rho
+        s_hat = s / (1 - self.beta**t)
+        r_hat = r / (1 - self.rho**t)
         updates = []
         for i in range(len(grads)):
-            rule = -
+            rule = -learningRate * s_hat[i] / (numpy.sqrt(r_hat[i]) + self.epsilon)
             updates.append(rule)
         new = {}
         for i, k in enumerate(active_keys):
@@ -565,9 +604,7 @@ class OptimizerGD(Optimizer):
         back_moment = [s, r]
         return new, back_moment, grads

-    def _adagrad(self, gradients,
-                 v, moments, active_keys, **kwargs):
-
+    def _adagrad(self, gradients, v, moments, active_keys, **kwargs):
         learningRate = self.nextLearningRate()
         r = moments[1]
         grads = gradients(v, self.samples)
@@ -580,9 +617,7 @@ class OptimizerGD(Optimizer):
         back_moments = [moments[0], r]
         return new, back_moments, grads

-    def _adamax(self, gradients,
-                v, moments, active_keys, **kwargs):
-
+    def _adamax(self, gradients, v, moments, active_keys, **kwargs):
         learningRate = self.nextLearningRate()
         s = moments[0]
         r = moments[1]
@@ -591,7 +626,7 @@ class OptimizerGD(Optimizer):
         r = self.rho * r + (1 - self.rho) * numpy.linalg.norm(grads, numpy.inf)
         updates = []
         for i in range(len(grads)):
-            rule = -
+            rule = -learningRate * s[i] / r[i]
             updates.append(rule)
         new = {}
         for i, k in enumerate(active_keys):
@@ -599,10 +634,7 @@ class OptimizerGD(Optimizer):
         back_moment = [s, r]
         return new, back_moment, grads

-    def _nadam(self, step, gradients,
-               v, moments, active_keys,
-               **kwargs):
-
+    def _nadam(self, step, gradients, v, moments, active_keys, **kwargs):
         learningRate = self.nextLearningRate()
         s = moments[0]
         r = moments[1]
@@ -610,12 +642,15 @@ class OptimizerGD(Optimizer):
         grads = gradients(v, samples=self.samples)
         s = self.beta * s + (1 - self.beta) * grads
         r = self.rho * r + (1 - self.rho) * numpy.square(grads)
-        s_hat = s / (1 - self.beta
-        r_hat = r / (1 - self.rho
+        s_hat = s / (1 - self.beta**t)
+        r_hat = r / (1 - self.rho**t)
         updates = []
         for i in range(len(grads)):
-            rule =
-
+            rule = (
+                -learningRate
+                * (self.beta * s_hat[i] + (1 - self.beta) * grads[i] / (1 - self.beta**t))
+                / (numpy.sqrt(r_hat[i]) + self.epsilon)
+            )
             updates.append(rule)
         new = {}
         for i, k in enumerate(active_keys):
@@ -623,9 +658,7 @@ class OptimizerGD(Optimizer):
         back_moment = [s, r]
         return new, back_moment, grads

-    def _sgd(self, gradients,
-             v, moments, active_keys, **kwargs):
-
+    def _sgd(self, gradients, v, moments, active_keys, **kwargs):
         learningRate = self.nextLearningRate()
         grads = gradients(v, samples=self.samples)
         new = {}
@@ -634,7 +667,6 @@ class OptimizerGD(Optimizer):
         return new, moments, grads

     def _spsa(self, gradients, v, moments, active_keys, **kwargs):
-
         learningRate = self.nextLearningRate()
         grads = gradients(v, samples=self.samples, iteration=self.iteration)
         new = {}
@@ -642,9 +674,7 @@ class OptimizerGD(Optimizer):
             new[k] = v[k] - learningRate * grads[i]
         return new, moments, grads

-    def _momentum(self, gradients,
-                  v, moments, active_keys, **kwargs):
-
+    def _momentum(self, gradients, v, moments, active_keys, **kwargs):
         learningRate = self.nextLearningRate()
         m = moments[0]
         grads = gradients(v, samples=self.samples)
@@ -657,9 +687,7 @@ class OptimizerGD(Optimizer):
         back_moments = [m, moments[1]]
         return new, back_moments, grads

-    def _nesterov(self, gradients,
-                  v, moments, active_keys, **kwargs):
-
+    def _nesterov(self, gradients, v, moments, active_keys, **kwargs):
         learningRate = self.nextLearningRate()
         m = moments[0]

@@ -681,10 +709,7 @@ class OptimizerGD(Optimizer):
         back_moments = [m, moments[1]]
         return new, back_moments, grads

-    def _rms(self, gradients,
-             v, moments, active_keys,
-             **kwargs):
-
+    def _rms(self, gradients, v, moments, active_keys, **kwargs):
         learningRate = self.nextLearningRate()
         r = moments[1]
         grads = gradients(v, samples=self.samples)
@@ -696,10 +721,7 @@ class OptimizerGD(Optimizer):
         back_moments = [moments[0], r]
         return new, back_moments, grads

-    def _rms_nesterov(self, gradients,
-                      v, moments, active_keys,
-                      **kwargs):
-
+    def _rms_nesterov(self, gradients, v, moments, active_keys, **kwargs):
         learningRate = self.nextLearningRate()
         m = moments[0]
         r = moments[1]
@@ -725,30 +747,32 @@ class OptimizerGD(Optimizer):
         return new, back_moments, grads

     def nextLearningRate(self):
-        """
+        """Return the learning rate to use

         Returns
         -------
         float representing the learning rate to use
         """
-        if
-            if
-                return self.lr/(self.iteration
+        if self.nextLRIndex == -1:
+            if self.alpha is not None:
+                return self.lr / (self.iteration**self.alpha)
             return self.lr
         else:
-            if
+            if self.nextLRIndex != len(self.lr) - 1:
                 self.nextLRIndex += 1
-                return self.lr[self.nextLRIndex-1]
+                return self.lr[self.nextLRIndex - 1]
             else:
                 return self.lr[self.nextLRIndex]

+
 class DIIS:
-    def __init__(
-
-
-
-
-
+    def __init__(
+        self: "DIIS",
+        ndiis: int = 8,
+        min_vectors: int = 3,
+        tol: float = 5e-2,
+        drop: str = "error",
+    ) -> None:
         """DIIS accelerator for gradient descent methods.

         Setup a DIIS accelerator. Every gradient step, the optimizer should
@@ -793,37 +817,32 @@ class DIIS:
         self.error = []
         self.P = []

-        if drop ==
+        if drop == "error":
             self.drop = self.drop_error
-        elif drop ==
+        elif drop == "first":
             self.drop = self.drop_first
         else:
             raise NotImplementedError("Drop type %s not implemented" % drop)

-    def reset(self:
+    def reset(self: "DIIS") -> None:
         """Reset containers."""
         self.P = []
         self.error = []

-    def drop_first(
-
-
-    ) -> typing.Tuple[typing.List[numpy.ndarray], typing.List[numpy.ndarray]]:
+    def drop_first(
+        self: "DIIS", p: typing.Sequence[numpy.ndarray], e: typing.Sequence[numpy.ndarray]
+    ) -> typing.Tuple[typing.List[numpy.ndarray], typing.List[numpy.ndarray]]:
         """Return P,E with the first element removed."""
         return p[1:], e[1:]

-    def drop_error(
-
-
-    ) -> typing.Tuple[typing.List[numpy.ndarray], typing.List[numpy.ndarray]]:
+    def drop_error(
+        self: "DIIS", p: typing.Sequence[numpy.ndarray], e: typing.Sequence[numpy.ndarray]
+    ) -> typing.Tuple[typing.List[numpy.ndarray], typing.List[numpy.ndarray]]:
         """Return P,E with the largest magnitude error vector removed."""
         i = numpy.argmax([v.dot(v) for v in e])
-        return p[:i] + p[i+1:], e[:i] + e[i+1:]
+        return p[:i] + p[i + 1 :], e[:i] + e[i + 1 :]

-    def push(self:
-             param_vector: numpy.ndarray,
-             error_vector: numpy.ndarray
-             ) -> None:
+    def push(self: "DIIS", param_vector: numpy.ndarray, error_vector: numpy.ndarray) -> None:
         """Update DIIS calculator with parameter and error vectors."""
         if len(self.error) == self.ndiis:
             self.drop(self.P, self.error)
@@ -831,7 +850,7 @@ class DIIS:
         self.error += [error_vector]
         self.P += [param_vector]

-    def do_diis(self:
+    def do_diis(self: "DIIS") -> bool:
         """Return with DIIS should be performed."""
         if len(self.error) < self.min_vectors:
             # No point in DIIS with less than 2 vectors!
@@ -842,7 +861,7 @@ class DIIS:

         return True

-    def update(self:
+    def update(self: "DIIS") -> typing.Optional[numpy.ndarray]:
         """Get update parameter from DIIS iteration, or None if DIIS is not doable."""
         # Check if we should do DIIS
         if not self.do_diis():
@@ -850,23 +869,23 @@ class DIIS:

         # Making the B matrix
         N = len(self.error)
-        B = numpy.zeros((N+1, N+1))
+        B = numpy.zeros((N + 1, N + 1))
         for i in range(N):
-            for j in range(i,N):
-                B[i,j] = self.error[i].dot(self.error[j])
-                B[j,i] = B[i,j]
+            for j in range(i, N):
+                B[i, j] = self.error[i].dot(self.error[j])
+                B[j, i] = B[i, j]

-        B[N
-        B[:,N] = -1
-        B[N,N] = 0
+        B[N, :] = -1
+        B[:, N] = -1
+        B[N, N] = 0

         # Making the K vector
-        K = numpy.zeros((N+1,))
+        K = numpy.zeros((N + 1,))
         K[-1] = -1.0

         # Solve DIIS for great convergence!
         try:
-            diis_v, res, rank, s = numpy.linalg.lstsq(B,K,rcond=None)
+            diis_v, res, rank, s = numpy.linalg.lstsq(B, K, rcond=None)
         except numpy.linalg.LinAlgError:
             self.reset()
             return None
@@ -875,32 +894,33 @@ class DIIS:
         return new


-def minimize(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def minimize(
+    objective: Objective,
+    lr: typing.Union[float, typing.List[float]] = 0.1,
+    method="sgd",
+    initial_values: typing.Dict[typing.Hashable, numbers.Real] = None,
+    variables: typing.List[typing.Hashable] = None,
+    gradient: str = None,
+    samples: int = None,
+    maxiter: int = 100,
+    diis: int = None,
+    backend: str = None,
+    noise: NoiseModel = None,
+    device: str = None,
+    tol: float = None,
+    silent: bool = False,
+    save_history: bool = True,
+    alpha: float = None,
+    gamma: float = None,
+    beta: float = 0.9,
+    rho: float = 0.999,
+    c: typing.Union[float, typing.List[float]] = 0.2,
+    epsilon: float = 1.0 * 10 ** (-7),
+    calibrate_lr: bool = False,
+    *args,
+    **kwargs,
+) -> GDResults:
+    """Initialize and call the GD optimizer.
     Parameters
     ----------
     objective: Objective :
@@ -966,25 +986,32 @@ def minimize(objective: Objective,
     if isinstance(gradient, dict) or hasattr(gradient, "items"):
         if all([isinstance(x, Objective) for x in gradient.values()]):
             gradient = format_variable_dictionary(gradient)
-    optimizer = OptimizerGD(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    optimizer = OptimizerGD(
+        save_history=save_history,
+        method=method,
+        lr=lr,
+        alpha=alpha,
+        gamma=gamma,
+        beta=beta,
+        rho=rho,
+        c=c,
+        tol=tol,
+        diis=diis,
+        epsilon=epsilon,
+        samples=samples,
+        backend=backend,
+        device=device,
+        noise=noise,
+        maxiter=maxiter,
+        silent=silent,
+        calibrate_lr=calibrate_lr,
+    )
+    return optimizer(
+        objective=objective,
+        maxiter=maxiter,
+        gradient=gradient,
+        initial_values=initial_values,
+        variables=variables,
+        *args,
+        **kwargs,
+    )