x-evolution 0.1.25.tar.gz → 0.1.26.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {x_evolution-0.1.25 → x_evolution-0.1.26}/PKG-INFO +2 -2
- {x_evolution-0.1.25 → x_evolution-0.1.26}/pyproject.toml +2 -2
- {x_evolution-0.1.25 → x_evolution-0.1.26}/tests/test_evolution.py +21 -0
- x_evolution-0.1.26/train_humanoid.py +215 -0
- x_evolution-0.1.26/train_lunar.py +141 -0
- x_evolution-0.1.26/train_mnist.py +91 -0
- x_evolution-0.1.26/train_xor.py +83 -0
- {x_evolution-0.1.25 → x_evolution-0.1.26}/x_evolution/x_evolution.py +23 -15
- x_evolution-0.1.25/train_humanoid.py +0 -130
- x_evolution-0.1.25/train_lunar.py +0 -107
- x_evolution-0.1.25/train_mnist.py +0 -63
- x_evolution-0.1.25/train_xor.py +0 -57
- {x_evolution-0.1.25 → x_evolution-0.1.26}/.github/workflows/python-publish.yml +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.26}/.github/workflows/test.yml +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.26}/.gitignore +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.26}/LICENSE +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.26}/README.md +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.26}/scripts/install-humanoid.sh +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.26}/x_evolution/__init__.py +0 -0
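The substantive change in this release is vectorized environment support: `EvoStrategy` gains `vectorized` and `vector_size` arguments, the bundled training scripts gain `vectorized`/`num_envs` options, and a new test covers environments that return one fitness score per sub-environment. A minimal sketch of the new usage, mirroring the added test (the toy `nn.Sequential` model and the random-fitness environment below are illustrative stand-ins, not part of the package):

```python
import torch
from torch import nn
from x_evolution import EvoStrategy

num_envs = 4

# any small torch module can serve as the evolved model for this sketch
model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))

def environment(model):
    # a vectorized environment returns one fitness score per sub-environment
    return torch.randn(num_envs)

evo_strat = EvoStrategy(
    model,
    environment = environment,
    num_generations = 2,
    vectorized = True,       # new in 0.1.26
    vector_size = num_envs,  # new in 0.1.26
    noise_population_size = 4
)

evo_strat()
```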
--- x_evolution-0.1.25/PKG-INFO
+++ x_evolution-0.1.26/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-evolution
-Version: 0.1.25
+Version: 0.1.26
 Summary: x-evolution
 Project-URL: Homepage, https://pypi.org/project/x-evolution/
 Project-URL: Repository, https://github.com/lucidrains/x-evolution
@@ -38,7 +38,7 @@ Requires-Dist: accelerate
 Requires-Dist: beartype
 Requires-Dist: einops>=0.8.0
 Requires-Dist: torch>=2.4
-Requires-Dist: x-mlps-pytorch>=0.
+Requires-Dist: x-mlps-pytorch>=0.2.0
 Requires-Dist: x-transformers>=2.11.23
 Provides-Extra: examples
 Requires-Dist: gymnasium[box2d]>=1.0.0; extra == 'examples'

--- x_evolution-0.1.25/pyproject.toml
+++ x_evolution-0.1.26/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-evolution"
-version = "0.1.25"
+version = "0.1.26"
 description = "x-evolution"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -27,7 +27,7 @@ dependencies = [
     "beartype",
     "einops>=0.8.0",
     "torch>=2.4",
-    "x-mlps-pytorch>=0.
+    "x-mlps-pytorch>=0.2.0",
     "x-transformers>=2.11.23"
 ]
 
--- x_evolution-0.1.25/tests/test_evolution.py
+++ x_evolution-0.1.26/tests/test_evolution.py
@@ -47,3 +47,24 @@ def test_evo_strat(
     evo_strat('more.evolve', 1)
 
     fitnesses = evo_strat('more.evolve', 2, rollback_model_at_end = True)
+
+@param('vector_size', (2, 4))
+def test_evo_strat_vectorized(vector_size):
+    from x_evolution.x_evolution import EvoStrategy
+
+    model = MLP(8, 16, 4)
+
+    def environment(model):
+        # mock a vectorized environment returning multiple fitness scores
+        return torch.randn(vector_size)
+
+    evo_strat = EvoStrategy(
+        model,
+        environment = environment,
+        num_generations = 2,
+        vectorized = True,
+        vector_size = vector_size,
+        noise_population_size = 4
+    )
+
+    evo_strat()

--- /dev/null
+++ x_evolution-0.1.26/train_humanoid.py
@@ -0,0 +1,215 @@
+# /// script
+# dependencies = [
+#     "fire",
+#     "gymnasium[mujoco]>=1.0.0",
+#     "gymnasium[other]",
+#     "x-evolution>=0.0.20",
+#     "x-mlps-pytorch"
+# ]
+# ///
+
+# import os
+# os.environ["NCCL_P2P_DISABLE"] = "1"
+# os.environ["NCCL_IB_DISABLE"] = "1"
+# os.environ["MUJOCO_GL"] = "osmesa"
+
+import fire
+from shutil import rmtree
+import gymnasium as gym
+import numpy as np
+
+import torch
+from torch.nn import Module, GRU, Linear
+import torch.nn.functional as F
+
+# functions
+
+def exists(v):
+    return v is not None
+
+def softclamp(t, value):
+    return (t / value).tanh() * value
+
+class HumanoidEnvironment(Module):
+    def __init__(
+        self,
+        video_folder = './recordings_humanoid',
+        render_every_eps = 100,
+        max_steps = 1000,
+        repeats = 1,
+        vectorized = False,
+        num_envs = 1
+    ):
+        super().__init__()
+
+        self.vectorized = vectorized
+        self.num_envs = num_envs
+
+        if vectorized:
+            env = gym.make_vec('Humanoid-v5', num_envs = num_envs, render_mode = 'rgb_array')
+        else:
+            env = gym.make('Humanoid-v5', render_mode = 'rgb_array')
+
+        self.env = env
+        self.max_steps = max_steps
+        self.repeats = repeats
+        self.video_folder = video_folder
+        self.render_every_eps = render_every_eps
+
+    def pre_main_callback(self):
+        # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop
+
+        rmtree(self.video_folder, ignore_errors = True)
+
+        if not self.vectorized:
+            self.env = gym.wrappers.RecordVideo(
+                env = self.env,
+                video_folder = self.video_folder,
+                name_prefix = 'recording',
+                episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
+                disable_logger = True
+            )
+
+    def forward(self, model):
+
+        device = next(model.parameters()).device
+
+        seed = torch.randint(0, int(1e6), ())
+
+        num_envs = self.num_envs if self.vectorized else 1
+        cum_reward = torch.zeros(num_envs, device = device)
+
+        for _ in range(self.repeats):
+            state, _ = self.env.reset(seed = seed.item())
+
+            step = 0
+            hiddens = None
+            last_action = None
+
+            dones = torch.zeros(num_envs, device = device, dtype = torch.bool)
+
+            while step < self.max_steps and not dones.all():
+
+                state_torch = torch.from_numpy(state).float().to(device)
+
+                action_logits, hiddens = model(state_torch, hiddens)
+
+                mean, log_var = action_logits.chunk(2, dim = -1)
+
+                # sample and then bound and scale to -0.4 to 0.4
+
+                std = (0.5 * softclamp(log_var, 5.)).exp()
+                sampled = mean + torch.randn_like(mean) * std
+                action = sampled.tanh() * 0.4
+
+                next_state, reward, truncated, terminated, info = self.env.step(action.detach().cpu().numpy() if self.vectorized else action.item())
+
+                reward_np = np.array(reward) if not isinstance(reward, np.ndarray) else reward
+                total_reward_base = torch.from_numpy(reward_np).float().to(device)
+
+                # reward functions
+
+                # encouraged to move forward (1.0) and stay upright (> 1.2 meters)
+
+                z_pos = torch.from_numpy(next_state[..., 0]).float().to(device)
+                x_vel = torch.from_numpy(next_state[..., 5]).float().to(device)
+
+                reward_forward = x_vel
+                reward_upright = (z_pos > 1.2).float()
+
+                exploration_bonus = std.mean(dim = -1) * 0.05
+                penalize_extreme_actions = (mean.abs() > 1.).float().mean(dim = -1) * 0.05
+
+                penalize_action_change = 0.
+                if exists(last_action):
+                    penalize_action_change = (last_action - action).abs().mean(dim = -1) * 0.1
+
+                total_reward = total_reward_base + reward_forward + reward_upright + exploration_bonus - penalize_extreme_actions - penalize_action_change
+
+                # only add reward if not done
+
+                mask = (~dones).float()
+                cum_reward += total_reward * mask
+
+                # update dones
+
+                dones_np = np.array(truncated | terminated) if not isinstance(truncated | terminated, np.ndarray) else (truncated | terminated)
+                dones |= torch.from_numpy(dones_np).to(device)
+
+                step += 1
+
+                state = next_state
+                last_action = action
+
+        if not self.vectorized:
+            return cum_reward.item() / self.repeats
+
+        return cum_reward / self.repeats
+
+# evo strategy
+
+from x_evolution import EvoStrategy
+
+from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
+
+class Model(Module):
+
+    def __init__(self):
+        super().__init__()
+
+        self.deep_mlp = ResidualNormedMLP(
+            dim_in = 348,
+            dim = 256,
+            depth = 8,
+            residual_every = 2
+        )
+
+        self.gru = GRU(256, 256, batch_first = True)
+
+        self.to_pred = Linear(256, 17 * 2, bias = False)
+
+    def forward(self, state, hiddens = None):
+
+        x = self.deep_mlp(state)
+
+        x = x.unsqueeze(-2)
+        gru_out, hiddens = self.gru(x, hiddens)
+        x = x + gru_out
+        x = x.squeeze(-2)
+
+        return self.to_pred(x), hiddens
+
+from torch.optim.lr_scheduler import CosineAnnealingLR
+
+def main(
+    vectorized = False,
+    num_envs = 8
+):
+    evo_strat = EvoStrategy(
+        Model(),
+        environment = HumanoidEnvironment(
+            repeats = 1,
+            render_every_eps = 200,
+            vectorized = vectorized,
+            num_envs = num_envs
+        ),
+        vectorized = vectorized,
+        vector_size = num_envs,
+        num_generations = 50_000,
+        noise_population_size = 200,
+        noise_low_rank = 1,
+        noise_scale = 1e-2,
+        noise_scale_clamp_range = (5e-3, 2e-2),
+        learned_noise_scale = True,
+        use_sigma_optimizer = True,
+        learning_rate = 1e-3,
+        noise_scale_learning_rate = 1e-4,
+        use_scheduler = True,
+        scheduler_klass = CosineAnnealingLR,
+        scheduler_kwargs = dict(T_max = 50_000)
+    )
+
+    evo_strat()
+
+if __name__ == '__main__':
+    fire.Fire(main)
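Because the rewritten training scripts expose `main` through `fire.Fire(main)`, the vectorized path can be toggled from the command line, e.g. `python train_humanoid.py --vectorized --num_envs 8` (assuming python-fire's usual flag handling, where a bare boolean flag sets the argument to `True`).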
--- /dev/null
+++ x_evolution-0.1.26/train_lunar.py
@@ -0,0 +1,141 @@
+# /// script
+# dependencies = [
+#     "fire",
+#     "gymnasium[box2d]>=1.0.0",
+#     "gymnasium[other]",
+#     "x-evolution>=0.0.20",
+#     "x-mlps-pytorch>=0.2.0"
+# ]
+# ///
+
+import fire
+from shutil import rmtree
+import gymnasium as gym
+import numpy as np
+
+import torch
+from torch.nn import Module
+import torch.nn.functional as F
+from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
+from torch.optim.lr_scheduler import CosineAnnealingLR
+
+class LunarEnvironment(Module):
+    def __init__(
+        self,
+        video_folder = './recordings',
+        render_every_eps = 500,
+        max_steps = 500,
+        repeats = 1,
+        vectorized = False,
+        num_envs = 1
+    ):
+        super().__init__()
+
+        self.vectorized = vectorized
+        self.num_envs = num_envs
+
+        if vectorized:
+            env = gym.make_vec('LunarLander-v3', num_envs = num_envs, render_mode = 'rgb_array')
+        else:
+            env = gym.make('LunarLander-v3', render_mode = 'rgb_array')
+
+        self.env = env
+        self.max_steps = max_steps
+        self.repeats = repeats
+        self.video_folder = video_folder
+        self.render_every_eps = render_every_eps
+
+    def pre_main_callback(self):
+        # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop
+
+        rmtree(self.video_folder, ignore_errors = True)
+
+        if not self.vectorized:
+            self.env = gym.wrappers.RecordVideo(
+                env = self.env,
+                video_folder = self.video_folder,
+                name_prefix = 'recording',
+                episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
+                disable_logger = True
+            )
+
+    def forward(self, model):
+
+        device = next(model.parameters()).device
+
+        seed = torch.randint(0, int(1e6), ())
+
+        num_envs = self.num_envs if self.vectorized else 1
+        cum_reward = torch.zeros(num_envs, device = device)
+
+        for _ in range(self.repeats):
+            state, _ = self.env.reset(seed = seed.item())
+
+            step = 0
+            dones = torch.zeros(num_envs, device = device, dtype = torch.bool)
+
+            while step < self.max_steps and not dones.all():
+
+                state_torch = torch.from_numpy(state).to(device)
+
+                action_logits = model(state_torch)
+
+                action = F.gumbel_softmax(action_logits, hard = True).argmax(dim = -1)
+
+                next_state, reward, truncated, terminated, *_ = self.env.step(action.detach().cpu().numpy() if self.vectorized else action.item())
+
+                reward_np = np.array(reward) if not isinstance(reward, np.ndarray) else reward
+                total_reward = torch.from_numpy(reward_np).float().to(device)
+
+                mask = (~dones).float()
+                cum_reward += total_reward * mask
+
+                dones_np = np.array(truncated | terminated) if not isinstance(truncated | terminated, np.ndarray) else (truncated | terminated)
+                dones |= torch.from_numpy(dones_np).to(device)
+
+                step += 1
+
+                state = next_state
+
+        if not self.vectorized:
+            return cum_reward.item() / self.repeats
+
+        return cum_reward / self.repeats
+
+# evo strategy
+
+from x_evolution import EvoStrategy
+
+def main(
+    vectorized = False,
+    num_envs = 8
+):
+    actor = ResidualNormedMLP(dim_in = 8, dim = 24, depth = 2, residual_every = 1, dim_out = 4)
+
+    evo_strat = EvoStrategy(
+        actor,
+        environment = LunarEnvironment(
+            repeats = 2,
+            vectorized = vectorized,
+            num_envs = num_envs
+        ),
+        vectorized = vectorized,
+        vector_size = num_envs,
+        num_generations = 50_000,
+        noise_population_size = 50,
+        noise_low_rank = 1,
+        noise_scale = 1e-2,
+        noise_scale_clamp_range = (5e-3, 2e-2),
+        learned_noise_scale = True,
+        use_sigma_optimizer = True,
+        learning_rate = 1e-3,
+        noise_scale_learning_rate = 1e-4,
+        use_scheduler = True,
+        scheduler_klass = CosineAnnealingLR,
+        scheduler_kwargs = dict(T_max = 50_000)
+    )
+
+    evo_strat()
+
+if __name__ == '__main__':
+    fire.Fire(main)

--- /dev/null
+++ x_evolution-0.1.26/train_mnist.py
@@ -0,0 +1,91 @@
+# /// script
+# dependencies = [
+#     "fire",
+#     "torchvision",
+#     "x-mlps-pytorch>=0.2.0",
+#     "x-evolution>=0.0.20"
+# ]
+# ///
+
+import fire
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torchvision import datasets, transforms
+from torch.utils.data import DataLoader
+
+# model
+
+from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
+
+model = nn.Sequential(
+    nn.Flatten(),
+    ResidualNormedMLP(dim_in = 784, dim = 512, depth = 8, residual_every = 2, dim_out = 10)
+).half()
+
+batch_size = 256
+
+# data
+
+dataset = datasets.MNIST('./data', train = True, download = True, transform = transforms.ToTensor())
+
+# fitness as inverse of loss
+
+def mnist_environment(
+    model,
+    num_envs = 1,
+    vectorized = False,
+    batch_size = 256
+):
+    device = next(model.parameters()).device
+
+    iters = num_envs if vectorized else 1
+
+    losses = []
+
+    for _ in range(iters):
+        dataloader = DataLoader(dataset, batch_size = batch_size, shuffle = True)
+        data_iterator = iter(dataloader)
+        data, target = next(data_iterator)
+
+        data, target = data.to(device), target.to(device)
+
+        with torch.no_grad():
+            logits = model(data.half())
+            loss = F.cross_entropy(logits, target)
+
+        losses.append(-loss)
+
+    if not vectorized:
+        return losses[0]
+
+    return torch.stack(losses)
+
+# evo
+
+from x_evolution import EvoStrategy
+
+def main(
+    vectorized = False,
+    num_envs = 8,
+    batch_size = 256
+):
+    evo_strat = EvoStrategy(
+        model,
+        environment = lambda model: mnist_environment(model, num_envs = num_envs, vectorized = vectorized, batch_size = batch_size),
+        vectorized = vectorized,
+        vector_size = num_envs,
+        noise_population_size = 100,
+        noise_scale = 1e-2,
+        noise_scale_clamp_range = (8e-3, 2e-2),
+        noise_low_rank = 1,
+        num_generations = 10_000,
+        learning_rate = 1e-3,
+        learned_noise_scale = True,
+        noise_scale_learning_rate = 2e-5
+    )
+
+    evo_strat()
+
+if __name__ == '__main__':
+    fire.Fire(main)

--- /dev/null
+++ x_evolution-0.1.26/train_xor.py
@@ -0,0 +1,83 @@
+import fire
+import torch
+from torch import nn
+import torch.nn.functional as F
+from torch.optim.lr_scheduler import LambdaLR
+
+# model
+
+from torch import nn
+
+model = nn.Sequential(
+    nn.Linear(2, 16),
+    nn.ReLU(),
+    nn.Linear(16, 2)
+).half()
+
+batch_size = 128
+
+# fitness as inverse of loss
+
+from x_evolution import EvoStrategy
+
+def xor_environment(
+    model,
+    num_envs = 1,
+    vectorized = False,
+    batch_size = 128
+):
+    device = next(model.parameters()).device
+
+    iters = num_envs if vectorized else 1
+
+    losses = []
+
+    for _ in range(iters):
+        data = torch.randint(0, 2, (batch_size, 2))
+        labels = data[:, 0] ^ data[:, 1]
+
+        data, labels = tuple(t.to(device) for t in (data, labels))
+
+        with torch.no_grad():
+            logits = model(data.half())
+            loss = F.cross_entropy(logits, labels)
+
+        losses.append(-loss)
+
+    if not vectorized:
+        return losses[0]
+
+    return torch.stack(losses)
+
+# evo
+
+def main(
+    vectorized = False,
+    num_envs = 8,
+    batch_size = 128
+):
+    evo_strat = EvoStrategy(
+        model,
+        environment = lambda model: xor_environment(model, num_envs = num_envs, vectorized = vectorized, batch_size = batch_size),
+        vectorized = vectorized,
+        vector_size = num_envs,
+        noise_population_size = 100,
+        noise_low_rank = 1,
+        num_generations = 100_000,
+        learning_rate = 1e-1,
+        noise_scale = 1e-1,
+        noise_scale_clamp_range = (0.05, 0.2),
+        learned_noise_scale = True,
+        noise_scale_learning_rate = 5e-4,
+        use_scheduler = True,
+        scheduler_klass = LambdaLR,
+        scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.)),
+        use_sigma_scheduler = True,
+        sigma_scheduler_klass = LambdaLR,
+        sigma_scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.))
+    )
+
+    evo_strat()
+
+if __name__ == '__main__':
+    fire.Fire(main)

--- x_evolution-0.1.25/x_evolution/x_evolution.py
+++ x_evolution-0.1.26/x_evolution/x_evolution.py
@@ -93,11 +93,16 @@ class EvoStrategy(Module):
         verbose = True,
         accelerator: Accelerator | None = None,
         accelerate_kwargs: dict = dict(),
-        reject_generation_fitnesses_if: Callable[[Tensor], bool] | None = None
+        reject_generation_fitnesses_if: Callable[[Tensor], bool] | None = None,
+        vectorized = False,
+        vector_size: int | None = None
     ):
         super().__init__()
         self.verbose = verbose
 
+        self.vectorized = vectorized
+        self.vector_size = vector_size
+
         if not exists(accelerator):
             accelerator = Accelerator(cpu = cpu, **accelerate_kwargs)
 
@@ -475,24 +480,28 @@ class EvoStrategy(Module):
                 fitnesses.append([0., 0.] if self.mirror_sampling else 0.)
                 continue
 
-
-
-
-
-            # determine noise scale, which can be fixed or learned
+            def get_fitness(negate = False):
+                individual_param_seeds = with_seed(individual_seed.item())(randint)(0, MAX_SEED_VALUE, (self.num_params,))
+                noise_config = dict(zip(self.param_names_to_optimize, individual_param_seeds.tolist()))
 
-
+                noise_config_with_scale = dict()
+                for param_name, seed in noise_config.items():
+                    noise_scale = self._get_noise_scale(param_name)
+                    noise_config_with_scale[param_name] = (seed, noise_scale)
 
-
+                with model.temp_add_noise_(noise_config_with_scale, negate = negate):
+                    fitness = with_seed(maybe_rollout_seed)(self.environment)(model)
 
-
+                if isinstance(fitness, Tensor) and fitness.numel() > 1:
+                    fitness = fitness.mean().item()
+                elif isinstance(fitness, Tensor):
+                    fitness = fitness.item()
 
-
+                return fitness
 
-            #
+            # evaluate
 
-
-            fitness = with_seed(maybe_rollout_seed)(rollout_for_fitness)()
+            fitness = get_fitness(negate = False)
 
             if not self.mirror_sampling:
                 fitnesses.append(fitness)
@@ -500,8 +509,7 @@ class EvoStrategy(Module):
 
             # handle mirror sampling
 
-
-            fitness_mirrored = with_seed(maybe_rollout_seed)(rollout_for_fitness)()
+            fitness_mirrored = get_fitness(negate = True)
 
             fitnesses.append([fitness, fitness_mirrored])
 
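The `get_fitness` closure added above is also where vectorized fitness values are folded back into the scalar the rest of the loop expects: a tensor with more than one element (one fitness per sub-environment) is averaged, a single-element tensor is unwrapped, and plain floats pass through unchanged. An illustrative, standalone restatement of that reduction (not part of the package API):

```python
import torch
from torch import Tensor

def reduce_fitness(fitness):
    # mirrors the tensor handling in the get_fitness closure above
    if isinstance(fitness, Tensor) and fitness.numel() > 1:
        return fitness.mean().item()  # average across vectorized sub-environments
    if isinstance(fitness, Tensor):
        return fitness.item()
    return fitness

assert reduce_fitness(torch.tensor([1.0, 3.0])) == 2.0
assert reduce_fitness(torch.tensor(0.5)) == 0.5
assert reduce_fitness(2.0) == 2.0
```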
--- x_evolution-0.1.25/train_humanoid.py
+++ /dev/null
@@ -1,130 +0,0 @@
-# /// script
-# dependencies = [
-#     "gymnasium[mujoco]>=1.0.0",
-#     "gymnasium[other]",
-#     "x-evolution>=0.0.20",
-#     "x-mlps-pytorch"
-# ]
-# ///
-
-# import os
-# os.environ["NCCL_P2P_DISABLE"] = "1"
-# os.environ["NCCL_IB_DISABLE"] = "1"
-# os.environ["MUJOCO_GL"] = "osmesa"
-
-from shutil import rmtree
-import gymnasium as gym
-import numpy as np
-
-import torch
-from torch.nn import Module
-import torch.nn.functional as F
-
-def softclamp(t, value):
-    return (t / value).tanh() * value
-
-class HumanoidEnvironment(Module):
-    def __init__(
-        self,
-        video_folder = './recordings_humanoid',
-        render_every_eps = 100,
-        max_steps = 1000,
-        repeats = 1
-    ):
-        super().__init__()
-
-        # Humanoid-v5
-        env = gym.make('Humanoid-v5', render_mode = 'rgb_array')
-
-        self.env = env
-        self.max_steps = max_steps
-        self.repeats = repeats
-        self.video_folder = video_folder
-        self.render_every_eps = render_every_eps
-
-    def pre_main_callback(self):
-        # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop
-
-        rmtree(self.video_folder, ignore_errors = True)
-
-        self.env = gym.wrappers.RecordVideo(
-            env = self.env,
-            video_folder = self.video_folder,
-            name_prefix = 'recording',
-            episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
-            disable_logger = True
-        )
-
-    def forward(self, model):
-
-        device = next(model.parameters()).device
-
-        seed = torch.randint(0, int(1e6), ())
-
-        cum_reward = 0.
-
-        for _ in range(self.repeats):
-            state, _ = self.env.reset(seed = seed.item())
-
-            step = 0
-
-            while step < self.max_steps:
-
-                state = torch.from_numpy(state).float().to(device)
-
-                action_logits = model(state)
-
-                mean, log_var = action_logits.chunk(2, dim = -1)
-
-                # sample and then bound and scale to -0.4 to 0.4
-
-                std = (0.5 * softclamp(log_var, 5.)).exp()
-                sampled = mean + torch.randn_like(mean) * std
-                action = sampled.tanh() * 0.4
-
-                next_state, reward, truncated, terminated, *_ = self.env.step(action.detach().cpu().numpy())
-
-                cum_reward += float(reward)
-                step += 1
-
-                state = next_state
-
-                if truncated or terminated:
-                    break
-
-        return cum_reward / self.repeats
-
-# evo strategy
-
-from x_evolution import EvoStrategy
-
-from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
-
-actor = ResidualNormedMLP(
-    dim_in = 348, # state
-    dim = 256,
-    depth = 8,
-    residual_every = 2,
-    dim_out = 17 * 2 # action mean logvar
-)
-
-from torch.optim.lr_scheduler import CosineAnnealingLR
-
-evo_strat = EvoStrategy(
-    actor,
-    environment = HumanoidEnvironment(repeats = 2),
-    num_generations = 50_000,
-    noise_population_size = 200,
-    noise_low_rank = 1,
-    noise_scale = 1e-2,
-    noise_scale_clamp_range = (5e-3, 2e-2),
-    learned_noise_scale = True,
-    use_sigma_optimizer = True,
-    learning_rate = 1e-3,
-    noise_scale_learning_rate = 1e-4,
-    use_scheduler = True,
-    scheduler_klass = CosineAnnealingLR,
-    scheduler_kwargs = dict(T_max = 50_000)
-)
-
-evo_strat()

--- x_evolution-0.1.25/train_lunar.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# /// script
-# dependencies = [
-#     "gymnasium[box2d]>=1.0.0",
-#     "gymnasium[other]",
-#     "x-evolution>=0.0.20"
-# ]
-# ///
-
-from shutil import rmtree
-import gymnasium as gym
-
-import torch
-from torch.nn import Module
-import torch.nn.functional as F
-
-class LunarEnvironment(Module):
-    def __init__(
-        self,
-        video_folder = './recordings',
-        render_every_eps = 500,
-        max_steps = 500,
-        repeats = 1
-    ):
-        super().__init__()
-
-        env = gym.make('LunarLander-v3', render_mode = 'rgb_array')
-
-        self.env = env
-        self.max_steps = max_steps
-        self.repeats = repeats
-        self.video_folder = video_folder
-        self.render_every_eps = render_every_eps
-
-    def pre_main_callback(self):
-        # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop
-
-        rmtree(self.video_folder, ignore_errors = True)
-
-        self.env = gym.wrappers.RecordVideo(
-            env = self.env,
-            video_folder = self.video_folder,
-            name_prefix = 'recording',
-            episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
-            disable_logger = True
-        )
-
-    def forward(self, model):
-
-        device = next(model.parameters()).device
-
-        seed = torch.randint(0, int(1e6), ())
-
-        cum_reward = 0.
-
-        for _ in range(self.repeats):
-            state, _ = self.env.reset(seed = seed.item())
-
-            step = 0
-
-            while step < self.max_steps:
-
-                state = torch.from_numpy(state).to(device)
-
-                action_logits = model(state)
-
-                action = F.gumbel_softmax(action_logits, hard = True).argmax(dim = -1)
-
-                next_state, reward, truncated, terminated, *_ = self.env.step(action.item())
-
-                cum_reward += float(reward)
-                step += 1
-
-                state = next_state
-
-                if truncated or terminated:
-                    break
-
-        return cum_reward / self.repeats
-
-# evo strategy
-
-from x_evolution import EvoStrategy
-
-from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
-
-actor = ResidualNormedMLP(dim_in = 8, dim = 24, depth = 2, residual_every = 1, dim_out = 4)
-
-from torch.optim.lr_scheduler import CosineAnnealingLR
-
-evo_strat = EvoStrategy(
-    actor,
-    environment = LunarEnvironment(repeats = 2),
-    num_generations = 50_000,
-    noise_population_size = 50,
-    noise_low_rank = 1,
-    noise_scale = 1e-2,
-    noise_scale_clamp_range = (5e-3, 2e-2),
-    learned_noise_scale = True,
-    use_sigma_optimizer = True,
-    learning_rate = 1e-3,
-    noise_scale_learning_rate = 1e-4,
-    use_scheduler = True,
-    scheduler_klass = CosineAnnealingLR,
-    scheduler_kwargs = dict(T_max = 50_000)
-)
-
-evo_strat()

--- x_evolution-0.1.25/train_mnist.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# /// script
-# dependencies = [
-#     "torchvision",
-#     "x-evolution>=0.0.20"
-# ]
-# ///
-
-import torch
-from torch import tensor, nn
-import torch.nn.functional as F
-from torchvision import datasets, transforms
-from torch.utils.data import DataLoader
-
-# model
-
-from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
-
-model = nn.Sequential(
-    nn.Flatten(),
-    ResidualNormedMLP(dim_in = 784, dim = 512, depth = 8, residual_every = 2, dim_out = 10)
-).half()
-
-batch_size = 256
-
-# data
-
-dataset = datasets.MNIST('./data', train = True, download = True, transform = transforms.ToTensor())
-
-# fitness as inverse of loss
-
-def loss_mnist(model):
-    device = next(model.parameters()).device
-
-    dataloader = DataLoader(dataset, batch_size = batch_size, shuffle = True)
-    data_iterator = iter(dataloader)
-    data, target = next(data_iterator)
-
-    data, target = data.to(device), target.to(device)
-
-    with torch.no_grad():
-        logits = model(data.half())
-        loss = F.cross_entropy(logits, target)
-
-    return -loss
-
-# evo
-
-from x_evolution import EvoStrategy
-
-evo_strat = EvoStrategy(
-    model,
-    environment = loss_mnist,
-    noise_population_size = 100,
-    noise_scale = 1e-2,
-    noise_scale_clamp_range = (8e-3, 2e-2),
-    noise_low_rank = 1,
-    num_generations = 10_000,
-    learning_rate = 1e-3,
-    learned_noise_scale = True,
-    noise_scale_learning_rate = 2e-5
-)
-
-evo_strat()

--- x_evolution-0.1.25/train_xor.py
+++ /dev/null
@@ -1,57 +0,0 @@
-import torch
-from torch import tensor
-import torch.nn.functional as F
-from torch.optim.lr_scheduler import LambdaLR
-
-# model
-
-from torch import nn
-
-model = nn.Sequential(
-    nn.Linear(2, 16),
-    nn.ReLU(),
-    nn.Linear(16, 2)
-).half()
-
-batch_size = 128
-
-# fitness as inverse of loss
-
-from x_evolution import EvoStrategy
-
-def loss_xor(model):
-    device = next(model.parameters()).device
-
-    data = torch.randint(0, 2, (batch_size, 2))
-    labels = data[:, 0] ^ data[:, 1]
-
-    data, labels = tuple(t.to(device) for t in (data, labels))
-
-    with torch.no_grad():
-        logits = model(data.half())
-        loss = F.cross_entropy(logits, labels)
-
-    return -loss
-
-# evo
-
-evo_strat = EvoStrategy(
-    model,
-    environment = loss_xor,
-    noise_population_size = 100,
-    noise_low_rank = 1,
-    num_generations = 100_000,
-    learning_rate = 1e-1,
-    noise_scale = 1e-1,
-    noise_scale_clamp_range = (5e-2, 2e-1),
-    learned_noise_scale = True,
-    noise_scale_learning_rate = 5e-4,
-    use_scheduler = True,
-    scheduler_klass = LambdaLR,
-    scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.)),
-    use_sigma_scheduler = True,
-    sigma_scheduler_klass = LambdaLR,
-    sigma_scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.))
-)
-
-evo_strat()

The remaining files listed above with +0 -0 are unchanged between the two versions.