x-evolution 0.1.25__tar.gz → 0.1.27__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {x_evolution-0.1.25 → x_evolution-0.1.27}/PKG-INFO +2 -2
- {x_evolution-0.1.25 → x_evolution-0.1.27}/pyproject.toml +2 -2
- {x_evolution-0.1.25 → x_evolution-0.1.27}/tests/test_evolution.py +21 -0
- x_evolution-0.1.27/train_humanoid.py +215 -0
- x_evolution-0.1.27/train_lunar.py +141 -0
- x_evolution-0.1.27/train_mnist.py +91 -0
- x_evolution-0.1.27/train_xor.py +83 -0
- {x_evolution-0.1.25 → x_evolution-0.1.27}/x_evolution/x_evolution.py +39 -20
- x_evolution-0.1.25/train_humanoid.py +0 -130
- x_evolution-0.1.25/train_lunar.py +0 -107
- x_evolution-0.1.25/train_mnist.py +0 -63
- x_evolution-0.1.25/train_xor.py +0 -57
- {x_evolution-0.1.25 → x_evolution-0.1.27}/.github/workflows/python-publish.yml +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.27}/.github/workflows/test.yml +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.27}/.gitignore +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.27}/LICENSE +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.27}/README.md +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.27}/scripts/install-humanoid.sh +0 -0
- {x_evolution-0.1.25 → x_evolution-0.1.27}/x_evolution/__init__.py +0 -0
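
The main functional change in 0.1.27 is support for vectorized environments: EvoStrategy gains `vectorized` and `vector_size` arguments, the environment callable may now return a tensor of per-environment fitness scores, and a new `sync_on_init` option broadcasts the model's weights from rank 0 across processes. A minimal usage sketch adapted from the new `test_evo_strat_vectorized` test (the `MLP` import path from x_mlps_pytorch is assumed here, and the random tensor stands in for real per-environment fitnesses):

import torch
from x_mlps_pytorch import MLP   # assumed helper; any torch.nn.Module works
from x_evolution import EvoStrategy

model = MLP(8, 16, 4)

def environment(model):
    # a vectorized environment returns one fitness score per sub-environment
    return torch.randn(4)

evo_strat = EvoStrategy(
    model,
    environment = environment,
    num_generations = 2,
    vectorized = True,
    vector_size = 4,
    noise_population_size = 4
)

evo_strat()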
{x_evolution-0.1.25 → x_evolution-0.1.27}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-evolution
-Version: 0.1.25
+Version: 0.1.27
 Summary: x-evolution
 Project-URL: Homepage, https://pypi.org/project/x-evolution/
 Project-URL: Repository, https://github.com/lucidrains/x-evolution

@@ -38,7 +38,7 @@ Requires-Dist: accelerate
 Requires-Dist: beartype
 Requires-Dist: einops>=0.8.0
 Requires-Dist: torch>=2.4
-Requires-Dist: x-mlps-pytorch>=0.
+Requires-Dist: x-mlps-pytorch>=0.2.0
 Requires-Dist: x-transformers>=2.11.23
 Provides-Extra: examples
 Requires-Dist: gymnasium[box2d]>=1.0.0; extra == 'examples'
{x_evolution-0.1.25 → x_evolution-0.1.27}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "x-evolution"
-version = "0.1.25"
+version = "0.1.27"
 description = "x-evolution"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }

@@ -27,7 +27,7 @@ dependencies = [
     "beartype",
     "einops>=0.8.0",
     "torch>=2.4",
-    "x-mlps-pytorch>=0.
+    "x-mlps-pytorch>=0.2.0",
     "x-transformers>=2.11.23"
 ]
 
{x_evolution-0.1.25 → x_evolution-0.1.27}/tests/test_evolution.py

@@ -47,3 +47,24 @@ def test_evo_strat(
     evo_strat('more.evolve', 1)
 
     fitnesses = evo_strat('more.evolve', 2, rollback_model_at_end = True)
+
+@param('vector_size', (2, 4))
+def test_evo_strat_vectorized(vector_size):
+    from x_evolution.x_evolution import EvoStrategy
+
+    model = MLP(8, 16, 4)
+
+    def environment(model):
+        # mock a vectorized environment returning multiple fitness scores
+        return torch.randn(vector_size)
+
+    evo_strat = EvoStrategy(
+        model,
+        environment = environment,
+        num_generations = 2,
+        vectorized = True,
+        vector_size = vector_size,
+        noise_population_size = 4
+    )
+
+    evo_strat()
x_evolution-0.1.27/train_humanoid.py (new file, +215 lines)

# /// script
# dependencies = [
# "fire",
# "gymnasium[mujoco]>=1.0.0",
# "gymnasium[other]",
# "x-evolution>=0.0.20",
# "x-mlps-pytorch"
# ]
# ///

# import os
# os.environ["NCCL_P2P_DISABLE"] = "1"
# os.environ["NCCL_IB_DISABLE"] = "1"
# os.environ["MUJOCO_GL"] = "osmesa"

import fire
from shutil import rmtree
import gymnasium as gym
import numpy as np

import torch
from torch.nn import Module, GRU, Linear
import torch.nn.functional as F

# functions

def exists(v):
    return v is not None

def softclamp(t, value):
    return (t / value).tanh() * value

class HumanoidEnvironment(Module):
    def __init__(
        self,
        video_folder = './recordings_humanoid',
        render_every_eps = 100,
        max_steps = 1000,
        repeats = 1,
        vectorized = False,
        num_envs = 1
    ):
        super().__init__()

        self.vectorized = vectorized
        self.num_envs = num_envs

        if vectorized:
            env = gym.make_vec('Humanoid-v5', num_envs = num_envs, render_mode = 'rgb_array')
        else:
            env = gym.make('Humanoid-v5', render_mode = 'rgb_array')

        self.env = env
        self.max_steps = max_steps
        self.repeats = repeats
        self.video_folder = video_folder
        self.render_every_eps = render_every_eps

    def pre_main_callback(self):
        # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop

        rmtree(self.video_folder, ignore_errors = True)

        if not self.vectorized:
            self.env = gym.wrappers.RecordVideo(
                env = self.env,
                video_folder = self.video_folder,
                name_prefix = 'recording',
                episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
                disable_logger = True
            )

    def forward(self, model):

        device = next(model.parameters()).device

        seed = torch.randint(0, int(1e6), ())

        num_envs = self.num_envs if self.vectorized else 1
        cum_reward = torch.zeros(num_envs, device = device)

        for _ in range(self.repeats):
            state, _ = self.env.reset(seed = seed.item())

            step = 0
            hiddens = None
            last_action = None

            dones = torch.zeros(num_envs, device = device, dtype = torch.bool)

            while step < self.max_steps and not dones.all():

                state_torch = torch.from_numpy(state).float().to(device)

                action_logits, hiddens = model(state_torch, hiddens)

                mean, log_var = action_logits.chunk(2, dim = -1)

                # sample and then bound and scale to -0.4 to 0.4

                std = (0.5 * softclamp(log_var, 5.)).exp()
                sampled = mean + torch.randn_like(mean) * std
                action = sampled.tanh() * 0.4

                next_state, reward, truncated, terminated, info = self.env.step(action.detach().cpu().numpy() if self.vectorized else action.item())

                reward_np = np.array(reward) if not isinstance(reward, np.ndarray) else reward
                total_reward_base = torch.from_numpy(reward_np).float().to(device)

                # reward functions

                # encouraged to move forward (1.0) and stay upright (> 1.2 meters)

                z_pos = torch.from_numpy(next_state[..., 0]).float().to(device)
                x_vel = torch.from_numpy(next_state[..., 5]).float().to(device)

                reward_forward = x_vel
                reward_upright = (z_pos > 1.2).float()

                exploration_bonus = std.mean(dim = -1) * 0.05
                penalize_extreme_actions = (mean.abs() > 1.).float().mean(dim = -1) * 0.05

                penalize_action_change = 0.
                if exists(last_action):
                    penalize_action_change = (last_action - action).abs().mean(dim = -1) * 0.1

                total_reward = total_reward_base + reward_forward + reward_upright + exploration_bonus - penalize_extreme_actions - penalize_action_change

                # only add reward if not done

                mask = (~dones).float()
                cum_reward += total_reward * mask

                # update dones

                dones_np = np.array(truncated | terminated) if not isinstance(truncated | terminated, np.ndarray) else (truncated | terminated)
                dones |= torch.from_numpy(dones_np).to(device)

                step += 1

                state = next_state
                last_action = action

        if not self.vectorized:
            return cum_reward.item() / self.repeats

        return cum_reward / self.repeats

# evo strategy

from x_evolution import EvoStrategy

from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP

class Model(Module):

    def __init__(self):
        super().__init__()

        self.deep_mlp = ResidualNormedMLP(
            dim_in = 348,
            dim = 256,
            depth = 8,
            residual_every = 2
        )

        self.gru = GRU(256, 256, batch_first = True)

        self.to_pred = Linear(256, 17 * 2, bias = False)

    def forward(self, state, hiddens = None):

        x = self.deep_mlp(state)

        x = x.unsqueeze(-2)
        gru_out, hiddens = self.gru(x, hiddens)
        x = x + gru_out
        x = x.squeeze(-2)

        return self.to_pred(x), hiddens

from torch.optim.lr_scheduler import CosineAnnealingLR

def main(
    vectorized = False,
    num_envs = 8
):
    evo_strat = EvoStrategy(
        Model(),
        environment = HumanoidEnvironment(
            repeats = 1,
            render_every_eps = 200,
            vectorized = vectorized,
            num_envs = num_envs
        ),
        vectorized = vectorized,
        vector_size = num_envs,
        num_generations = 50_000,
        noise_population_size = 200,
        noise_low_rank = 1,
        noise_scale = 1e-2,
        noise_scale_clamp_range = (5e-3, 2e-2),
        learned_noise_scale = True,
        use_sigma_optimizer = True,
        learning_rate = 1e-3,
        noise_scale_learning_rate = 1e-4,
        use_scheduler = True,
        scheduler_klass = CosineAnnealingLR,
        scheduler_kwargs = dict(T_max = 50_000)
    )

    evo_strat()

if __name__ == '__main__':
    fire.Fire(main)
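The new example scripts declare their dependencies inline via PEP 723 `# /// script` blocks, so they can be run directly with a PEP 723-aware runner such as `uv run train_humanoid.py` (assuming such a tool is installed); train_xor.py is the only new script without an inline dependency block.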
x_evolution-0.1.27/train_lunar.py (new file, +141 lines)

# /// script
# dependencies = [
# "fire",
# "gymnasium[box2d]>=1.0.0",
# "gymnasium[other]",
# "x-evolution>=0.0.20",
# "x-mlps-pytorch>=0.2.0"
# ]
# ///

import fire
from shutil import rmtree
import gymnasium as gym
import numpy as np

import torch
from torch.nn import Module
import torch.nn.functional as F
from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
from torch.optim.lr_scheduler import CosineAnnealingLR

class LunarEnvironment(Module):
    def __init__(
        self,
        video_folder = './recordings',
        render_every_eps = 500,
        max_steps = 500,
        repeats = 1,
        vectorized = False,
        num_envs = 1
    ):
        super().__init__()

        self.vectorized = vectorized
        self.num_envs = num_envs

        if vectorized:
            env = gym.make_vec('LunarLander-v3', num_envs = num_envs, render_mode = 'rgb_array')
        else:
            env = gym.make('LunarLander-v3', render_mode = 'rgb_array')

        self.env = env
        self.max_steps = max_steps
        self.repeats = repeats
        self.video_folder = video_folder
        self.render_every_eps = render_every_eps

    def pre_main_callback(self):
        # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop

        rmtree(self.video_folder, ignore_errors = True)

        if not self.vectorized:
            self.env = gym.wrappers.RecordVideo(
                env = self.env,
                video_folder = self.video_folder,
                name_prefix = 'recording',
                episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
                disable_logger = True
            )

    def forward(self, model):

        device = next(model.parameters()).device

        seed = torch.randint(0, int(1e6), ())

        num_envs = self.num_envs if self.vectorized else 1
        cum_reward = torch.zeros(num_envs, device = device)

        for _ in range(self.repeats):
            state, _ = self.env.reset(seed = seed.item())

            step = 0
            dones = torch.zeros(num_envs, device = device, dtype = torch.bool)

            while step < self.max_steps and not dones.all():

                state_torch = torch.from_numpy(state).to(device)

                action_logits = model(state_torch)

                action = F.gumbel_softmax(action_logits, hard = True).argmax(dim = -1)

                next_state, reward, truncated, terminated, *_ = self.env.step(action.detach().cpu().numpy() if self.vectorized else action.item())

                reward_np = np.array(reward) if not isinstance(reward, np.ndarray) else reward
                total_reward = torch.from_numpy(reward_np).float().to(device)

                mask = (~dones).float()
                cum_reward += total_reward * mask

                dones_np = np.array(truncated | terminated) if not isinstance(truncated | terminated, np.ndarray) else (truncated | terminated)
                dones |= torch.from_numpy(dones_np).to(device)

                step += 1

                state = next_state

        if not self.vectorized:
            return cum_reward.item() / self.repeats

        return cum_reward / self.repeats

# evo strategy

from x_evolution import EvoStrategy

def main(
    vectorized = False,
    num_envs = 8
):
    actor = ResidualNormedMLP(dim_in = 8, dim = 24, depth = 2, residual_every = 1, dim_out = 4)

    evo_strat = EvoStrategy(
        actor,
        environment = LunarEnvironment(
            repeats = 2,
            vectorized = vectorized,
            num_envs = num_envs
        ),
        vectorized = vectorized,
        vector_size = num_envs,
        num_generations = 50_000,
        noise_population_size = 50,
        noise_low_rank = 1,
        noise_scale = 1e-2,
        noise_scale_clamp_range = (5e-3, 2e-2),
        learned_noise_scale = True,
        use_sigma_optimizer = True,
        learning_rate = 1e-3,
        noise_scale_learning_rate = 1e-4,
        use_scheduler = True,
        scheduler_klass = CosineAnnealingLR,
        scheduler_kwargs = dict(T_max = 50_000)
    )

    evo_strat()

if __name__ == '__main__':
    fire.Fire(main)
x_evolution-0.1.27/train_mnist.py (new file, +91 lines)

# /// script
# dependencies = [
# "fire",
# "torchvision",
# "x-mlps-pytorch>=0.2.0",
# "x-evolution>=0.0.20"
# ]
# ///

import fire
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# model

from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP

model = nn.Sequential(
    nn.Flatten(),
    ResidualNormedMLP(dim_in = 784, dim = 512, depth = 8, residual_every = 2, dim_out = 10)
).half()

batch_size = 256

# data

dataset = datasets.MNIST('./data', train = True, download = True, transform = transforms.ToTensor())

# fitness as inverse of loss

def mnist_environment(
    model,
    num_envs = 1,
    vectorized = False,
    batch_size = 256
):
    device = next(model.parameters()).device

    iters = num_envs if vectorized else 1

    losses = []

    for _ in range(iters):
        dataloader = DataLoader(dataset, batch_size = batch_size, shuffle = True)
        data_iterator = iter(dataloader)
        data, target = next(data_iterator)

        data, target = data.to(device), target.to(device)

        with torch.no_grad():
            logits = model(data.half())
            loss = F.cross_entropy(logits, target)

        losses.append(-loss)

    if not vectorized:
        return losses[0]

    return torch.stack(losses)

# evo

from x_evolution import EvoStrategy

def main(
    vectorized = False,
    num_envs = 8,
    batch_size = 256
):
    evo_strat = EvoStrategy(
        model,
        environment = lambda model: mnist_environment(model, num_envs = num_envs, vectorized = vectorized, batch_size = batch_size),
        vectorized = vectorized,
        vector_size = num_envs,
        noise_population_size = 100,
        noise_scale = 1e-2,
        noise_scale_clamp_range = (8e-3, 2e-2),
        noise_low_rank = 1,
        num_generations = 10_000,
        learning_rate = 1e-3,
        learned_noise_scale = True,
        noise_scale_learning_rate = 2e-5
    )

    evo_strat()

if __name__ == '__main__':
    fire.Fire(main)
x_evolution-0.1.27/train_xor.py (new file, +83 lines)

import fire
import torch
from torch import nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import LambdaLR

# model

from torch import nn

model = nn.Sequential(
    nn.Linear(2, 16),
    nn.ReLU(),
    nn.Linear(16, 2)
).half()

batch_size = 128

# fitness as inverse of loss

from x_evolution import EvoStrategy

def xor_environment(
    model,
    num_envs = 1,
    vectorized = False,
    batch_size = 128
):
    device = next(model.parameters()).device

    iters = num_envs if vectorized else 1

    losses = []

    for _ in range(iters):
        data = torch.randint(0, 2, (batch_size, 2))
        labels = data[:, 0] ^ data[:, 1]

        data, labels = tuple(t.to(device) for t in (data, labels))

        with torch.no_grad():
            logits = model(data.half())
            loss = F.cross_entropy(logits, labels)

        losses.append(-loss)

    if not vectorized:
        return losses[0]

    return torch.stack(losses)

# evo

def main(
    vectorized = False,
    num_envs = 8,
    batch_size = 128
):
    evo_strat = EvoStrategy(
        model,
        environment = lambda model: xor_environment(model, num_envs = num_envs, vectorized = vectorized, batch_size = batch_size),
        vectorized = vectorized,
        vector_size = num_envs,
        noise_population_size = 100,
        noise_low_rank = 1,
        num_generations = 100_000,
        learning_rate = 1e-1,
        noise_scale = 1e-1,
        noise_scale_clamp_range = (0.05, 0.2),
        learned_noise_scale = True,
        noise_scale_learning_rate = 5e-4,
        use_scheduler = True,
        scheduler_klass = LambdaLR,
        scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.)),
        use_sigma_scheduler = True,
        sigma_scheduler_klass = LambdaLR,
        sigma_scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.))
    )

    evo_strat()

if __name__ == '__main__':
    fire.Fire(main)
{x_evolution-0.1.25 → x_evolution-0.1.27}/x_evolution/x_evolution.py

@@ -11,6 +11,7 @@ from torch.nn import Module, ModuleList, Parameter, ParameterList
 from torch.optim import SGD, Adam, Optimizer
 from torch.optim.lr_scheduler import LRScheduler
 
+import torch.distributed as dist
 import torch.nn.functional as F
 
 from beartype import beartype

@@ -93,11 +94,17 @@ class EvoStrategy(Module):
         verbose = True,
         accelerator: Accelerator | None = None,
         accelerate_kwargs: dict = dict(),
-        reject_generation_fitnesses_if: Callable[[Tensor], bool] | None = None
+        reject_generation_fitnesses_if: Callable[[Tensor], bool] | None = None,
+        vectorized = False,
+        vector_size: int | None = None,
+        sync_on_init = True
     ):
         super().__init__()
         self.verbose = verbose
 
+        self.vectorized = vectorized
+        self.vector_size = vector_size
+
         if not exists(accelerator):
             accelerator = Accelerator(cpu = cpu, **accelerate_kwargs)
 

@@ -122,12 +129,10 @@ class EvoStrategy(Module):
         self.model = model
         self.noisable_model = Noisable(model, low_rank = noise_low_rank)
 
-        #
-
-        wrapped_model = accelerator.prepare(model)
+        # maybe sync model params and buffers
 
-
-
+        if sync_on_init:
+            self.sync_model_params_and_buffers_()
 
         # get param dictionary
 

@@ -249,6 +254,17 @@ class EvoStrategy(Module):
     def device(self):
         return self.accelerate.device
 
+    @torch.no_grad()
+    def sync_model_params_and_buffers_(self):
+        if not self.accelerate.num_processes > 1:
+            return
+
+        for param in self.model.parameters():
+            dist.broadcast(param, src = 0)
+
+        for buffer in self.model.buffers():
+            dist.broadcast(buffer, src = 0)
+
     def print(self, *args, **kwargs):
         if not self.verbose:
             return

@@ -475,24 +491,28 @@ class EvoStrategy(Module):
             fitnesses.append([0., 0.] if self.mirror_sampling else 0.)
             continue
 
-
-
-
-
-            # determine noise scale, which can be fixed or learned
+            def get_fitness(negate = False):
+                individual_param_seeds = with_seed(individual_seed.item())(randint)(0, MAX_SEED_VALUE, (self.num_params,))
+                noise_config = dict(zip(self.param_names_to_optimize, individual_param_seeds.tolist()))
 
-
+                noise_config_with_scale = dict()
+                for param_name, seed in noise_config.items():
+                    noise_scale = self._get_noise_scale(param_name)
+                    noise_config_with_scale[param_name] = (seed, noise_scale)
 
-
+                with model.temp_add_noise_(noise_config_with_scale, negate = negate):
+                    fitness = with_seed(maybe_rollout_seed)(self.environment)(model)
 
-
+                if isinstance(fitness, Tensor) and fitness.numel() > 1:
+                    fitness = fitness.mean().item()
+                elif isinstance(fitness, Tensor):
+                    fitness = fitness.item()
 
-
+                return fitness
 
-            #
+            # evaluate
 
-
-            fitness = with_seed(maybe_rollout_seed)(rollout_for_fitness)()
+            fitness = get_fitness(negate = False)
 
             if not self.mirror_sampling:
                 fitnesses.append(fitness)

@@ -500,8 +520,7 @@ class EvoStrategy(Module):
 
             # handle mirror sampling
 
-
-            fitness_mirrored = with_seed(maybe_rollout_seed)(rollout_for_fitness)()
+            fitness_mirrored = get_fitness(negate = True)
 
             fitnesses.append([fitness, fitness_mirrored])
 
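The `sync_model_params_and_buffers_` method added above replaces the previous `accelerator.prepare(model)` call: when more than one process is running, every parameter and buffer is broadcast from rank 0 so all workers start the evolutionary loop from identical weights. A standalone sketch of that broadcast pattern (hypothetical helper, not part of the package), guarded so it is a no-op outside a multi-process run:

import torch
import torch.distributed as dist
from torch import nn

def sync_from_rank_zero(model: nn.Module):
    # do nothing unless torch.distributed is initialized with more than one worker
    if not dist.is_initialized() or dist.get_world_size() <= 1:
        return

    with torch.no_grad():
        # overwrite every worker's parameters and buffers with rank 0's copies
        for tensor in [*model.parameters(), *model.buffers()]:
            dist.broadcast(tensor, src = 0)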
x_evolution-0.1.25/train_humanoid.py (deleted, -130 lines)

# /// script
# dependencies = [
# "gymnasium[mujoco]>=1.0.0",
# "gymnasium[other]",
# "x-evolution>=0.0.20",
# "x-mlps-pytorch"
# ]
# ///

# import os
# os.environ["NCCL_P2P_DISABLE"] = "1"
# os.environ["NCCL_IB_DISABLE"] = "1"
# os.environ["MUJOCO_GL"] = "osmesa"

from shutil import rmtree
import gymnasium as gym
import numpy as np

import torch
from torch.nn import Module
import torch.nn.functional as F

def softclamp(t, value):
    return (t / value).tanh() * value

class HumanoidEnvironment(Module):
    def __init__(
        self,
        video_folder = './recordings_humanoid',
        render_every_eps = 100,
        max_steps = 1000,
        repeats = 1
    ):
        super().__init__()

        # Humanoid-v5
        env = gym.make('Humanoid-v5', render_mode = 'rgb_array')

        self.env = env
        self.max_steps = max_steps
        self.repeats = repeats
        self.video_folder = video_folder
        self.render_every_eps = render_every_eps

    def pre_main_callback(self):
        # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop

        rmtree(self.video_folder, ignore_errors = True)

        self.env = gym.wrappers.RecordVideo(
            env = self.env,
            video_folder = self.video_folder,
            name_prefix = 'recording',
            episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
            disable_logger = True
        )

    def forward(self, model):

        device = next(model.parameters()).device

        seed = torch.randint(0, int(1e6), ())

        cum_reward = 0.

        for _ in range(self.repeats):
            state, _ = self.env.reset(seed = seed.item())

            step = 0

            while step < self.max_steps:

                state = torch.from_numpy(state).float().to(device)

                action_logits = model(state)

                mean, log_var = action_logits.chunk(2, dim = -1)

                # sample and then bound and scale to -0.4 to 0.4

                std = (0.5 * softclamp(log_var, 5.)).exp()
                sampled = mean + torch.randn_like(mean) * std
                action = sampled.tanh() * 0.4

                next_state, reward, truncated, terminated, *_ = self.env.step(action.detach().cpu().numpy())

                cum_reward += float(reward)
                step += 1

                state = next_state

                if truncated or terminated:
                    break

        return cum_reward / self.repeats

# evo strategy

from x_evolution import EvoStrategy

from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP

actor = ResidualNormedMLP(
    dim_in = 348, # state
    dim = 256,
    depth = 8,
    residual_every = 2,
    dim_out = 17 * 2 # action mean logvar
)

from torch.optim.lr_scheduler import CosineAnnealingLR

evo_strat = EvoStrategy(
    actor,
    environment = HumanoidEnvironment(repeats = 2),
    num_generations = 50_000,
    noise_population_size = 200,
    noise_low_rank = 1,
    noise_scale = 1e-2,
    noise_scale_clamp_range = (5e-3, 2e-2),
    learned_noise_scale = True,
    use_sigma_optimizer = True,
    learning_rate = 1e-3,
    noise_scale_learning_rate = 1e-4,
    use_scheduler = True,
    scheduler_klass = CosineAnnealingLR,
    scheduler_kwargs = dict(T_max = 50_000)
)

evo_strat()
x_evolution-0.1.25/train_lunar.py (deleted, -107 lines)

# /// script
# dependencies = [
# "gymnasium[box2d]>=1.0.0",
# "gymnasium[other]",
# "x-evolution>=0.0.20"
# ]
# ///

from shutil import rmtree
import gymnasium as gym

import torch
from torch.nn import Module
import torch.nn.functional as F

class LunarEnvironment(Module):
    def __init__(
        self,
        video_folder = './recordings',
        render_every_eps = 500,
        max_steps = 500,
        repeats = 1
    ):
        super().__init__()

        env = gym.make('LunarLander-v3', render_mode = 'rgb_array')

        self.env = env
        self.max_steps = max_steps
        self.repeats = repeats
        self.video_folder = video_folder
        self.render_every_eps = render_every_eps

    def pre_main_callback(self):
        # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop

        rmtree(self.video_folder, ignore_errors = True)

        self.env = gym.wrappers.RecordVideo(
            env = self.env,
            video_folder = self.video_folder,
            name_prefix = 'recording',
            episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
            disable_logger = True
        )

    def forward(self, model):

        device = next(model.parameters()).device

        seed = torch.randint(0, int(1e6), ())

        cum_reward = 0.

        for _ in range(self.repeats):
            state, _ = self.env.reset(seed = seed.item())

            step = 0

            while step < self.max_steps:

                state = torch.from_numpy(state).to(device)

                action_logits = model(state)

                action = F.gumbel_softmax(action_logits, hard = True).argmax(dim = -1)

                next_state, reward, truncated, terminated, *_ = self.env.step(action.item())

                cum_reward += float(reward)
                step += 1

                state = next_state

                if truncated or terminated:
                    break

        return cum_reward / self.repeats

# evo strategy

from x_evolution import EvoStrategy

from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP

actor = ResidualNormedMLP(dim_in = 8, dim = 24, depth = 2, residual_every = 1, dim_out = 4)

from torch.optim.lr_scheduler import CosineAnnealingLR

evo_strat = EvoStrategy(
    actor,
    environment = LunarEnvironment(repeats = 2),
    num_generations = 50_000,
    noise_population_size = 50,
    noise_low_rank = 1,
    noise_scale = 1e-2,
    noise_scale_clamp_range = (5e-3, 2e-2),
    learned_noise_scale = True,
    use_sigma_optimizer = True,
    learning_rate = 1e-3,
    noise_scale_learning_rate = 1e-4,
    use_scheduler = True,
    scheduler_klass = CosineAnnealingLR,
    scheduler_kwargs = dict(T_max = 50_000)
)

evo_strat()
x_evolution-0.1.25/train_mnist.py (deleted, -63 lines)

# /// script
# dependencies = [
# "torchvision",
# "x-evolution>=0.0.20"
# ]
# ///

import torch
from torch import tensor, nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# model

from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP

model = nn.Sequential(
    nn.Flatten(),
    ResidualNormedMLP(dim_in = 784, dim = 512, depth = 8, residual_every = 2, dim_out = 10)
).half()

batch_size = 256

# data

dataset = datasets.MNIST('./data', train = True, download = True, transform = transforms.ToTensor())

# fitness as inverse of loss

def loss_mnist(model):
    device = next(model.parameters()).device

    dataloader = DataLoader(dataset, batch_size = batch_size, shuffle = True)
    data_iterator = iter(dataloader)
    data, target = next(data_iterator)

    data, target = data.to(device), target.to(device)

    with torch.no_grad():
        logits = model(data.half())
        loss = F.cross_entropy(logits, target)

    return -loss

# evo

from x_evolution import EvoStrategy

evo_strat = EvoStrategy(
    model,
    environment = loss_mnist,
    noise_population_size = 100,
    noise_scale = 1e-2,
    noise_scale_clamp_range = (8e-3, 2e-2),
    noise_low_rank = 1,
    num_generations = 10_000,
    learning_rate = 1e-3,
    learned_noise_scale = True,
    noise_scale_learning_rate = 2e-5
)

evo_strat()
x_evolution-0.1.25/train_xor.py (deleted, -57 lines)

import torch
from torch import tensor
import torch.nn.functional as F
from torch.optim.lr_scheduler import LambdaLR

# model

from torch import nn

model = nn.Sequential(
    nn.Linear(2, 16),
    nn.ReLU(),
    nn.Linear(16, 2)
).half()

batch_size = 128

# fitness as inverse of loss

from x_evolution import EvoStrategy

def loss_xor(model):
    device = next(model.parameters()).device

    data = torch.randint(0, 2, (batch_size, 2))
    labels = data[:, 0] ^ data[:, 1]

    data, labels = tuple(t.to(device) for t in (data, labels))

    with torch.no_grad():
        logits = model(data.half())
        loss = F.cross_entropy(logits, labels)

    return -loss

# evo

evo_strat = EvoStrategy(
    model,
    environment = loss_xor,
    noise_population_size = 100,
    noise_low_rank = 1,
    num_generations = 100_000,
    learning_rate = 1e-1,
    noise_scale = 1e-1,
    noise_scale_clamp_range = (5e-2, 2e-1),
    learned_noise_scale = True,
    noise_scale_learning_rate = 5e-4,
    use_scheduler = True,
    scheduler_klass = LambdaLR,
    scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.)),
    use_sigma_scheduler = True,
    sigma_scheduler_klass = LambdaLR,
    sigma_scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.))
)

evo_strat()
Files without changes between 0.1.25 and 0.1.27: .github/workflows/python-publish.yml, .github/workflows/test.yml, .gitignore, LICENSE, README.md, scripts/install-humanoid.sh, x_evolution/__init__.py