x-evolution 0.1.24__tar.gz → 0.1.26__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: x-evolution
3
- Version: 0.1.24
3
+ Version: 0.1.26
4
4
  Summary: x-evolution
5
5
  Project-URL: Homepage, https://pypi.org/project/x-evolution/
6
6
  Project-URL: Repository, https://github.com/lucidrains/x-evolution
@@ -38,7 +38,7 @@ Requires-Dist: accelerate
38
38
  Requires-Dist: beartype
39
39
  Requires-Dist: einops>=0.8.0
40
40
  Requires-Dist: torch>=2.4
41
- Requires-Dist: x-mlps-pytorch>=0.1.31
41
+ Requires-Dist: x-mlps-pytorch>=0.2.0
42
42
  Requires-Dist: x-transformers>=2.11.23
43
43
  Provides-Extra: examples
44
44
  Requires-Dist: gymnasium[box2d]>=1.0.0; extra == 'examples'
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "x-evolution"
3
- version = "0.1.24"
3
+ version = "0.1.26"
4
4
  description = "x-evolution"
5
5
  authors = [
6
6
  { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -27,7 +27,7 @@ dependencies = [
27
27
  "beartype",
28
28
  "einops>=0.8.0",
29
29
  "torch>=2.4",
30
- "x-mlps-pytorch>=0.1.31",
30
+ "x-mlps-pytorch>=0.2.0",
31
31
  "x-transformers>=2.11.23"
32
32
  ]
33
33
 
@@ -47,3 +47,24 @@ def test_evo_strat(
47
47
  evo_strat('more.evolve', 1)
48
48
 
49
49
  fitnesses = evo_strat('more.evolve', 2, rollback_model_at_end = True)
50
+
51
+ @param('vector_size', (2, 4))
52
+ def test_evo_strat_vectorized(vector_size):
53
+ from x_evolution.x_evolution import EvoStrategy
54
+
55
+ model = MLP(8, 16, 4)
56
+
57
+ def environment(model):
58
+ # mock a vectorized environment returning multiple fitness scores
59
+ return torch.randn(vector_size)
60
+
61
+ evo_strat = EvoStrategy(
62
+ model,
63
+ environment = environment,
64
+ num_generations = 2,
65
+ vectorized = True,
66
+ vector_size = vector_size,
67
+ noise_population_size = 4
68
+ )
69
+
70
+ evo_strat()
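
The new test above exercises the vectorized path added in this release: with `vectorized = True`, the environment returns a tensor of `vector_size` fitness values for a single perturbation, and (per the `get_fitness` change later in this diff) that vector is reduced to a scalar by taking its mean. A minimal sketch of that contract, using a plain `nn.Sequential` model and a made-up `toy_environment` (both illustrative stand-ins, not part of the package):

# minimal sketch of the vectorized-fitness contract added in this release;
# `toy_environment` and the model are illustrative stand-ins
import torch
from torch import nn
from x_evolution import EvoStrategy

model = nn.Sequential(nn.Linear(8, 16), nn.ReLU(), nn.Linear(16, 4))

def toy_environment(model):
    # pretend three parallel rollouts each produced one fitness score;
    # EvoStrategy averages this vector into a single scalar per perturbation
    with torch.no_grad():
        x = torch.randn(3, 8)
        return -model(x).pow(2).mean(dim = -1)  # shape (3,), higher is fitter

evo_strat = EvoStrategy(
    model,
    environment = toy_environment,
    num_generations = 2,
    vectorized = True,
    vector_size = 3,
    noise_population_size = 4
)

evo_strat()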
@@ -0,0 +1,215 @@
1
+ # /// script
2
+ # dependencies = [
3
+ # "fire",
4
+ # "gymnasium[mujoco]>=1.0.0",
5
+ # "gymnasium[other]",
6
+ # "x-evolution>=0.0.20",
7
+ # "x-mlps-pytorch"
8
+ # ]
9
+ # ///
10
+
11
+ # import os
12
+ # os.environ["NCCL_P2P_DISABLE"] = "1"
13
+ # os.environ["NCCL_IB_DISABLE"] = "1"
14
+ # os.environ["MUJOCO_GL"] = "osmesa"
15
+
16
+ import fire
17
+ from shutil import rmtree
18
+ import gymnasium as gym
19
+ import numpy as np
20
+
21
+ import torch
22
+ from torch.nn import Module, GRU, Linear
23
+ import torch.nn.functional as F
24
+
25
+ # functions
26
+
27
+ def exists(v):
28
+ return v is not None
29
+
30
+ def softclamp(t, value):
31
+ return (t / value).tanh() * value
32
+
33
+ class HumanoidEnvironment(Module):
34
+ def __init__(
35
+ self,
36
+ video_folder = './recordings_humanoid',
37
+ render_every_eps = 100,
38
+ max_steps = 1000,
39
+ repeats = 1,
40
+ vectorized = False,
41
+ num_envs = 1
42
+ ):
43
+ super().__init__()
44
+
45
+ self.vectorized = vectorized
46
+ self.num_envs = num_envs
47
+
48
+ if vectorized:
49
+ env = gym.make_vec('Humanoid-v5', num_envs = num_envs, render_mode = 'rgb_array')
50
+ else:
51
+ env = gym.make('Humanoid-v5', render_mode = 'rgb_array')
52
+
53
+ self.env = env
54
+ self.max_steps = max_steps
55
+ self.repeats = repeats
56
+ self.video_folder = video_folder
57
+ self.render_every_eps = render_every_eps
58
+
59
+ def pre_main_callback(self):
60
+ # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop
61
+
62
+ rmtree(self.video_folder, ignore_errors = True)
63
+
64
+ if not self.vectorized:
65
+ self.env = gym.wrappers.RecordVideo(
66
+ env = self.env,
67
+ video_folder = self.video_folder,
68
+ name_prefix = 'recording',
69
+ episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
70
+ disable_logger = True
71
+ )
72
+
73
+ def forward(self, model):
74
+
75
+ device = next(model.parameters()).device
76
+
77
+ seed = torch.randint(0, int(1e6), ())
78
+
79
+ num_envs = self.num_envs if self.vectorized else 1
80
+ cum_reward = torch.zeros(num_envs, device = device)
81
+
82
+ for _ in range(self.repeats):
83
+ state, _ = self.env.reset(seed = seed.item())
84
+
85
+ step = 0
86
+ hiddens = None
87
+ last_action = None
88
+
89
+ dones = torch.zeros(num_envs, device = device, dtype = torch.bool)
90
+
91
+ while step < self.max_steps and not dones.all():
92
+
93
+ state_torch = torch.from_numpy(state).float().to(device)
94
+
95
+ action_logits, hiddens = model(state_torch, hiddens)
96
+
97
+ mean, log_var = action_logits.chunk(2, dim = -1)
98
+
99
+ # sample and then bound and scale to -0.4 to 0.4
100
+
101
+ std = (0.5 * softclamp(log_var, 5.)).exp()
102
+ sampled = mean + torch.randn_like(mean) * std
103
+ action = sampled.tanh() * 0.4
104
+
105
+ next_state, reward, truncated, terminated, info = self.env.step(action.detach().cpu().numpy() if self.vectorized else action.item())
106
+
107
+ reward_np = np.array(reward) if not isinstance(reward, np.ndarray) else reward
108
+ total_reward_base = torch.from_numpy(reward_np).float().to(device)
109
+
110
+ # reward functions
111
+
112
+ # encouraged to move forward (1.0) and stay upright (> 1.2 meters)
113
+
114
+ z_pos = torch.from_numpy(next_state[..., 0]).float().to(device)
115
+ x_vel = torch.from_numpy(next_state[..., 5]).float().to(device)
116
+
117
+ reward_forward = x_vel
118
+ reward_upright = (z_pos > 1.2).float()
119
+
120
+ exploration_bonus = std.mean(dim = -1) * 0.05
121
+ penalize_extreme_actions = (mean.abs() > 1.).float().mean(dim = -1) * 0.05
122
+
123
+ penalize_action_change = 0.
124
+ if exists(last_action):
125
+ penalize_action_change = (last_action - action).abs().mean(dim = -1) * 0.1
126
+
127
+ total_reward = total_reward_base + reward_forward + reward_upright + exploration_bonus - penalize_extreme_actions - penalize_action_change
128
+
129
+ # only add reward if not done
130
+
131
+ mask = (~dones).float()
132
+ cum_reward += total_reward * mask
133
+
134
+ # update dones
135
+
136
+ dones_np = np.array(truncated | terminated) if not isinstance(truncated | terminated, np.ndarray) else (truncated | terminated)
137
+ dones |= torch.from_numpy(dones_np).to(device)
138
+
139
+ step += 1
140
+
141
+ state = next_state
142
+ last_action = action
143
+
144
+ if not self.vectorized:
145
+ return cum_reward.item() / self.repeats
146
+
147
+ return cum_reward / self.repeats
148
+
149
+ # evo strategy
150
+
151
+ from x_evolution import EvoStrategy
152
+
153
+ from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
154
+
155
+ class Model(Module):
156
+
157
+ def __init__(self):
158
+ super().__init__()
159
+
160
+ self.deep_mlp = ResidualNormedMLP(
161
+ dim_in = 348,
162
+ dim = 256,
163
+ depth = 8,
164
+ residual_every = 2
165
+ )
166
+
167
+ self.gru = GRU(256, 256, batch_first = True)
168
+
169
+ self.to_pred = Linear(256, 17 * 2, bias = False)
170
+
171
+ def forward(self, state, hiddens = None):
172
+
173
+ x = self.deep_mlp(state)
174
+
175
+ x = x.unsqueeze(-2)
176
+ gru_out, hiddens = self.gru(x, hiddens)
177
+ x = x + gru_out
178
+ x = x.squeeze(-2)
179
+
180
+ return self.to_pred(x), hiddens
181
+
182
+ from torch.optim.lr_scheduler import CosineAnnealingLR
183
+
184
+ def main(
185
+ vectorized = False,
186
+ num_envs = 8
187
+ ):
188
+ evo_strat = EvoStrategy(
189
+ Model(),
190
+ environment = HumanoidEnvironment(
191
+ repeats = 1,
192
+ render_every_eps = 200,
193
+ vectorized = vectorized,
194
+ num_envs = num_envs
195
+ ),
196
+ vectorized = vectorized,
197
+ vector_size = num_envs,
198
+ num_generations = 50_000,
199
+ noise_population_size = 200,
200
+ noise_low_rank = 1,
201
+ noise_scale = 1e-2,
202
+ noise_scale_clamp_range = (5e-3, 2e-2),
203
+ learned_noise_scale = True,
204
+ use_sigma_optimizer = True,
205
+ learning_rate = 1e-3,
206
+ noise_scale_learning_rate = 1e-4,
207
+ use_scheduler = True,
208
+ scheduler_klass = CosineAnnealingLR,
209
+ scheduler_kwargs = dict(T_max = 50_000)
210
+ )
211
+
212
+ evo_strat()
213
+
214
+ if __name__ == '__main__':
215
+ fire.Fire(main)
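
One detail of the humanoid policy head above worth spelling out: the model predicts a mean and a log-variance per action dimension, the log-variance is soft-clamped to (-5, 5) before being turned into a standard deviation, and the sampled action is squashed through tanh and scaled into (-0.4, 0.4). A small standalone check of those bounds (just the arithmetic from the script, nothing package-specific):

# standalone check of the bounding used in the humanoid policy head above
import torch

def softclamp(t, value):
    # smoothly bounds t to (-value, value)
    return (t / value).tanh() * value

log_var = torch.tensor([-100., 0., 100.])      # deliberately extreme log-variances
std = (0.5 * softclamp(log_var, 5.)).exp()     # stays within roughly (0.082, 12.18)

mean = torch.zeros(3)
sampled = mean + torch.randn(3) * std
action = sampled.tanh() * 0.4                  # never exceeds 0.4 in magnitude

assert (std >= torch.exp(torch.tensor(-2.5))).all()
assert (std <= torch.exp(torch.tensor(2.5))).all()
assert (action.abs() <= 0.4).all()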
@@ -0,0 +1,141 @@
1
+ # /// script
2
+ # dependencies = [
3
+ # "fire",
4
+ # "gymnasium[box2d]>=1.0.0",
5
+ # "gymnasium[other]",
6
+ # "x-evolution>=0.0.20",
7
+ # "x-mlps-pytorch>=0.2.0"
8
+ # ]
9
+ # ///
10
+
11
+ import fire
12
+ from shutil import rmtree
13
+ import gymnasium as gym
14
+ import numpy as np
15
+
16
+ import torch
17
+ from torch.nn import Module
18
+ import torch.nn.functional as F
19
+ from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
20
+ from torch.optim.lr_scheduler import CosineAnnealingLR
21
+
22
+ class LunarEnvironment(Module):
23
+ def __init__(
24
+ self,
25
+ video_folder = './recordings',
26
+ render_every_eps = 500,
27
+ max_steps = 500,
28
+ repeats = 1,
29
+ vectorized = False,
30
+ num_envs = 1
31
+ ):
32
+ super().__init__()
33
+
34
+ self.vectorized = vectorized
35
+ self.num_envs = num_envs
36
+
37
+ if vectorized:
38
+ env = gym.make_vec('LunarLander-v3', num_envs = num_envs, render_mode = 'rgb_array')
39
+ else:
40
+ env = gym.make('LunarLander-v3', render_mode = 'rgb_array')
41
+
42
+ self.env = env
43
+ self.max_steps = max_steps
44
+ self.repeats = repeats
45
+ self.video_folder = video_folder
46
+ self.render_every_eps = render_every_eps
47
+
48
+ def pre_main_callback(self):
49
+ # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop
50
+
51
+ rmtree(self.video_folder, ignore_errors = True)
52
+
53
+ if not self.vectorized:
54
+ self.env = gym.wrappers.RecordVideo(
55
+ env = self.env,
56
+ video_folder = self.video_folder,
57
+ name_prefix = 'recording',
58
+ episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
59
+ disable_logger = True
60
+ )
61
+
62
+ def forward(self, model):
63
+
64
+ device = next(model.parameters()).device
65
+
66
+ seed = torch.randint(0, int(1e6), ())
67
+
68
+ num_envs = self.num_envs if self.vectorized else 1
69
+ cum_reward = torch.zeros(num_envs, device = device)
70
+
71
+ for _ in range(self.repeats):
72
+ state, _ = self.env.reset(seed = seed.item())
73
+
74
+ step = 0
75
+ dones = torch.zeros(num_envs, device = device, dtype = torch.bool)
76
+
77
+ while step < self.max_steps and not dones.all():
78
+
79
+ state_torch = torch.from_numpy(state).to(device)
80
+
81
+ action_logits = model(state_torch)
82
+
83
+ action = F.gumbel_softmax(action_logits, hard = True).argmax(dim = -1)
84
+
85
+ next_state, reward, truncated, terminated, *_ = self.env.step(action.detach().cpu().numpy() if self.vectorized else action.item())
86
+
87
+ reward_np = np.array(reward) if not isinstance(reward, np.ndarray) else reward
88
+ total_reward = torch.from_numpy(reward_np).float().to(device)
89
+
90
+ mask = (~dones).float()
91
+ cum_reward += total_reward * mask
92
+
93
+ dones_np = np.array(truncated | terminated) if not isinstance(truncated | terminated, np.ndarray) else (truncated | terminated)
94
+ dones |= torch.from_numpy(dones_np).to(device)
95
+
96
+ step += 1
97
+
98
+ state = next_state
99
+
100
+ if not self.vectorized:
101
+ return cum_reward.item() / self.repeats
102
+
103
+ return cum_reward / self.repeats
104
+
105
+ # evo strategy
106
+
107
+ from x_evolution import EvoStrategy
108
+
109
+ def main(
110
+ vectorized = False,
111
+ num_envs = 8
112
+ ):
113
+ actor = ResidualNormedMLP(dim_in = 8, dim = 24, depth = 2, residual_every = 1, dim_out = 4)
114
+
115
+ evo_strat = EvoStrategy(
116
+ actor,
117
+ environment = LunarEnvironment(
118
+ repeats = 2,
119
+ vectorized = vectorized,
120
+ num_envs = num_envs
121
+ ),
122
+ vectorized = vectorized,
123
+ vector_size = num_envs,
124
+ num_generations = 50_000,
125
+ noise_population_size = 50,
126
+ noise_low_rank = 1,
127
+ noise_scale = 1e-2,
128
+ noise_scale_clamp_range = (5e-3, 2e-2),
129
+ learned_noise_scale = True,
130
+ use_sigma_optimizer = True,
131
+ learning_rate = 1e-3,
132
+ noise_scale_learning_rate = 1e-4,
133
+ use_scheduler = True,
134
+ scheduler_klass = CosineAnnealingLR,
135
+ scheduler_kwargs = dict(T_max = 50_000)
136
+ )
137
+
138
+ evo_strat()
139
+
140
+ if __name__ == '__main__':
141
+ fire.Fire(main)
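
The lunar lander policy above selects its discrete action with `F.gumbel_softmax(action_logits, hard = True).argmax(dim = -1)`, which amounts to Gumbel-max categorical sampling: the chosen index follows `softmax(action_logits)`. A quick empirical check of that equivalence (illustrative only):

# quick empirical check that gumbel_softmax(..., hard = True).argmax(-1)
# samples actions according to softmax(logits)
import torch
import torch.nn.functional as F

logits = torch.tensor([2.0, 0.0, -1.0, 0.5])

samples = torch.stack([
    F.gumbel_softmax(logits, hard = True).argmax(dim = -1)
    for _ in range(20_000)
])

empirical = torch.bincount(samples, minlength = 4).float() / len(samples)
expected = logits.softmax(dim = -1)

print(empirical)  # should be close to `expected` up to sampling noise
print(expected)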
@@ -0,0 +1,91 @@
1
+ # /// script
2
+ # dependencies = [
3
+ # "fire",
4
+ # "torchvision",
5
+ # "x-mlps-pytorch>=0.2.0",
6
+ # "x-evolution>=0.0.20"
7
+ # ]
8
+ # ///
9
+
10
+ import fire
11
+ import torch
12
+ from torch import nn
13
+ import torch.nn.functional as F
14
+ from torchvision import datasets, transforms
15
+ from torch.utils.data import DataLoader
16
+
17
+ # model
18
+
19
+ from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
20
+
21
+ model = nn.Sequential(
22
+ nn.Flatten(),
23
+ ResidualNormedMLP(dim_in = 784, dim = 512, depth = 8, residual_every = 2, dim_out = 10)
24
+ ).half()
25
+
26
+ batch_size = 256
27
+
28
+ # data
29
+
30
+ dataset = datasets.MNIST('./data', train = True, download = True, transform = transforms.ToTensor())
31
+
32
+ # fitness as inverse of loss
33
+
34
+ def mnist_environment(
35
+ model,
36
+ num_envs = 1,
37
+ vectorized = False,
38
+ batch_size = 256
39
+ ):
40
+ device = next(model.parameters()).device
41
+
42
+ iters = num_envs if vectorized else 1
43
+
44
+ losses = []
45
+
46
+ for _ in range(iters):
47
+ dataloader = DataLoader(dataset, batch_size = batch_size, shuffle = True)
48
+ data_iterator = iter(dataloader)
49
+ data, target = next(data_iterator)
50
+
51
+ data, target = data.to(device), target.to(device)
52
+
53
+ with torch.no_grad():
54
+ logits = model(data.half())
55
+ loss = F.cross_entropy(logits, target)
56
+
57
+ losses.append(-loss)
58
+
59
+ if not vectorized:
60
+ return losses[0]
61
+
62
+ return torch.stack(losses)
63
+
64
+ # evo
65
+
66
+ from x_evolution import EvoStrategy
67
+
68
+ def main(
69
+ vectorized = False,
70
+ num_envs = 8,
71
+ batch_size = 256
72
+ ):
73
+ evo_strat = EvoStrategy(
74
+ model,
75
+ environment = lambda model: mnist_environment(model, num_envs = num_envs, vectorized = vectorized, batch_size = batch_size),
76
+ vectorized = vectorized,
77
+ vector_size = num_envs,
78
+ noise_population_size = 100,
79
+ noise_scale = 1e-2,
80
+ noise_scale_clamp_range = (8e-3, 2e-2),
81
+ noise_low_rank = 1,
82
+ num_generations = 10_000,
83
+ learning_rate = 1e-3,
84
+ learned_noise_scale = True,
85
+ noise_scale_learning_rate = 2e-5
86
+ )
87
+
88
+ evo_strat()
89
+
90
+ if __name__ == '__main__':
91
+ fire.Fire(main)
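
The MNIST script treats the negative cross-entropy on a random batch as the fitness signal, so no gradients are ever taken through the model. Below is a rough post-run evaluation sketch; it assumes the evolved weights remain in `model` after `evo_strat()` returns (the `rollback_model_at_end` flag in the tests suggests keeping them is the default) and reuses the same half-precision forward pass as the script:

# rough evaluation sketch for the evolved MNIST model; assumes `model` still
# holds the evolved weights after `evo_strat()` returns
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

test_dataset = datasets.MNIST('./data', train = False, download = True, transform = transforms.ToTensor())
test_loader = DataLoader(test_dataset, batch_size = 256)

device = next(model.parameters()).device

correct = total = 0
with torch.no_grad():
    for data, target in test_loader:
        logits = model(data.to(device).half())
        correct += (logits.argmax(dim = -1) == target.to(device)).sum().item()
        total += target.numel()

print(f'test accuracy: {correct / total:.3f}')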
@@ -0,0 +1,83 @@
1
+ import fire
2
+ import torch
3
+ from torch import nn
4
+ import torch.nn.functional as F
5
+ from torch.optim.lr_scheduler import LambdaLR
6
+
7
+ # model
8
+
9
+ from torch import nn
10
+
11
+ model = nn.Sequential(
12
+ nn.Linear(2, 16),
13
+ nn.ReLU(),
14
+ nn.Linear(16, 2)
15
+ ).half()
16
+
17
+ batch_size = 128
18
+
19
+ # fitness as inverse of loss
20
+
21
+ from x_evolution import EvoStrategy
22
+
23
+ def xor_environment(
24
+ model,
25
+ num_envs = 1,
26
+ vectorized = False,
27
+ batch_size = 128
28
+ ):
29
+ device = next(model.parameters()).device
30
+
31
+ iters = num_envs if vectorized else 1
32
+
33
+ losses = []
34
+
35
+ for _ in range(iters):
36
+ data = torch.randint(0, 2, (batch_size, 2))
37
+ labels = data[:, 0] ^ data[:, 1]
38
+
39
+ data, labels = tuple(t.to(device) for t in (data, labels))
40
+
41
+ with torch.no_grad():
42
+ logits = model(data.half())
43
+ loss = F.cross_entropy(logits, labels)
44
+
45
+ losses.append(-loss)
46
+
47
+ if not vectorized:
48
+ return losses[0]
49
+
50
+ return torch.stack(losses)
51
+
52
+ # evo
53
+
54
+ def main(
55
+ vectorized = False,
56
+ num_envs = 8,
57
+ batch_size = 128
58
+ ):
59
+ evo_strat = EvoStrategy(
60
+ model,
61
+ environment = lambda model: xor_environment(model, num_envs = num_envs, vectorized = vectorized, batch_size = batch_size),
62
+ vectorized = vectorized,
63
+ vector_size = num_envs,
64
+ noise_population_size = 100,
65
+ noise_low_rank = 1,
66
+ num_generations = 100_000,
67
+ learning_rate = 1e-1,
68
+ noise_scale = 1e-1,
69
+ noise_scale_clamp_range = (0.05, 0.2),
70
+ learned_noise_scale = True,
71
+ noise_scale_learning_rate = 5e-4,
72
+ use_scheduler = True,
73
+ scheduler_klass = LambdaLR,
74
+ scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.)),
75
+ use_sigma_scheduler = True,
76
+ sigma_scheduler_klass = LambdaLR,
77
+ sigma_scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.))
78
+ )
79
+
80
+ evo_strat()
81
+
82
+ if __name__ == '__main__':
83
+ fire.Fire(main)
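
The XOR script pairs the main and sigma optimizers with a `LambdaLR` warm-up, `lambda step: min(1., step / 10.)`, so both learning rates ramp linearly from zero to their configured values over the first 10 generations and then hold steady. A quick standalone look at that schedule, using a toy optimizer and parameter (illustrative only):

# quick look at the warm-up schedule used above: the multiplier ramps from 0 to 1
# over the first 10 scheduler steps, then stays at 1
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import LambdaLR

param = torch.zeros(1, requires_grad = True)
opt = SGD([param], lr = 1e-1)
sched = LambdaLR(opt, lr_lambda = lambda step: min(1., step / 10.))

lrs = []
for _ in range(15):
    opt.step()
    sched.step()
    lrs.append(opt.param_groups[0]['lr'])

print(lrs)  # 0.01, 0.02, ... up to 0.1 by the 10th step, then constant at 0.1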
@@ -93,11 +93,16 @@ class EvoStrategy(Module):
93
93
  verbose = True,
94
94
  accelerator: Accelerator | None = None,
95
95
  accelerate_kwargs: dict = dict(),
96
- reject_generation_fitnesses_if: Callable[[Tensor], bool] | None = None
96
+ reject_generation_fitnesses_if: Callable[[Tensor], bool] | None = None,
97
+ vectorized = False,
98
+ vector_size: int | None = None
97
99
  ):
98
100
  super().__init__()
99
101
  self.verbose = verbose
100
102
 
103
+ self.vectorized = vectorized
104
+ self.vector_size = vector_size
105
+
101
106
  if not exists(accelerator):
102
107
  accelerator = Accelerator(cpu = cpu, **accelerate_kwargs)
103
108
 
@@ -475,24 +480,28 @@ class EvoStrategy(Module):
475
480
  fitnesses.append([0., 0.] if self.mirror_sampling else 0.)
476
481
  continue
477
482
 
478
- individual_param_seeds = with_seed(individual_seed)(randint)(0, MAX_SEED_VALUE, (self.num_params,))
479
-
480
- noise_config = dict(zip(self.param_names_to_optimize, individual_param_seeds.tolist()))
481
-
482
- # determine noise scale, which can be fixed or learned
483
+ def get_fitness(negate = False):
484
+ individual_param_seeds = with_seed(individual_seed.item())(randint)(0, MAX_SEED_VALUE, (self.num_params,))
485
+ noise_config = dict(zip(self.param_names_to_optimize, individual_param_seeds.tolist()))
483
486
 
484
- noise_config_with_scale = dict()
487
+ noise_config_with_scale = dict()
488
+ for param_name, seed in noise_config.items():
489
+ noise_scale = self._get_noise_scale(param_name)
490
+ noise_config_with_scale[param_name] = (seed, noise_scale)
485
491
 
486
- for param_name, seed in noise_config.items():
492
+ with model.temp_add_noise_(noise_config_with_scale, negate = negate):
493
+ fitness = with_seed(maybe_rollout_seed)(self.environment)(model)
487
494
 
488
- noise_scale = self._get_noise_scale(param_name)
495
+ if isinstance(fitness, Tensor) and fitness.numel() > 1:
496
+ fitness = fitness.mean().item()
497
+ elif isinstance(fitness, Tensor):
498
+ fitness = fitness.item()
489
499
 
490
- noise_config_with_scale[param_name] = (seed, noise_scale)
500
+ return fitness
491
501
 
492
- # maybe roll out with a fixed seed
502
+ # evaluate
493
503
 
494
- with model.temp_add_noise_(noise_config_with_scale):
495
- fitness = with_seed(maybe_rollout_seed)(rollout_for_fitness)()
504
+ fitness = get_fitness(negate = False)
496
505
 
497
506
  if not self.mirror_sampling:
498
507
  fitnesses.append(fitness)
@@ -500,8 +509,7 @@ class EvoStrategy(Module):
500
509
 
501
510
  # handle mirror sampling
502
511
 
503
- with model.temp_add_noise_(noise_config_with_scale, negate = True):
504
- fitness_mirrored = with_seed(maybe_rollout_seed)(rollout_for_fitness)()
512
+ fitness_mirrored = get_fitness(negate = True)
505
513
 
506
514
  fitnesses.append([fitness, fitness_mirrored])
507
515
 
@@ -1,130 +0,0 @@
1
- # /// script
2
- # dependencies = [
3
- # "gymnasium[mujoco]>=1.0.0",
4
- # "gymnasium[other]",
5
- # "x-evolution>=0.0.20",
6
- # "x-mlps-pytorch"
7
- # ]
8
- # ///
9
-
10
- import os
11
- os.environ["NCCL_P2P_DISABLE"] = "1"
12
- os.environ["NCCL_IB_DISABLE"] = "1"
13
- os.environ["MUJOCO_GL"] = "osmesa"
14
-
15
- from shutil import rmtree
16
- import gymnasium as gym
17
- import numpy as np
18
-
19
- import torch
20
- from torch.nn import Module
21
- import torch.nn.functional as F
22
-
23
- def softclamp(t, value):
24
- return (t / value).tanh() * value
25
-
26
- class HumanoidEnvironment(Module):
27
- def __init__(
28
- self,
29
- video_folder = './recordings_humanoid',
30
- render_every_eps = 100,
31
- max_steps = 1000,
32
- repeats = 1
33
- ):
34
- super().__init__()
35
-
36
- # Humanoid-v5
37
- env = gym.make('Humanoid-v5', render_mode = 'rgb_array')
38
-
39
- self.env = env
40
- self.max_steps = max_steps
41
- self.repeats = repeats
42
- self.video_folder = video_folder
43
- self.render_every_eps = render_every_eps
44
-
45
- def pre_main_callback(self):
46
- # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop
47
-
48
- rmtree(self.video_folder, ignore_errors = True)
49
-
50
- self.env = gym.wrappers.RecordVideo(
51
- env = self.env,
52
- video_folder = self.video_folder,
53
- name_prefix = 'recording',
54
- episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
55
- disable_logger = True
56
- )
57
-
58
- def forward(self, model):
59
-
60
- device = next(model.parameters()).device
61
-
62
- seed = torch.randint(0, int(1e6), ())
63
-
64
- cum_reward = 0.
65
-
66
- for _ in range(self.repeats):
67
- state, _ = self.env.reset(seed = seed.item())
68
-
69
- step = 0
70
-
71
- while step < self.max_steps:
72
-
73
- state = torch.from_numpy(state).float().to(device)
74
-
75
- action_logits = model(state)
76
-
77
- mean, log_var = action_logits.chunk(2, dim = -1)
78
-
79
- # sample and then bound and scale to -0.4 to 0.4
80
-
81
- std = softclamp((0.5 * log_var).exp(), 10.)
82
- sampled = mean + torch.randn_like(mean) * std
83
- action = sampled.tanh() * 0.4
84
-
85
- next_state, reward, truncated, terminated, *_ = self.env.step(action.detach().cpu().numpy())
86
-
87
- cum_reward += float(reward)
88
- step += 1
89
-
90
- state = next_state
91
-
92
- if truncated or terminated:
93
- break
94
-
95
- return cum_reward / self.repeats
96
-
97
- # evo strategy
98
-
99
- from x_evolution import EvoStrategy
100
-
101
- from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
102
-
103
- actor = ResidualNormedMLP(
104
- dim_in = 348, # state
105
- dim = 256,
106
- depth = 8,
107
- residual_every = 2,
108
- dim_out = 17 * 2 # action mean logvar
109
- )
110
-
111
- from torch.optim.lr_scheduler import CosineAnnealingLR
112
-
113
- evo_strat = EvoStrategy(
114
- actor,
115
- environment = HumanoidEnvironment(repeats = 2),
116
- num_generations = 50_000,
117
- noise_population_size = 200,
118
- noise_low_rank = 1,
119
- noise_scale = 1e-2,
120
- noise_scale_clamp_range = (5e-3, 2e-2),
121
- learned_noise_scale = True,
122
- use_sigma_optimizer = True,
123
- learning_rate = 1e-3,
124
- noise_scale_learning_rate = 1e-4,
125
- use_scheduler = True,
126
- scheduler_klass = CosineAnnealingLR,
127
- scheduler_kwargs = dict(T_max = 50_000)
128
- )
129
-
130
- evo_strat()
@@ -1,107 +0,0 @@
1
- # /// script
2
- # dependencies = [
3
- # "gymnasium[box2d]>=1.0.0",
4
- # "gymnasium[other]",
5
- # "x-evolution>=0.0.20"
6
- # ]
7
- # ///
8
-
9
- from shutil import rmtree
10
- import gymnasium as gym
11
-
12
- import torch
13
- from torch.nn import Module
14
- import torch.nn.functional as F
15
-
16
- class LunarEnvironment(Module):
17
- def __init__(
18
- self,
19
- video_folder = './recordings',
20
- render_every_eps = 500,
21
- max_steps = 500,
22
- repeats = 1
23
- ):
24
- super().__init__()
25
-
26
- env = gym.make('LunarLander-v3', render_mode = 'rgb_array')
27
-
28
- self.env = env
29
- self.max_steps = max_steps
30
- self.repeats = repeats
31
- self.video_folder = video_folder
32
- self.render_every_eps = render_every_eps
33
-
34
- def pre_main_callback(self):
35
- # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop
36
-
37
- rmtree(self.video_folder, ignore_errors = True)
38
-
39
- self.env = gym.wrappers.RecordVideo(
40
- env = self.env,
41
- video_folder = self.video_folder,
42
- name_prefix = 'recording',
43
- episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
44
- disable_logger = True
45
- )
46
-
47
- def forward(self, model):
48
-
49
- device = next(model.parameters()).device
50
-
51
- seed = torch.randint(0, int(1e6), ())
52
-
53
- cum_reward = 0.
54
-
55
- for _ in range(self.repeats):
56
- state, _ = self.env.reset(seed = seed.item())
57
-
58
- step = 0
59
-
60
- while step < self.max_steps:
61
-
62
- state = torch.from_numpy(state).to(device)
63
-
64
- action_logits = model(state)
65
-
66
- action = F.gumbel_softmax(action_logits, hard = True).argmax(dim = -1)
67
-
68
- next_state, reward, truncated, terminated, *_ = self.env.step(action.item())
69
-
70
- cum_reward += float(reward)
71
- step += 1
72
-
73
- state = next_state
74
-
75
- if truncated or terminated:
76
- break
77
-
78
- return cum_reward / self.repeats
79
-
80
- # evo strategy
81
-
82
- from x_evolution import EvoStrategy
83
-
84
- from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
85
-
86
- actor = ResidualNormedMLP(dim_in = 8, dim = 24, depth = 2, residual_every = 1, dim_out = 4)
87
-
88
- from torch.optim.lr_scheduler import CosineAnnealingLR
89
-
90
- evo_strat = EvoStrategy(
91
- actor,
92
- environment = LunarEnvironment(repeats = 2),
93
- num_generations = 50_000,
94
- noise_population_size = 50,
95
- noise_low_rank = 1,
96
- noise_scale = 1e-2,
97
- noise_scale_clamp_range = (5e-3, 2e-2),
98
- learned_noise_scale = True,
99
- use_sigma_optimizer = True,
100
- learning_rate = 1e-3,
101
- noise_scale_learning_rate = 1e-4,
102
- use_scheduler = True,
103
- scheduler_klass = CosineAnnealingLR,
104
- scheduler_kwargs = dict(T_max = 50_000)
105
- )
106
-
107
- evo_strat()
@@ -1,63 +0,0 @@
1
- # /// script
2
- # dependencies = [
3
- # "torchvision",
4
- # "x-evolution>=0.0.20"
5
- # ]
6
- # ///
7
-
8
- import torch
9
- from torch import tensor, nn
10
- import torch.nn.functional as F
11
- from torchvision import datasets, transforms
12
- from torch.utils.data import DataLoader
13
-
14
- # model
15
-
16
- from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
17
-
18
- model = nn.Sequential(
19
- nn.Flatten(),
20
- ResidualNormedMLP(dim_in = 784, dim = 512, depth = 8, residual_every = 2, dim_out = 10)
21
- ).half()
22
-
23
- batch_size = 256
24
-
25
- # data
26
-
27
- dataset = datasets.MNIST('./data', train = True, download = True, transform = transforms.ToTensor())
28
-
29
- # fitness as inverse of loss
30
-
31
- def loss_mnist(model):
32
- device = next(model.parameters()).device
33
-
34
- dataloader = DataLoader(dataset, batch_size = batch_size, shuffle = True)
35
- data_iterator = iter(dataloader)
36
- data, target = next(data_iterator)
37
-
38
- data, target = data.to(device), target.to(device)
39
-
40
- with torch.no_grad():
41
- logits = model(data.half())
42
- loss = F.cross_entropy(logits, target)
43
-
44
- return -loss
45
-
46
- # evo
47
-
48
- from x_evolution import EvoStrategy
49
-
50
- evo_strat = EvoStrategy(
51
- model,
52
- environment = loss_mnist,
53
- noise_population_size = 100,
54
- noise_scale = 1e-2,
55
- noise_scale_clamp_range = (8e-3, 2e-2),
56
- noise_low_rank = 1,
57
- num_generations = 10_000,
58
- learning_rate = 1e-3,
59
- learned_noise_scale = True,
60
- noise_scale_learning_rate = 2e-5
61
- )
62
-
63
- evo_strat()
@@ -1,57 +0,0 @@
1
- import torch
2
- from torch import tensor
3
- import torch.nn.functional as F
4
- from torch.optim.lr_scheduler import LambdaLR
5
-
6
- # model
7
-
8
- from torch import nn
9
-
10
- model = nn.Sequential(
11
- nn.Linear(2, 16),
12
- nn.ReLU(),
13
- nn.Linear(16, 2)
14
- ).half()
15
-
16
- batch_size = 128
17
-
18
- # fitness as inverse of loss
19
-
20
- from x_evolution import EvoStrategy
21
-
22
- def loss_xor(model):
23
- device = next(model.parameters()).device
24
-
25
- data = torch.randint(0, 2, (batch_size, 2))
26
- labels = data[:, 0] ^ data[:, 1]
27
-
28
- data, labels = tuple(t.to(device) for t in (data, labels))
29
-
30
- with torch.no_grad():
31
- logits = model(data.half())
32
- loss = F.cross_entropy(logits, labels)
33
-
34
- return -loss
35
-
36
- # evo
37
-
38
- evo_strat = EvoStrategy(
39
- model,
40
- environment = loss_xor,
41
- noise_population_size = 100,
42
- noise_low_rank = 1,
43
- num_generations = 100_000,
44
- learning_rate = 1e-1,
45
- noise_scale = 1e-1,
46
- noise_scale_clamp_range = (5e-2, 2e-1),
47
- learned_noise_scale = True,
48
- noise_scale_learning_rate = 5e-4,
49
- use_scheduler = True,
50
- scheduler_klass = LambdaLR,
51
- scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.)),
52
- use_sigma_scheduler = True,
53
- sigma_scheduler_klass = LambdaLR,
54
- sigma_scheduler_kwargs = dict(lr_lambda = lambda step: min(1., step / 10.))
55
- )
56
-
57
- evo_strat()
3 files without changes