x-evolution 0.1.23.tar.gz → 0.1.25.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {x_evolution-0.1.23 → x_evolution-0.1.25}/.gitignore +3 -0
- {x_evolution-0.1.23 → x_evolution-0.1.25}/PKG-INFO +4 -1
- {x_evolution-0.1.23 → x_evolution-0.1.25}/README.md +2 -0
- {x_evolution-0.1.23 → x_evolution-0.1.25}/pyproject.toml +2 -1
- x_evolution-0.1.25/scripts/install-humanoid.sh +2 -0
- x_evolution-0.1.25/train_humanoid.py +130 -0
- {x_evolution-0.1.23 → x_evolution-0.1.25}/.github/workflows/python-publish.yml +0 -0
- {x_evolution-0.1.23 → x_evolution-0.1.25}/.github/workflows/test.yml +0 -0
- {x_evolution-0.1.23 → x_evolution-0.1.25}/LICENSE +0 -0
- {x_evolution-0.1.23 → x_evolution-0.1.25}/tests/test_evolution.py +0 -0
- {x_evolution-0.1.23 → x_evolution-0.1.25}/train_lunar.py +0 -0
- {x_evolution-0.1.23 → x_evolution-0.1.25}/train_mnist.py +0 -0
- {x_evolution-0.1.23 → x_evolution-0.1.25}/train_xor.py +0 -0
- {x_evolution-0.1.23 → x_evolution-0.1.25}/x_evolution/__init__.py +0 -0
- {x_evolution-0.1.23 → x_evolution-0.1.25}/x_evolution/x_evolution.py +0 -0
{x_evolution-0.1.23 → x_evolution-0.1.25}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-evolution
-Version: 0.1.23
+Version: 0.1.25
 Summary: x-evolution
 Project-URL: Homepage, https://pypi.org/project/x-evolution/
 Project-URL: Repository, https://github.com/lucidrains/x-evolution
@@ -42,6 +42,7 @@ Requires-Dist: x-mlps-pytorch>=0.1.31
 Requires-Dist: x-transformers>=2.11.23
 Provides-Extra: examples
 Requires-Dist: gymnasium[box2d]>=1.0.0; extra == 'examples'
+Requires-Dist: gymnasium[mujoco]>=1.0.0; extra == 'examples'
 Requires-Dist: gymnasium[other]; extra == 'examples'
 Provides-Extra: test
 Requires-Dist: pytest; extra == 'test'
@@ -106,6 +107,8 @@ Then
 $ accelerate launch train.py
 ```
 
+For gymnasium simulations, first run `pip install '[examples]'`
+
 ## Citations
 
 ```bibtex
{x_evolution-0.1.23 → x_evolution-0.1.25}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "x-evolution"
-version = "0.1.23"
+version = "0.1.25"
 description = "x-evolution"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -39,6 +39,7 @@ Repository = "https://github.com/lucidrains/x-evolution"
 
 examples = [
     "gymnasium[box2d]>=1.0.0",
+    "gymnasium[mujoco]>=1.0.0",
     "gymnasium[other]"
 ]
 
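Both files gain the same dependency: `gymnasium[mujoco]` joins the existing `box2d` entry under the `examples` extra, matching the new Humanoid training script below. Against the published package, pulling in these simulation dependencies would presumably be `pip install 'x-evolution[examples]'`; the README snippet above shows the bare `pip install '[examples]'` form verbatim.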
x_evolution-0.1.25/train_humanoid.py (new file)

@@ -0,0 +1,130 @@
+# /// script
+# dependencies = [
+#   "gymnasium[mujoco]>=1.0.0",
+#   "gymnasium[other]",
+#   "x-evolution>=0.0.20",
+#   "x-mlps-pytorch"
+# ]
+# ///
+
+# import os
+# os.environ["NCCL_P2P_DISABLE"] = "1"
+# os.environ["NCCL_IB_DISABLE"] = "1"
+# os.environ["MUJOCO_GL"] = "osmesa"
+
+from shutil import rmtree
+import gymnasium as gym
+import numpy as np
+
+import torch
+from torch.nn import Module
+import torch.nn.functional as F
+
+def softclamp(t, value):
+    return (t / value).tanh() * value
+
+class HumanoidEnvironment(Module):
+    def __init__(
+        self,
+        video_folder = './recordings_humanoid',
+        render_every_eps = 100,
+        max_steps = 1000,
+        repeats = 1
+    ):
+        super().__init__()
+
+        # Humanoid-v5
+        env = gym.make('Humanoid-v5', render_mode = 'rgb_array')
+
+        self.env = env
+        self.max_steps = max_steps
+        self.repeats = repeats
+        self.video_folder = video_folder
+        self.render_every_eps = render_every_eps
+
+    def pre_main_callback(self):
+        # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop
+
+        rmtree(self.video_folder, ignore_errors = True)
+
+        self.env = gym.wrappers.RecordVideo(
+            env = self.env,
+            video_folder = self.video_folder,
+            name_prefix = 'recording',
+            episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
+            disable_logger = True
+        )
+
+    def forward(self, model):
+
+        device = next(model.parameters()).device
+
+        seed = torch.randint(0, int(1e6), ())
+
+        cum_reward = 0.
+
+        for _ in range(self.repeats):
+            state, _ = self.env.reset(seed = seed.item())
+
+            step = 0
+
+            while step < self.max_steps:
+
+                state = torch.from_numpy(state).float().to(device)
+
+                action_logits = model(state)
+
+                mean, log_var = action_logits.chunk(2, dim = -1)
+
+                # sample and then bound and scale to -0.4 to 0.4
+
+                std = (0.5 * softclamp(log_var, 5.)).exp()
+                sampled = mean + torch.randn_like(mean) * std
+                action = sampled.tanh() * 0.4
+
+                next_state, reward, truncated, terminated, *_ = self.env.step(action.detach().cpu().numpy())
+
+                cum_reward += float(reward)
+                step += 1
+
+                state = next_state
+
+                if truncated or terminated:
+                    break
+
+        return cum_reward / self.repeats
+
+# evo strategy
+
+from x_evolution import EvoStrategy
+
+from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
+
+actor = ResidualNormedMLP(
+    dim_in = 348, # state
+    dim = 256,
+    depth = 8,
+    residual_every = 2,
+    dim_out = 17 * 2 # action mean logvar
+)
+
+from torch.optim.lr_scheduler import CosineAnnealingLR
+
+evo_strat = EvoStrategy(
+    actor,
+    environment = HumanoidEnvironment(repeats = 2),
+    num_generations = 50_000,
+    noise_population_size = 200,
+    noise_low_rank = 1,
+    noise_scale = 1e-2,
+    noise_scale_clamp_range = (5e-3, 2e-2),
+    learned_noise_scale = True,
+    use_sigma_optimizer = True,
+    learning_rate = 1e-3,
+    noise_scale_learning_rate = 1e-4,
+    use_scheduler = True,
+    scheduler_klass = CosineAnnealingLR,
+    scheduler_kwargs = dict(T_max = 50_000)
+)
+
+evo_strat()
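The heart of the new script is `forward`, which rolls out episodes and returns the average cumulative reward as the fitness signal for the evolution strategy. A minimal standalone sketch of its action sampling, assuming only `torch` (the shapes mirror Humanoid-v5's 17-dimensional action space):

```python
import torch

# soft-clamp as defined in train_humanoid.py: bounds t smoothly to (-value, value)
def softclamp(t, value):
    return (t / value).tanh() * value

mean    = torch.randn(17)        # one mean per Humanoid-v5 action dimension
log_var = torch.randn(17) * 10.  # deliberately extreme, to exercise the clamp

# log-variance is soft-clamped to (-5, 5), so std stays within [e^-2.5, e^2.5]
std = (0.5 * softclamp(log_var, 5.)).exp()

# reparameterized Gaussian sample, tanh-squashed and scaled into [-0.4, 0.4]
action = (mean + torch.randn_like(mean) * std).tanh() * 0.4

assert ((action >= -0.4) & (action <= 0.4)).all()
```

The tanh squash keeps every joint torque inside Humanoid's valid [-0.4, 0.4] control range no matter how extreme the network's raw outputs get.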
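The `# /// script` block at the top of `train_humanoid.py` is PEP 723 inline script metadata, so a PEP 723-aware runner can resolve its dependencies on the fly; assuming `uv` is available, `uv run train_humanoid.py` would presumably suffice. The commented-out `MUJOCO_GL = "osmesa"` line is the usual toggle for software rendering on headless machines.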