x-evolution 0.1.23.tar.gz → 0.1.24.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,6 @@
+recordings/
+recordings_humanoid/
+
 .DS_Store
 
 # Byte-compiled / optimized / DLL files
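The two newly ignored directories presumably match the `video_folder` defaults used by the example training scripts' `RecordVideo` wrappers (the new humanoid script below writes to `./recordings_humanoid`).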
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-evolution
-Version: 0.1.23
+Version: 0.1.24
 Summary: x-evolution
 Project-URL: Homepage, https://pypi.org/project/x-evolution/
 Project-URL: Repository, https://github.com/lucidrains/x-evolution
@@ -42,6 +42,7 @@ Requires-Dist: x-mlps-pytorch>=0.1.31
 Requires-Dist: x-transformers>=2.11.23
 Provides-Extra: examples
 Requires-Dist: gymnasium[box2d]>=1.0.0; extra == 'examples'
+Requires-Dist: gymnasium[mujoco]>=1.0.0; extra == 'examples'
 Requires-Dist: gymnasium[other]; extra == 'examples'
 Provides-Extra: test
 Requires-Dist: pytest; extra == 'test'
@@ -106,6 +107,8 @@ Then
 $ accelerate launch train.py
 ```
 
+For gymnasium simulations, first run `pip install '[examples]'`
+
 ## Citations
 
 ```bibtex
@@ -57,6 +57,8 @@ Then
 $ accelerate launch train.py
 ```
 
+For gymnasium simulations, first run `pip install '[examples]'`
+
 ## Citations
 
 ```bibtex
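The `pip install '[examples]'` in the added README line is quoted as released; it omits the package name. Against the `examples` extra declared in the metadata above, the fully spelled-out command would presumably be `pip install 'x-evolution[examples]'`.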
@@ -1,6 +1,6 @@
 [project]
 name = "x-evolution"
-version = "0.1.23"
+version = "0.1.24"
 description = "x-evolution"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -39,6 +39,7 @@ Repository = "https://github.com/lucidrains/x-evolution"
 
 examples = [
     "gymnasium[box2d]>=1.0.0",
+    "gymnasium[mujoco]>=1.0.0",
     "gymnasium[other]"
 ]
 
@@ -0,0 +1,2 @@
+#!/bin/bash
+apt-get update && apt-get install -y libegl1 libgl1-mesa-glx libosmesa6-dev xvfb
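This new two-line shell script pulls in the system libraries for headless MuJoCo rendering: libosmesa6 and the EGL/GL packages back the `MUJOCO_GL=osmesa` software-rendering path set at the top of the training script below, while xvfb provides a virtual X display (e.g. via `xvfb-run -a accelerate launch train.py`) for anything that still expects one.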
@@ -0,0 +1,130 @@
+# /// script
+# dependencies = [
+#     "gymnasium[mujoco]>=1.0.0",
+#     "gymnasium[other]",
+#     "x-evolution>=0.0.20",
+#     "x-mlps-pytorch"
+# ]
+# ///
+
+import os
+os.environ["NCCL_P2P_DISABLE"] = "1"
+os.environ["NCCL_IB_DISABLE"] = "1"
+os.environ["MUJOCO_GL"] = "osmesa"
+
+from shutil import rmtree
+import gymnasium as gym
+import numpy as np
+
+import torch
+from torch.nn import Module
+import torch.nn.functional as F
+
+def softclamp(t, value):
+    return (t / value).tanh() * value
+
+class HumanoidEnvironment(Module):
+    def __init__(
+        self,
+        video_folder = './recordings_humanoid',
+        render_every_eps = 100,
+        max_steps = 1000,
+        repeats = 1
+    ):
+        super().__init__()
+
+        # Humanoid-v5
+        env = gym.make('Humanoid-v5', render_mode = 'rgb_array')
+
+        self.env = env
+        self.max_steps = max_steps
+        self.repeats = repeats
+        self.video_folder = video_folder
+        self.render_every_eps = render_every_eps
+
+    def pre_main_callback(self):
+        # the `pre_main_callback` on the environment passed in is called before the start of the evolutionary strategies loop
+
+        rmtree(self.video_folder, ignore_errors = True)
+
+        self.env = gym.wrappers.RecordVideo(
+            env = self.env,
+            video_folder = self.video_folder,
+            name_prefix = 'recording',
+            episode_trigger = lambda eps_num: (eps_num % self.render_every_eps) == 0,
+            disable_logger = True
+        )
+
+    def forward(self, model):
+
+        device = next(model.parameters()).device
+
+        seed = torch.randint(0, int(1e6), ())
+
+        cum_reward = 0.
+
+        for _ in range(self.repeats):
+            state, _ = self.env.reset(seed = seed.item())
+
+            step = 0
+
+            while step < self.max_steps:
+
+                state = torch.from_numpy(state).float().to(device)
+
+                action_logits = model(state)
+
+                mean, log_var = action_logits.chunk(2, dim = -1)
+
+                # sample and then bound and scale to -0.4 to 0.4
+
+                std = softclamp((0.5 * log_var).exp(), 10.)
+                sampled = mean + torch.randn_like(mean) * std
+                action = sampled.tanh() * 0.4
+
+                next_state, reward, truncated, terminated, *_ = self.env.step(action.detach().cpu().numpy())
+
+                cum_reward += float(reward)
+                step += 1
+
+                state = next_state
+
+                if truncated or terminated:
+                    break
+
+        return cum_reward / self.repeats
+
+# evo strategy
+
+from x_evolution import EvoStrategy
+
+from x_mlps_pytorch.residual_normed_mlp import ResidualNormedMLP
+
+actor = ResidualNormedMLP(
+    dim_in = 348,    # state
+    dim = 256,
+    depth = 8,
+    residual_every = 2,
+    dim_out = 17 * 2 # action mean logvar
+)
+
+from torch.optim.lr_scheduler import CosineAnnealingLR
+
+evo_strat = EvoStrategy(
+    actor,
+    environment = HumanoidEnvironment(repeats = 2),
+    num_generations = 50_000,
+    noise_population_size = 200,
+    noise_low_rank = 1,
+    noise_scale = 1e-2,
+    noise_scale_clamp_range = (5e-3, 2e-2),
+    learned_noise_scale = True,
+    use_sigma_optimizer = True,
+    learning_rate = 1e-3,
+    noise_scale_learning_rate = 1e-4,
+    use_scheduler = True,
+    scheduler_klass = CosineAnnealingLR,
+    scheduler_kwargs = dict(T_max = 50_000)
+)
+
+evo_strat()
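The `# /// script` header on this new file is PEP 723 inline script metadata, so the example can also be run standalone with a PEP 723-aware runner, which resolves the listed dependencies on the fly without installing the `examples` extra; assuming the file were saved as, say, `train_humanoid.py` (a hypothetical name, the diff does not show it), that would look like `uv run train_humanoid.py`.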