x-transformers 2.7.6.tar.gz → 2.8.0.tar.gz

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (65)
  1. {x_transformers-2.7.6 → x_transformers-2.8.0}/PKG-INFO +13 -1
  2. {x_transformers-2.7.6 → x_transformers-2.8.0}/README.md +12 -0
  3. {x_transformers-2.7.6 → x_transformers-2.8.0}/pyproject.toml +1 -1
  4. x_transformers-2.8.0/train_gpt_vae.py +131 -0
  5. x_transformers-2.8.0/x_transformers/gpt_vae.py +200 -0
  6. {x_transformers-2.7.6 → x_transformers-2.8.0}/.github/FUNDING.yml +0 -0
  7. {x_transformers-2.7.6 → x_transformers-2.8.0}/.github/workflows/python-publish.yml +0 -0
  8. {x_transformers-2.7.6 → x_transformers-2.8.0}/.github/workflows/python-test.yaml +0 -0
  9. {x_transformers-2.7.6 → x_transformers-2.8.0}/.gitignore +0 -0
  10. {x_transformers-2.7.6 → x_transformers-2.8.0}/LICENSE +0 -0
  11. {x_transformers-2.7.6 → x_transformers-2.8.0}/data/README.md +0 -0
  12. {x_transformers-2.7.6 → x_transformers-2.8.0}/data/enwik8.gz +0 -0
  13. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/all-attention.png +0 -0
  14. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/attention-on-attention.png +0 -0
  15. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/cosine-sim-attention.png +0 -0
  16. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/deepnorm.png +0 -0
  17. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/dynamic-pos-bias-linear.png +0 -0
  18. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/dynamic-pos-bias-log.png +0 -0
  19. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  20. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/dynamic-pos-bias.png +0 -0
  21. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/enhanced-recurrence.png +0 -0
  22. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/fcm.png +0 -0
  23. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/ffglu.png +0 -0
  24. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/flash-attention.png +0 -0
  25. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/gate_values.png +0 -0
  26. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/gating.png +0 -0
  27. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/length-extrapolation-scale.png +0 -0
  28. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/macaron-1.png +0 -0
  29. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/macaron-2.png +0 -0
  30. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/memory-transformer.png +0 -0
  31. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/normformer.png +0 -0
  32. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/pia.png +0 -0
  33. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/qknorm-analysis.png +0 -0
  34. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/resi_dual.png +0 -0
  35. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/residual_attn.png +0 -0
  36. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/rezero.png +0 -0
  37. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/rotary.png +0 -0
  38. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/sandwich-2.png +0 -0
  39. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/sandwich.png +0 -0
  40. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/sandwich_norm.png +0 -0
  41. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/scalenorm.png +0 -0
  42. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/talking-heads.png +0 -0
  43. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/topk-attention.png +0 -0
  44. {x_transformers-2.7.6 → x_transformers-2.8.0}/images/xval.png +0 -0
  45. {x_transformers-2.7.6 → x_transformers-2.8.0}/tests/test_x_transformers.py +0 -0
  46. {x_transformers-2.7.6 → x_transformers-2.8.0}/train_belief_state.py +0 -0
  47. {x_transformers-2.7.6 → x_transformers-2.8.0}/train_copy.py +0 -0
  48. {x_transformers-2.7.6 → x_transformers-2.8.0}/train_entropy_tokenizer.py +0 -0
  49. {x_transformers-2.7.6 → x_transformers-2.8.0}/train_enwik8.py +0 -0
  50. {x_transformers-2.7.6 → x_transformers-2.8.0}/train_length_extrapolate.py +0 -0
  51. {x_transformers-2.7.6 → x_transformers-2.8.0}/train_parity.py +0 -0
  52. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/__init__.py +0 -0
  53. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/attend.py +0 -0
  54. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/autoregressive_wrapper.py +0 -0
  55. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/belief_state_wrapper.py +0 -0
  56. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/continuous.py +0 -0
  57. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/dpo.py +0 -0
  58. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/entropy_based_tokenizer.py +0 -0
  59. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/multi_input.py +0 -0
  60. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/neo_mlp.py +0 -0
  61. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/nonautoregressive_wrapper.py +0 -0
  62. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/up_wrapper.py +0 -0
  63. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/x_transformers.py +0 -0
  64. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  65. {x_transformers-2.7.6 → x_transformers-2.8.0}/x_transformers/xval.py +0 -0
{x_transformers-2.7.6 → x_transformers-2.8.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.7.6
+Version: 2.8.0
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -2540,4 +2540,16 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 }
 ```

+```bibtex
+@misc{zhao2023learningfinegrainedbimanualmanipulation,
+    title = {Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware},
+    author = {Tony Z. Zhao and Vikash Kumar and Sergey Levine and Chelsea Finn},
+    year = {2023},
+    eprint = {2304.13705},
+    archivePrefix = {arXiv},
+    primaryClass = {cs.RO},
+    url = {https://arxiv.org/abs/2304.13705},
+}
+```
+
 *solve intelligence... then use that to solve everything else.* - Demis Hassabis
{x_transformers-2.7.6 → x_transformers-2.8.0}/README.md

@@ -2492,4 +2492,16 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 }
 ```

+```bibtex
+@misc{zhao2023learningfinegrainedbimanualmanipulation,
+    title = {Learning Fine-Grained Bimanual Manipulation with Low-Cost Hardware},
+    author = {Tony Z. Zhao and Vikash Kumar and Sergey Levine and Chelsea Finn},
+    year = {2023},
+    eprint = {2304.13705},
+    archivePrefix = {arXiv},
+    primaryClass = {cs.RO},
+    url = {https://arxiv.org/abs/2304.13705},
+}
+```
+
 *solve intelligence... then use that to solve everything else.* - Demis Hassabis
{x_transformers-2.7.6 → x_transformers-2.8.0}/pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.7.6"
+version = "2.8.0"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
x_transformers-2.8.0/train_gpt_vae.py

@@ -0,0 +1,131 @@
+
+from x_transformers.gpt_vae import GPTVAE
+
+import random
+import tqdm
+import gzip
+import numpy as np
+
+import torch
+import torch.optim as optim
+from torch import tensor
+from torch.nn import functional as F
+from torch.utils.data import DataLoader, Dataset
+
+# constants
+
+NUM_BATCHES = int(1e5)
+BATCH_SIZE = 4
+GRADIENT_ACCUMULATE_EVERY = 4
+LEARNING_RATE = 1e-4
+VALIDATE_EVERY = 100
+GENERATE_EVERY = 500
+GENERATE_LENGTH = 512
+SEQ_LEN = 512
+
+# helpers
+
+def cycle(loader):
+    while True:
+        for data in loader:
+            yield data
+
+def decode_token(token):
+    return str(chr(max(32, token)))
+
+def decode_tokens(tokens):
+    return ''.join(list(map(decode_token, tokens)))
+
+# instantiate GPT-like decoder model
+
+model = GPTVAE(
+    num_tokens = 256,
+    max_seq_len = SEQ_LEN,
+    dim = 512,
+    depth = 6,
+    heads = 8,
+    rotary_pos_emb = True,
+    enc_depth = 3,
+    vae_kl_loss_weight = 1.,
+    dim_latent = 1 # compress to 1 as an example
+).cuda()
+
+latents = tensor([1.]).cuda()
+
+# prepare enwik8 data
+
+with gzip.open('./data/enwik8.gz') as file:
+    data = np.frombuffer(file.read(int(95e6)), dtype=np.uint8).copy()
+    train_x, valid_x = np.split(data, [int(90e6)])
+    data_train, data_val = torch.from_numpy(train_x), torch.from_numpy(valid_x)
+
+class TextSamplerDataset(Dataset):
+    def __init__(self, data, seq_len):
+        super().__init__()
+        self.data = data
+        self.seq_len = seq_len
+
+    def __getitem__(self, index):
+        rand_start = torch.randint(0, self.data.size(0) - self.seq_len - 1, (1,))
+        full_seq = self.data[rand_start: rand_start + self.seq_len + 1].long()
+        return full_seq.cuda()
+
+    def __len__(self):
+        return self.data.size(0) // self.seq_len
+
+train_dataset = TextSamplerDataset(data_train, SEQ_LEN)
+val_dataset = TextSamplerDataset(data_val, SEQ_LEN)
+train_loader = cycle(DataLoader(train_dataset, batch_size = BATCH_SIZE, drop_last = True))
+val_loader = cycle(DataLoader(val_dataset, batch_size = BATCH_SIZE, drop_last = True))
+
+# optimizer
+
+optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
+
+# training
+
+for i in tqdm.tqdm(range(NUM_BATCHES), mininterval=10., desc='training'):
+    model.train()
+
+    for __ in range(GRADIENT_ACCUMULATE_EVERY):
+        loss, (ar_loss, vae_kl_loss) = model(next(train_loader), return_all_losses = True)
+        (loss / GRADIENT_ACCUMULATE_EVERY).backward()
+
+    print(f'training loss: {ar_loss.item():.4f}\t| kl loss: {vae_kl_loss.item():.4f}')
+
+    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
+    optim.step()
+    optim.zero_grad()
+
+    if i % VALIDATE_EVERY == 0:
+        model.eval()
+        with torch.no_grad():
+            loss, (ar_loss, _) = model(next(val_loader), return_all_losses = True)
+            print(f'validation loss: {ar_loss.item():.4f}')
+
+    if i % GENERATE_EVERY == 0:
+        model.eval()
+        inp = random.choice(val_dataset)[:-1]
+        prime = decode_tokens(inp)
+        print(f'{prime} \n\n {"*" * 100}')
+
+        sample = model.generate(
+            prompts = inp,
+            seq_len = GENERATE_LENGTH,
+            cache_kv = True,
+            latents = latents
+        )
+
+        output_str = decode_tokens(sample)
+
+        print(f'\n\nlatent {latents.tolist()} - ', output_str)
+
+        sample_other_direction = model.generate(
+            prompts = inp,
+            seq_len = GENERATE_LENGTH,
+            cache_kv = True,
+            latents = -latents
+        )
+
+        output_str = decode_tokens(sample_other_direction)
+        print(f'\n\nlatent {(-latents).tolist()} - ', output_str)
x_transformers-2.8.0/x_transformers/gpt_vae.py

@@ -0,0 +1,200 @@
+from __future__ import annotations
+
+# applying the cvae + detr design from ACT (Zhao et al.) to GPT
+# for steering, diversity rlvr, map-elites in epo, and other possibilities
+
+import torch
+from torch import nn, Tensor, is_tensor, tensor
+import torch.nn.functional as F
+from torch.nn import Module, ModuleList
+
+from x_transformers.x_transformers import (
+    Encoder,
+    Decoder,
+    TransformerWrapper
+)
+
+from x_transformers.autoregressive_wrapper import AutoregressiveWrapper
+
+from einops.layers.torch import Rearrange
+from einops import rearrange, reduce, repeat
+
+# helper functions
+
+def exists(v):
+    return v is not None
+
+def default(v, d):
+    return v if exists(v) else d
+
+# classes
+
+class GPTVAE(Module):
+    def __init__(
+        self,
+        *,
+        num_tokens,
+        dim,
+        depth,
+        enc_depth,
+        max_seq_len,
+        dim_latent = None,
+        attn_dim_head = 64,
+        heads = 8,
+        enc_kwargs: dict = dict(),
+        dec_kwargs: dict = dict(),
+        vae_kl_loss_weight = 1.,
+        latents_dropout_prob = 0.5, # what percentage of the time to dropout the latents completely
+        pad_id = -1,
+        **kwargs
+    ):
+        super().__init__()
+        dim_latent = default(dim_latent, dim)
+
+        self.encoder = TransformerWrapper(
+            num_tokens = num_tokens,
+            max_seq_len = max_seq_len + 1,
+            return_only_embed = True,
+            average_pool_embed = True,
+            attn_layers = Encoder(
+                dim = dim,
+                depth = enc_depth,
+                attn_dim_head = attn_dim_head,
+                heads = heads,
+                **kwargs,
+                **enc_kwargs
+            ),
+        )
+
+        self.to_latent_mean_log_variance = nn.Sequential(
+            nn.Linear(dim, dim_latent * 2),
+            Rearrange('b (two d) -> two b 1 d', two = 2)
+        )
+
+        self.from_latent_to_prepend_token = nn.Linear(dim_latent, dim)
+
+        self.decoder = TransformerWrapper(
+            num_tokens = num_tokens,
+            max_seq_len = max_seq_len,
+            attn_layers = Decoder(
+                dim = dim,
+                depth = depth,
+                attn_dim_head = attn_dim_head,
+                heads = heads,
+                **kwargs,
+                **dec_kwargs
+            ),
+        )
+
+        self.ar_wrapped_decoder = AutoregressiveWrapper(self.decoder, ignore_index = pad_id)
+
+        self.pad_id = pad_id
+
+        # loss weights - vae kl loss
+
+        self.vae_kl_loss_weight = vae_kl_loss_weight
+
+        self.latents_dropout = nn.Dropout(latents_dropout_prob)
+
+    @property
+    def device(self):
+        return next(self.parameters()).device
+
+    def encode_to_latents(
+        self,
+        seq,
+        return_mean_log_var = False
+    ):
+        mask = seq != self.pad_id
+        pooled = self.encoder(seq, mask = mask)
+
+        latents_mean, latents_log_var = self.to_latent_mean_log_variance(pooled)
+        latents_std = (0.5 * latents_log_var).exp()
+
+        # reparam trick
+
+        latents = latents_mean + latents_std * torch.randn_like(latents_mean)
+
+        if not return_mean_log_var:
+            return latents
+
+        return latents, (latents_mean, latents_log_var)
+
+    @torch.no_grad()
+    def generate(
+        self,
+        prompts,
+        seq_len,
+        latents = None,
+        **generate_kwargs
+    ):
+        assert prompts.ndim in {1, 2}
+        batch = prompts.shape[0] if prompts.ndim == 2 else 1
+
+        # prepend embeds
+
+        prepend_embeds = None
+        if exists(latents):
+            if not is_tensor(latents):
+                latents = tensor(latents, device = self.device)
+
+            if latents.ndim == 1: # repeat latents
+                latents = repeat(latents, 'd -> b d', b = batch)
+
+            prepend_embeds = self.from_latent_to_prepend_token(latents)
+
+        if exists(prepend_embeds):
+            prepend_embeds = rearrange(prepend_embeds, 'b d -> b 1 d')
+
+        # generated
+
+        generated = self.ar_wrapped_decoder.generate(
+            prompts,
+            seq_len,
+            prepend_embeds = prepend_embeds,
+            **generate_kwargs
+        )
+
+        return generated
+
+    def forward(
+        self,
+        seq,
+        return_all_losses = False
+    ):
+        batch, device = seq.shape[0], seq.device
+
+        latents, (latents_mean, latents_log_var) = self.encode_to_latents(seq, return_mean_log_var = True)
+
+        dropped_latents = ~self.latents_dropout(torch.ones((batch,), device = device)).bool()
+
+        prepend_embeds = self.from_latent_to_prepend_token(latents)
+
+        ar_loss = self.ar_wrapped_decoder(
+            seq,
+            prepend_embeds = prepend_embeds,
+            seq_start_pos = dropped_latents.long() # sequence starts at 1 and does not attend to the first style latent
+        )
+
+        # vae kl loss
+
+        vae_kl_loss = (
+            latents_log_var.exp()
+            + latents_mean.square()
+            - latents_log_var
+            - 1.
+        ).sum(dim = -1).mean()
+
+        # return losses
+
+        total_loss = (
+            ar_loss +
+            vae_kl_loss * self.vae_kl_loss_weight
+        )
+
+        if not return_all_losses:
+            return total_loss
+
+        losses = (ar_loss, vae_kl_loss)
+
+        return total_loss, losses
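As a reference for the loss term above: `encode_to_latents` samples the latent with the usual reparameterization trick, and `vae_kl_loss` in `forward` is the standard closed-form KL divergence between the encoder's diagonal Gaussian and a unit Gaussian prior, up to the conventional factor of 1/2 (here effectively absorbed into `vae_kl_loss_weight`):

```latex
z = \mu + \sigma \odot \epsilon, \quad \epsilon \sim \mathcal{N}(0, I)

\mathrm{KL}\big(\mathcal{N}(\mu, \operatorname{diag}(\sigma^2)) \,\|\, \mathcal{N}(0, I)\big)
  = \frac{1}{2} \sum_{j} \left( e^{\log \sigma_j^2} + \mu_j^2 - \log \sigma_j^2 - 1 \right)
```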
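Below is a minimal usage sketch of the new `GPTVAE`, mirroring the calls made in `train_gpt_vae.py` above; the small hyperparameters and the random toy batch are illustrative assumptions, not values from the release:

```python
import torch
from x_transformers.gpt_vae import GPTVAE

# small, CPU-friendly configuration (assumed values for illustration)
model = GPTVAE(
    num_tokens = 256,
    max_seq_len = 256,
    dim = 128,
    depth = 2,
    heads = 4,
    enc_depth = 1,
    dim_latent = 1  # a 1-dimensional steering latent, as in train_gpt_vae.py
)

# toy batch of byte-level token ids
seq = torch.randint(0, 256, (2, 256))

# training objective: autoregressive loss + weighted VAE KL loss
loss, (ar_loss, vae_kl_loss) = model(seq, return_all_losses = True)
loss.backward()

# inference: steer generation through the prepended latent token
prompt = seq[0, :32]

generated = model.generate(
    prompts = prompt,
    seq_len = 64,
    latents = torch.tensor([1.])
)
```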