titans-pytorch 0.1.28__tar.gz → 0.1.30__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: titans-pytorch
-Version: 0.1.28
+Version: 0.1.30
 Summary: Titans
 Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
 Project-URL: Repository, https://github.com/lucidrains/titans-pytorch
@@ -1,6 +1,6 @@
 [project]
 name = "titans-pytorch"
-version = "0.1.28"
+version = "0.1.30"
 description = "Titans"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -409,6 +409,7 @@ class NeuralMemory(Module):
     ):
         super().__init__()
         dim_head = default(dim_head, dim)
+        assert not (heads == 1 and dim_head != dim)

         self.retrieve_chunk_size, self.store_chunk_size = pair(chunk_size)

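The added assert rejects a single-head configuration whose dim_head differs from dim. A minimal sketch of the same check, using a hypothetical resolve_dim_head helper (not part of titans-pytorch) that mirrors the dim_head = default(dim_head, dim) line above:

def resolve_dim_head(dim, heads, dim_head = None):
    # mirrors `dim_head = default(dim_head, dim)` from the constructor
    dim_head = dim_head if dim_head is not None else dim
    # the new guard: with a single head, dim_head must equal the model dimension
    assert not (heads == 1 and dim_head != dim)
    return dim_head

resolve_dim_head(384, heads = 4, dim_head = 64)    # ok
resolve_dim_head(384, heads = 1)                   # ok, dim_head falls back to dim
# resolve_dim_head(384, heads = 1, dim_head = 64)  # would raise AssertionError
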
@@ -566,7 +567,7 @@ class NeuralMemory(Module):
     ):
         assert xnor(exists(value_residual), exists(self.learned_value_residual))

-        seq_len, chunk_size = seq.shape[-2], default(chunk_size, self.store_chunk_size)
+        seq_len, heads, chunk_size = seq.shape[-2], self.heads, default(chunk_size, self.store_chunk_size)

         # handle edge case

@@ -645,7 +646,7 @@ class NeuralMemory(Module):

         # restore batch and sequence dimension

-        grads = grads.apply(lambda t: rearrange(t, '(b n) ... -> b n ...', b = batch))
+        grads = grads.apply(lambda t: rearrange(t, '(b n) ... -> b n ...', b = batch * heads))

         # maybe per layer modulation

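The two store-path hunks above thread the head count into the gradient reshaping: with multiple heads, the per-parameter gradients appear to be flattened over batch * heads groups, so restoring the leading dimension with b = batch alone would fold the head groups into the chunk axis. A minimal sketch of the shape fix with made-up sizes (flat_grads and the dimensions are illustrative, not the library's actual tensors):

import torch
from einops import rearrange

batch, heads, num_chunks, dim = 2, 4, 3, 16

# stand-in for one per-parameter gradient tensor whose leading axis is
# flattened over (batch * heads) chunk groups
flat_grads = torch.randn(batch * heads * num_chunks, dim)

# previous behavior: b = batch folds the head groups into the chunk axis
before = rearrange(flat_grads, '(b n) ... -> b n ...', b = batch)          # (2, 12, 16)

# fixed behavior, as in the hunk above: ungroup with b = batch * heads
after = rearrange(flat_grads, '(b n) ... -> b n ...', b = batch * heads)   # (8, 3, 16)
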
@@ -37,7 +37,6 @@ NEURAL_MEM_SEGMENT_LEN = WINDOW_SIZE // 2 # set smaller for more granularity for
 SLIDING_WINDOWS = True
 STORE_ATTN_POOL_CHUNKS = True # whether to use attention pooling for chunk derived momentum, per-layer lr mod, decay
 KV_RECON_LOSS_WEIGHT = 0.
-LEARNED_MEM_MODEL_WEIGHTS = True

 # experiment related

@@ -91,7 +90,6 @@ model = MemoryAsContextTransformer(
     attn_pool_chunks = STORE_ATTN_POOL_CHUNKS,
     momentum = NEURAL_MEM_MOMENTUM,
     use_accelerated_scan = USE_ACCELERATED_SCAN,
-    learned_mem_model_weights = LEARNED_MEM_MODEL_WEIGHTS,
     default_model_kwargs = dict(
         depth = NEURAL_MEMORY_DEPTH,
     )