titans-pytorch 0.2.19__tar.gz → 0.2.20__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: titans-pytorch
-Version: 0.2.19
+Version: 0.2.20
 Summary: Titans
 Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
 Project-URL: Repository, https://github.com/lucidrains/titans-pytorch
@@ -204,6 +204,6 @@ $ python train_mac.py
     eprint = {2501.12352},
     archivePrefix = {arXiv},
     primaryClass = {cs.LG},
-    url = {https://arxiv.org/abs/2501.12352},
+    url = {https://arxiv.org/abs/2501.12352},
 }
 ```
@@ -150,6 +150,6 @@ $ python train_mac.py
     eprint = {2501.12352},
     archivePrefix = {arXiv},
     primaryClass = {cs.LG},
-    url = {https://arxiv.org/abs/2501.12352},
+    url = {https://arxiv.org/abs/2501.12352},
 }
 ```
@@ -1,6 +1,6 @@
 [project]
 name = "titans-pytorch"
-version = "0.2.19"
+version = "0.2.20"
 description = "Titans"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -560,7 +560,7 @@ class MemoryAsContextTransformer(Module):
                     chunk_size = self.neural_memory_segment_len,
                     batch_size = neural_memory_batch_size,
                     model = deepcopy(neural_memory_model),
-                    learned_weight_residual = neural_mem_weight_residual and not is_first_neural_mem,
+                    accept_weight_residual = neural_mem_weight_residual and not is_first_neural_mem,
                     **neural_memory_kwargs
                 )

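The kwarg rename above (`learned_weight_residual` → `accept_weight_residual`) makes the direction of the flag explicit: when set, a neural memory layer accepts a residual contribution from the weights of the previous neural memory layer. As a minimal sketch of what accepting such a weight residual could look like, assuming a learned per-tensor gate — `WeightResidualMix` and its parameter names are illustrative, not this library's internals:

```python
import torch
from torch import nn

class WeightResidualMix(nn.Module):
    # hypothetical gate: blends the current memory layer's weight tensors
    # with those of the previous memory layer, one learned coefficient each
    def __init__(self, num_weight_tensors):
        super().__init__()
        self.mix_logits = nn.Parameter(torch.zeros(num_weight_tensors))

    def forward(self, curr_weights, prev_weights):
        # curr_weights, prev_weights: lists of same-shaped weight tensors
        mixed = []
        for logit, curr, prev in zip(self.mix_logits, curr_weights, prev_weights):
            alpha = logit.sigmoid()  # starts at 0.5, learned during training
            mixed.append(curr.lerp(prev, alpha))  # (1 - alpha) * curr + alpha * prev
        return mixed

# usage sketch: mix the weight tensors of two consecutive memory layers
gate = WeightResidualMix(num_weight_tensors = 2)
curr = [torch.randn(8, 8), torch.randn(8)]
prev = [torch.randn(8, 8), torch.randn(8)]
mixed = gate(curr, prev)
```

This also explains why the flag is forced off for the first neural memory layer (`and not is_first_neural_mem`): the first layer has no previous weights to accept.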
@@ -38,10 +38,10 @@ NEURAL_MEM_MAX_LR = 1e-1
 WINDOW_SIZE = 32
 NEURAL_MEM_SEGMENT_LEN = 4 # set smaller for more granularity for learning rate / momentum etc
 NEURAL_MEM_BATCH_SIZE = 128 # set smaller to update the neural memory weights more often as it traverses the sequence
-NEURAL_MEM_WEIGHT_RESIDUAL = False
 SLIDING_WINDOWS = True
 STORE_ATTN_POOL_CHUNKS = True # whether to use attention pooling for chunk derived momentum, per-layer lr mod, decay
 MEMORY_MODEL_PER_LAYER_LEARNED_LR = True
+NEURAL_MEM_WEIGHT_RESIDUAL = True # learning to accept contributions from the weights of the previous neural mem layer brings about significant improvements. this was improvised and not in the paper, but inspired by the value residual learning free lunch paper

 # experiment related

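The comment on the new default cites value residual learning (the "free lunch" paper) as the inspiration: later layers learn to mix the first layer's attention values back into their own. A rough, self-contained sketch of that idea, assuming single-head attention — none of this is code from the repo or the paper:

```python
import torch
import torch.nn.functional as F
from torch import nn

class ValueResidualAttention(nn.Module):
    # hypothetical single-head attention block whose values can accept
    # a learned residual from the first layer's values
    def __init__(self, dim):
        super().__init__()
        self.to_qkv = nn.Linear(dim, dim * 3, bias = False)
        self.mix_logit = nn.Parameter(torch.zeros(1))  # learned residual gate

    def forward(self, x, first_values = None):
        q, k, v = self.to_qkv(x).chunk(3, dim = -1)

        if first_values is None:
            first_values = v  # first layer: its values seed the residual
        else:
            # later layers learn how much of the first layer's values to accept
            v = v.lerp(first_values, self.mix_logit.sigmoid())

        out = F.scaled_dot_product_attention(q, k, v)
        return out, first_values

# usage sketch: thread the first layer's values through later layers
x = torch.randn(2, 16, 64)
first_values = None
for layer in [ValueResidualAttention(64) for _ in range(3)]:
    x, first_values = layer(x, first_values)
```

The train_mac.py change applies the analogous trick to the neural memory weights instead of attention values, which is what `NEURAL_MEM_WEIGHT_RESIDUAL = True` now enables by default.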