titans-pytorch 0.2.27__tar.gz → 0.3.0__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: titans-pytorch
3
- Version: 0.2.27
3
+ Version: 0.3.0
4
4
  Summary: Titans
5
5
  Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
6
6
  Project-URL: Repository, https://github.com/lucidrains/titans-pytorch
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "titans-pytorch"
3
- version = "0.2.27"
3
+ version = "0.3.0"
4
4
  description = "Titans"
5
5
  authors = [
6
6
  { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -220,16 +220,19 @@ def test_mac_sampling(
220
220
  @pytest.mark.parametrize('seq_len', (2, 64, 256))
221
221
  @pytest.mark.parametrize('prompt_len', (0, 65))
222
222
  @pytest.mark.parametrize('mem_chunk_size', (2, 32, 64))
223
+ @pytest.mark.parametrize('gated_transition', (False, True))
223
224
  @torch_default_dtype(torch.float64)
224
225
  def test_neural_mem_inference(
225
226
  seq_len,
226
227
  prompt_len,
227
- mem_chunk_size
228
+ mem_chunk_size,
229
+ gated_transition
228
230
  ):
229
231
 
230
232
  mem = NeuralMemory(
231
233
  dim = 384,
232
234
  chunk_size = mem_chunk_size,
235
+ gated_transition = gated_transition
233
236
  )
234
237
 
235
238
  seq = torch.randn(2, seq_len, 384)
@@ -66,12 +66,6 @@ def xnor(x, y):
66
66
  def divisible_by(num, den):
67
67
  return (num % den) == 0
68
68
 
69
- def tuple_index_set(t: tuple, index, value):
70
- klass = type(t)
71
- t = list(t)
72
- t[index] = value
73
- return klass(*t)
74
-
75
69
  def safe_cat(inputs, dim = -2):
76
70
  inputs = tuple(filter(exists, inputs))
77
71
 
@@ -869,16 +863,23 @@ class NeuralMemory(Module):
869
863
 
870
864
  # update weights once batch size is fulfilled
871
865
 
872
- last_update, _ = past_state
866
+ last_update, last_momentum = past_state
873
867
 
874
868
  if exists(gate):
875
- weights = TensorDict({param_name: one_weight.lerp(one_last_update, gate) for (param_name, one_weight), (_, one_last_update) in zip(weights.items(), last_update.items())})
876
- else:
877
- weights = last_update
869
+ last_update = TensorDict({param_name: one_weight.lerp(one_last_update, gate) for (param_name, one_weight), (_, one_last_update) in zip(weights.items(), last_update.items())})
870
+
871
+ past_state = (last_update, last_momentum)
872
+
873
+ # set weights to the last updated weights for the last minibatch
878
874
 
879
- next_neural_mem_state = tuple_index_set(next_neural_mem_state, 1, weights)
875
+ weights = last_update
880
876
 
881
- next_neural_mem_state = tuple_index_set(next_neural_mem_state, -1, updates)
877
+ next_neural_mem_state = next_neural_mem_state._replace(
878
+ weights = weights,
879
+ states = past_state,
880
+ )
881
+
882
+ next_neural_mem_state = next_neural_mem_state._replace(updates = updates)
882
883
 
883
884
  # retrieve
884
885
 
@@ -889,7 +890,8 @@ class NeuralMemory(Module):
889
890
  retrieve_chunk_size = 1
890
891
  need_pad = False
891
892
 
892
- last_update, _ = past_state
893
+ last_update, _ = next_neural_mem_state.states
894
+
893
895
  updates = rearrange_dict_values(last_update, 'b ... -> b 1 ...')
894
896
 
895
897
  retrieved = self.retrieve_memories(
File without changes
File without changes
File without changes