titans-pytorch 0.2.28__tar.gz → 0.3.0__tar.gz

This diff compares the contents of two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: titans-pytorch
3
- Version: 0.2.28
3
+ Version: 0.3.0
4
4
  Summary: Titans
5
5
  Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
6
6
  Project-URL: Repository, https://github.com/lucidrains/titans-pytorch
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "titans-pytorch"
3
- version = "0.2.28"
3
+ version = "0.3.0"
4
4
  description = "Titans"
5
5
  authors = [
6
6
  { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -220,16 +220,19 @@ def test_mac_sampling(
220
220
  @pytest.mark.parametrize('seq_len', (2, 64, 256))
221
221
  @pytest.mark.parametrize('prompt_len', (0, 65))
222
222
  @pytest.mark.parametrize('mem_chunk_size', (2, 32, 64))
223
+ @pytest.mark.parametrize('gated_transition', (False, True))
223
224
  @torch_default_dtype(torch.float64)
224
225
  def test_neural_mem_inference(
225
226
  seq_len,
226
227
  prompt_len,
227
- mem_chunk_size
228
+ mem_chunk_size,
229
+ gated_transition
228
230
  ):
229
231
 
230
232
  mem = NeuralMemory(
231
233
  dim = 384,
232
234
  chunk_size = mem_chunk_size,
235
+ gated_transition = gated_transition
233
236
  )
234
237
 
235
238
  seq = torch.randn(2, seq_len, 384)
@@ -66,12 +66,6 @@ def xnor(x, y):
66
66
  def divisible_by(num, den):
67
67
  return (num % den) == 0
68
68
 
69
- def tuple_index_set(t: tuple, index, value):
70
- klass = type(t)
71
- t = list(t)
72
- t[index] = value
73
- return klass(*t)
74
-
75
69
  def safe_cat(inputs, dim = -2):
76
70
  inputs = tuple(filter(exists, inputs))
77
71
 
@@ -872,15 +866,20 @@ class NeuralMemory(Module):
872
866
  last_update, last_momentum = past_state
873
867
 
874
868
  if exists(gate):
875
- weights = TensorDict({param_name: one_weight.lerp(one_last_update, gate) for (param_name, one_weight), (_, one_last_update) in zip(weights.items(), last_update.items())})
876
- else:
877
- weights = last_update
869
+ last_update = TensorDict({param_name: one_weight.lerp(one_last_update, gate) for (param_name, one_weight), (_, one_last_update) in zip(weights.items(), last_update.items())})
870
+
871
+ past_state = (last_update, last_momentum)
872
+
873
+ # set weights to the last updated weights for the last minibatch
878
874
 
879
- past_state = (weights, last_momentum)
880
- next_neural_mem_state = tuple_index_set(next_neural_mem_state, -2, past_state)
881
- next_neural_mem_state = tuple_index_set(next_neural_mem_state, 1, weights)
875
+ weights = last_update
882
876
 
883
- next_neural_mem_state = tuple_index_set(next_neural_mem_state, -1, updates)
877
+ next_neural_mem_state = next_neural_mem_state._replace(
878
+ weights = weights,
879
+ states = past_state,
880
+ )
881
+
882
+ next_neural_mem_state = next_neural_mem_state._replace(updates = updates)
884
883
 
885
884
  # retrieve
886
885
 
@@ -891,7 +890,8 @@ class NeuralMemory(Module):
891
890
  retrieve_chunk_size = 1
892
891
  need_pad = False
893
892
 
894
- last_update, _ = past_state
893
+ last_update, _ = next_neural_mem_state.states
894
+
895
895
  updates = rearrange_dict_values(last_update, 'b ... -> b 1 ...')
896
896
 
897
897
  retrieved = self.retrieve_memories(
File without changes
File without changes
File without changes