titans-pytorch 0.4.0-py3-none-any.whl → 0.4.1-py3-none-any.whl

This diff compares the published contents of titans-pytorch 0.4.0 and 0.4.1 as released on PyPI. The change is small and focused: NeuralMemory now surfaces a per-head "surprise" signal (the unweighted memory-model loss of eq (12) in the Titans paper), threading it from the inner per-sample gradient step out through store_memories and the public forward.
--- a/titans_pytorch/neural_memory.py
+++ b/titans_pytorch/neural_memory.py
@@ -353,11 +353,11 @@ class NeuralMemory(Module):
             pred = functional_call(self.memory_model, params, inputs)
             loss = self.store_memory_loss_fn(pred, target) # simple mse loss in paper - eq (12) - |M(k) - v|²
             weighted_loss = loss * loss_weights
-            return weighted_loss.sum()
+            return weighted_loss.sum(), loss

         # two functions

-        grad_fn = grad(forward_and_loss)
+        grad_fn = grad(forward_and_loss, has_aux = True)

         self.per_sample_grad_fn = vmap(grad_fn, in_dims = (0, 0, 0, 0))

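This first hunk is the core mechanism. torch.func.grad(fn, has_aux = True) expects fn to return (scalar_to_differentiate, aux) and returns (grads, aux), so the unweighted per-position loss rides along with the per-sample gradients, with no second forward pass. A minimal, self-contained sketch of the pattern, with a toy linear memory standing in for the package's memory_model (all names below are illustrative, not package code):

    import torch
    from torch.func import grad, vmap

    def forward_and_loss(params, key, target):
        # toy stand-in for functional_call(self.memory_model, params, inputs)
        pred = key @ params['w']
        loss = (pred - target).pow(2).mean(dim = -1)  # unweighted, per position
        # first output is differentiated; second is passed through as aux
        return loss.sum(), loss

    grad_fn = grad(forward_and_loss, has_aux = True)

    # vmap over a leading per-sample dim batches both the grads and the aux loss
    per_sample_grad_fn = vmap(grad_fn, in_dims = (0, 0, 0))

    params = dict(w = torch.randn(4, 2, 3))                    # 4 parameter sets
    keys, targets = torch.randn(4, 5, 2), torch.randn(4, 5, 3)

    grads, unweighted_loss = per_sample_grad_fn(params, keys, targets)
    print(grads['w'].shape, unweighted_loss.shape)  # (4, 2, 3) and (4, 5)
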
@@ -526,6 +526,7 @@ class NeuralMemory(Module):
         seq_index = 0,
         prev_weights = None,
         mask: Tensor | None = None,
+        return_surprises = True
     ):
         if self.qkv_receives_diff_views:
             _, batch, seq_len = seq.shape[:3]
@@ -645,10 +646,14 @@ class NeuralMemory(Module):

         # get grads and extra auxiliary loss (for backwarding through qkv projection in base neural memory module)

-        grads = self.per_sample_grad_fn(dict(weights_for_surprise), keys, adaptive_lr, values)
+        grads, unweighted_mem_model_loss = self.per_sample_grad_fn(dict(weights_for_surprise), keys, adaptive_lr, values)

         grads = TensorDict(grads)

+        # surprises
+
+        unweighted_mem_model_loss = rearrange(unweighted_mem_model_loss, '(b h n) c -> b h (n c)', b = batch, h = heads)
+
         # maybe softclamp grad norm

         if exists(self.max_grad_norm):
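
per_sample_grad_fn is vmapped over a leading dimension into which batch, heads, and chunks have all been folded, so the auxiliary loss comes back as ((b·h·n), c), one row per chunk. The rearrange above regroups it into (batch, heads, seq), putting each head's surprise along the sequence. A quick shape check with toy sizes, assuming the '(b h n) c' packing shown in the hunk:

    import torch
    from einops import rearrange

    batch, heads, num_chunks, chunk_size = 2, 4, 3, 8

    # one loss row per (batch, head, chunk) sample, as returned by the vmapped grad fn
    unweighted_mem_model_loss = torch.randn(batch * heads * num_chunks, chunk_size)

    surprises = rearrange(unweighted_mem_model_loss, '(b h n) c -> b h (n c)', b = batch, h = heads)

    print(surprises.shape)  # torch.Size([2, 4, 24]), i.e. (batch, heads, seq_len)
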
@@ -687,7 +692,10 @@ class NeuralMemory(Module):

             output = (updates, next_store_state)

-            return output
+            if not return_surprises:
+                return output
+
+            return (*output, unweighted_mem_model_loss)

         # momentum + weight decay - momentum is the new contribution, as most linear RNNs have learned forgetting gates

@@ -744,7 +752,10 @@ class NeuralMemory(Module):

         # return updates to neural memory at all chunked timesteps + neural mem cache / state to be fed back

-        return updates, next_store_state
+        if not return_surprises:
+            return updates, next_store_state
+
+        return updates, next_store_state, unweighted_mem_model_loss

     def retrieve_memories(
         self,
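
Note the asymmetric defaults: store_memories takes return_surprises = True (its one internal caller, forward, always wants the aux value), while forward below defaults to False so the public API is unchanged. Both return sites use the same opt-in tuple pattern; a toy sketch:

    def compute(x, return_aux = False):
        out, aux = x * 2, x * x

        if not return_aux:
            return out      # legacy call sites unpack exactly as before

        return out, aux     # new call sites opt in explicitly

    y = compute(3)
    y, extra = compute(3, return_aux = True)
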
@@ -843,7 +854,8 @@ class NeuralMemory(Module):
         store_seq = None,
         state: NeuralMemState | None = None,
         prev_weights = None,
-        store_mask: Tensor | None = None
+        store_mask: Tensor | None = None,
+        return_surprises = False
     ):
         is_multi_input = self.qkv_receives_diff_views

@@ -927,6 +939,7 @@ class NeuralMemory(Module):

         # whether to allow network to slowly adjust from initial weight throughout (residual path) to fully updating weights every batch

+        surprises = None
         gate = None

         if exists(self.transition_gate):
@@ -937,13 +950,14 @@ class NeuralMemory(Module):

             # store

-            next_updates, next_neural_mem_state = self.store_memories(
+            next_updates, next_neural_mem_state, chunk_surprises = self.store_memories(
                 store_seq_chunk,
                 weights,
                 seq_index = seq_index,
                 past_state = past_state,
                 prev_weights = prev_weights,
-                mask = maybe_store_mask
+                mask = maybe_store_mask,
+                return_surprises = True
             )

             weights = next_neural_mem_state.weights
@@ -952,6 +966,8 @@ class NeuralMemory(Module):

             updates = accum_updates(updates, next_updates)

+            surprises = safe_cat((surprises, chunk_surprises), dim = -1)
+
             if is_last and not update_after_final_store:
                 continue

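safe_cat itself is not part of this diff. Its usage here (surprises starts as None, then each chunk's surprise is concatenated along the last, sequence dim) implies a None-tolerant concat; a minimal sketch consistent with that usage, assuming the helper simply drops None entries:

    import torch

    def safe_cat(tensors, dim = -1):
        # concatenate, silently dropping None placeholders
        tensors = [t for t in tensors if t is not None]

        if len(tensors) == 0:
            return None

        if len(tensors) == 1:
            return tensors[0]

        return torch.cat(tensors, dim = dim)

    surprises = None

    for _ in range(3):
        chunk_surprises = torch.randn(2, 4, 8)  # (batch, heads, chunk_len)
        surprises = safe_cat((surprises, chunk_surprises), dim = -1)

    print(surprises.shape)  # torch.Size([2, 4, 24])
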
@@ -986,4 +1002,9 @@ class NeuralMemory(Module):
             updates
         )

-        return retrieved, next_neural_mem_state
+        # returning
+
+        if not return_surprises:
+            return retrieved, next_neural_mem_state
+
+        return retrieved, next_neural_mem_state, surprises
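
End to end, 0.4.1 lets callers opt into the surprise trace from the public forward. A usage sketch following the repository README (the constructor arguments come from the README's example; the surprises shape is inferred from the rearrange above, so treat it as an assumption):

    import torch
    from titans_pytorch import NeuralMemory

    mem = NeuralMemory(dim = 384, chunk_size = 64)

    seq = torch.randn(2, 1024, 384)

    retrieved, state = mem(seq)                                      # 0.4.0-style call, unchanged
    retrieved, state, surprises = mem(seq, return_surprises = True)  # new opt-in

    # surprises: unweighted memory-model loss per head along the sequence
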
--- a/titans_pytorch-0.4.0.dist-info/METADATA
+++ b/titans_pytorch-0.4.1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: titans-pytorch
-Version: 0.4.0
+Version: 0.4.1
 Summary: Titans
 Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
 Project-URL: Repository, https://github.com/lucidrains/titans-pytorch
--- a/titans_pytorch-0.4.0.dist-info/RECORD
+++ b/titans_pytorch-0.4.1.dist-info/RECORD
@@ -2,8 +2,8 @@ titans_pytorch/__init__.py,sha256=AyEUlcXWpnqrvyeihRAXWIfQlzLA4NhBjOqQU4edL-4,29
 titans_pytorch/associative_scan.py,sha256=esaLbukFlgvy2aqopsqBy6KEcZ64B3rsNhG8moKdPSc,5159
 titans_pytorch/mac_transformer.py,sha256=tz72141G5t3AOnxSVsOLtLptGtl8T7zROUvaTw2_XCY,26960
 titans_pytorch/memory_models.py,sha256=wnH9i9kUSoVZhEWUlj8LpBSbB400L9kLt1zP8CO45QQ,5835
-titans_pytorch/neural_memory.py,sha256=uh5NbtAAzfPeZPFe7uhgnpUF6qyP0zjP0eXPIgY5pfc,31929
-titans_pytorch-0.4.0.dist-info/METADATA,sha256=uOklaPv-y-eSpgnvrgVZ-ZL4TpeBg7r_EJxwJbdKyO0,6816
-titans_pytorch-0.4.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-titans_pytorch-0.4.0.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
-titans_pytorch-0.4.0.dist-info/RECORD,,
+titans_pytorch/neural_memory.py,sha256=io5fvLWpOTzx8mkDA9sg3Mkc7-aeugUJoDCniryiuYE,32666
+titans_pytorch-0.4.1.dist-info/METADATA,sha256=XwduHOXOJvjaWJhdYUq-1jhVq2zNKJBwMH1VWopxv5Y,6816
+titans_pytorch-0.4.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+titans_pytorch-0.4.1.dist-info/licenses/LICENSE,sha256=1yCiA9b5nhslTavxPjsQAO-wpOnwJR9-l8LTVi7GJuk,1066
+titans_pytorch-0.4.1.dist-info/RECORD,,
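
The METADATA and RECORD hunks are the mechanical part of the release: the version bump, plus a new digest and size for neural_memory.py. RECORD digests are sha256 in urlsafe base64 with the padding stripped (per the wheel spec); an entry can be reproduced from an unzipped wheel like so:

    import base64, hashlib
    from pathlib import Path

    path = 'titans_pytorch/neural_memory.py'
    data = Path(path).read_bytes()

    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b'=')
    print(f'{path},sha256={digest.decode()},{len(data)}')  # should match the RECORD line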