titans-pytorch 0.0.46.tar.gz → 0.0.47.tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry, and is provided for informational purposes only.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: titans-pytorch
-Version: 0.0.46
+Version: 0.0.47
 Summary: Titans
 Project-URL: Homepage, https://pypi.org/project/titans-pytorch/
 Project-URL: Repository, https://github.com/lucidrains/titans-pytorch
@@ -1,6 +1,6 @@
 [project]
 name = "titans-pytorch"
-version = "0.0.46"
+version = "0.0.47"
 description = "Titans"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -217,10 +217,10 @@ class NeuralMemory(Module):
         def forward_and_loss(params, inputs, loss_weights, target):
             pred = functional_call(self.memory_model, params, inputs)
             loss = self.store_memory_loss_fn(pred, target) # simple mse loss in paper - eq (12) - |M(k) - v|²
-            loss = loss * loss_weights
-            return loss.sum()
+            weighted_loss = loss * loss_weights
+            return weighted_loss.sum(), loss.mean()
 
-        self.per_sample_grad_fn = vmap(grad(forward_and_loss), in_dims = (None, 0, 0, 0))
+        self.per_sample_grad_fn = vmap(grad(forward_and_loss, has_aux = True), in_dims = (None, 0, 0, 0))
 
         # queries for retrieving from the model
 
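For context on the hunk above: torch.func's grad() differentiates a scalar-returning function, and wrapping it in vmap yields per-sample gradients. Passing has_aux = True tells grad() that the function returns a (loss, aux) pair, so only the first element is differentiated and the second is handed back alongside the gradients. That is how the unweighted reconstruction loss escapes the gradient transform without a second forward pass. A minimal self-contained sketch of the same pattern (the linear model, shapes, and loss below are illustrative assumptions, not values from this package):

import torch
from torch.func import functional_call, grad, vmap

model = torch.nn.Linear(4, 4)
params = dict(model.named_parameters())

def forward_and_loss(params, inputs, loss_weights, target):
    pred = functional_call(model, params, inputs)
    loss = (pred - target).pow(2).mean(dim = -1)   # stand-in per-token mse
    weighted_loss = loss * loss_weights
    # grad(..., has_aux = True) differentiates only the first return value;
    # the second is passed through untouched as the auxiliary output
    return weighted_loss.sum(), loss.mean()

# in_dims = (None, 0, 0, 0): share params, map over the batch of everything else
per_sample_grad_fn = vmap(grad(forward_and_loss, has_aux = True), in_dims = (None, 0, 0, 0))

inputs       = torch.randn(8, 16, 4)   # (batch, seq, dim)
loss_weights = torch.rand(8, 16)
targets      = torch.randn(8, 16, 4)

grads, aux = per_sample_grad_fn(params, inputs, loss_weights, targets)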
@@ -282,7 +282,8 @@ class NeuralMemory(Module):
     def store_memories(
         self,
         seq,
-        past_state: tuple[dict[str, Tensor], dict[str, Tensor]]
+        past_state: tuple[dict[str, Tensor], dict[str, Tensor]],
+        return_aux_kv_loss = False
     ):
 
         seq = self.store_norm(seq)
@@ -330,7 +331,7 @@ class NeuralMemory(Module):
 
         # get grads and extra auxiliary loss (for backwarding through qkv projection in base neural memory module)
 
-        grads = self.per_sample_grad_fn(dict(curr_weights), keys, adaptive_lr, values)
+        grads, aux_kv_recon_loss = self.per_sample_grad_fn(dict(curr_weights), keys, adaptive_lr, values)
 
         grads = TensorDict(grads)
 
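A note on the unpacking above: because grad sits inside vmap, the auxiliary value comes back with a leading mapped dimension, one reconstruction-loss scalar per mapped sample, rather than a single scalar. A toy shape check (all names and dims here are assumed, not from this package):

import torch
from torch.func import grad, vmap

def f(w, x):
    loss = (w * x) ** 2
    return loss.sum(), loss.mean()   # (differentiated, auxiliary)

w  = torch.randn(4)
xs = torch.randn(8, 4)               # 8 mapped samples

grads, aux = vmap(grad(f, has_aux = True), in_dims = (None, 0))(w, xs)
print(grads.shape)  # torch.Size([8, 4]) - per-sample gradients
print(aux.shape)    # torch.Size([8])    - per-sample auxiliary loss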
@@ -405,7 +406,10 @@ class NeuralMemory(Module):
 
         next_state = (curr_weights + last_update, next_momentum)
 
-        return updates, next_state
+        if not return_aux_kv_loss:
+            return updates, next_state
+
+        return updates, next_state, aux_kv_recon_loss
 
     def retrieve_memories(
         self,
@@ -484,7 +488,7 @@ class NeuralMemory(Module):
         seq,
         store_seq = None,
         past_state: tuple[dict[str, Tensor], dict[str, Tensor]] | None = None,
-        return_next_memories = False
+        return_aux_kv_loss = False
     ):
         batch, seq_len = seq.shape[:2]
 
@@ -499,13 +503,13 @@
 
         store_seq = default(store_seq, seq)
 
-        updates, next_memories = self.store_memories(store_seq, past_state)
+        updates, next_memories, aux_kv_recon_loss = self.store_memories(store_seq, past_state, return_aux_kv_loss = True)
 
         past_weights, _ = past_state
 
         retrieved = self.retrieve_memories(seq, past_weights + updates)
 
-        if not return_next_memories:
+        if not return_aux_kv_loss:
             return retrieved
 
-        return retrieved, next_memories
+        return retrieved, aux_kv_recon_loss
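Caller-side, the renamed flag now surfaces the auxiliary reconstruction loss instead of the next memory state. A hypothetical training-step sketch; the constructor arguments, shapes, and the 0.1 weighting are assumptions for illustration (following the project README's basic usage), not anything specified by this diff:

import torch
from titans_pytorch import NeuralMemory

mem = NeuralMemory(dim = 384, chunk_size = 64)   # assumed configuration

seq = torch.randn(2, 1024, 384)

retrieved = mem(seq)                              # old call shape still works, retrieval only

retrieved, aux_kv_recon_loss = mem(seq, return_aux_kv_loss = True)

# fold the auxiliary loss into the objective so the qkv projections
# inside the memory module receive gradients
loss = retrieved.sum() + 0.1 * aux_kv_recon_loss.mean()   # .sum() is a stand-in objective
loss.backward()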