PyPI - tmnt - Versions diffs - 0.7.51b20240412__py3-none-any.whl → 0.7.52__py3-none-any.whl - Mend

tmnt 0.7.51b20240412__py3-none-any.whl → 0.7.52__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

tmnt/estimator.py CHANGED Viewed

@@ -18,7 +18,7 @@ import json
 from sklearn.metrics import average_precision_score, top_k_accuracy_score, roc_auc_score, ndcg_score, precision_recall_fscore_support
 from tmnt.data_loading import PairedDataLoader, SingletonWrapperLoader, SparseDataLoader, get_llm_model
 from tmnt.modeling import BowVAEModel, CovariateBowVAEModel, SeqBowVED
-from tmnt.modeling import SelfEmbeddingCrossEntropyLoss, GeneralizedSDMLLoss, MultiNegativeCrossEntropyLoss, MetricSeqBowVED, MetricBowVAEModel
+from tmnt.modeling import CrossBatchCosineSimilarityLoss, GeneralizedSDMLLoss, MultiNegativeCrossEntropyLoss, MetricSeqBowVED, MetricBowVAEModel
 from tmnt.eval_npmi import EvaluateNPMI
 from tmnt.distribution import LogisticGaussianDistribution, BaseDistribution, GaussianDistribution, VonMisesDistribution
@@ -1573,11 +1573,11 @@ class SeqBowEstimator(BaseEstimator):
 class SeqBowMetricEstimator(SeqBowEstimator):
     def __init__(self, *args, sdml_smoothing_factor=0.3, metric_loss_temp=0.1, use_teacher_forcing=False,
-                 teacher_forcing_right=True,
+                 teacher_forcing_mode='rand',
                  use_sdml=False, non_scoring_index=-1, **kwargs):
         super(SeqBowMetricEstimator, self).__init__(*args, **kwargs)
         if use_teacher_forcing:
-            self.loss_function = SelfEmbeddingCrossEntropyLoss(teacher_right=teacher_forcing_right, metric_loss_temp=metric_loss_temp)
+            self.loss_function = CrossBatchCosineSimilarityLoss(teacher_mode = teacher_forcing_mode)
         else:
             self.loss_function = \
                 GeneralizedSDMLLoss(smoothing_parameter=sdml_smoothing_factor, x2_downweight_idx=non_scoring_index) if use_sdml \

tmnt/modeling.py CHANGED Viewed

@@ -14,6 +14,7 @@ from tmnt.distribution import BaseDistribution
 from torch import nn
 from torch.nn.modules.loss import _Loss
 import torch
+from torch import Tensor
 from torch.distributions.categorical import Categorical
 from typing import List, Tuple, Dict, Optional, Union, NoReturn
@@ -775,41 +776,39 @@ class MultiNegativeCrossEntropyLoss(_Loss):
         return self._loss(x1, l1, x2, l2)
-class SelfEmbeddingCrossEntropyLoss(_Loss):
+class CrossBatchCosineSimilarityLoss(_Loss):
     """
     Inputs:
         - **x1**: Minibatch of data points with shape (batch_size, vector_dim)
         - **x2**: Minibatch of data points with shape (batch_size, vector_dim)
           Each item in x1 is a positive sample for the items with the same label in x2
-          That is, x1[0] and x2[0] form a positive pair iff label(x1[0]) = label(x2[0])
-          All data points in different rows should be decorrelated
     Outputs:
         - **loss**: loss tensor with shape (batch_size,).
     """
-    def __init__(self, teacher_right=True, metric_loss_temp=0.5, batch_axis=0, **kwargs):
-        super(SelfEmbeddingCrossEntropyLoss, self).__init__(batch_axis, **kwargs)
-        self.cross_entropy_loss = nn.CrossEntropyLoss()
-        self.metric_loss_temp = metric_loss_temp
-        self.teacher_right = teacher_right
+    def __init__(self, teacher_mode='rand', batch_axis=0, **kwargs):
+        super(CrossBatchCosineSimilarityLoss, self).__init__(batch_axis, **kwargs)
+        self.loss_fn = nn.MSELoss()
+        self.teacher_mode = teacher_mode
+    def cosine_sim(self, a: Tensor, b: Tensor) -> Tensor:
+        a_norm = torch.nn.functional.normalize(a, p=2, dim=1)
+        b_norm = torch.nn.functional.normalize(b, p=2, dim=1)
+        return torch.mm(a_norm, b_norm.transpose(0, 1))
     def _loss(self, x1: torch.Tensor, l1: torch.Tensor, x2: torch.Tensor, l2: torch.Tensor):
-        """
-        the function computes the kl divergence between the negative distances
-        and the smoothed label matrix.
-        """
-        batch_size = l1.size()[0]
-        x1_norm = torch.nn.functional.normalize(x1, p=2, dim=1)
-        x2_norm = torch.nn.functional.normalize(x2, p=2, dim=1)
-        cross_side_distances = torch.mm(x1_norm, x2_norm.transpose(0,1)) / self.metric_loss_temp
-        single_side_distances = torch.mm(x2_norm, x2_norm.transpose(0,1)) / self.metric_loss_temp if self.teacher_right \
-            else torch.mm(x1_norm, x1_norm.transpose(0,1)) / self.metric_loss_temp
-        # need to normalize these
-        single_side_distances = single_side_distances / single_side_distances.sum(axis=1,keepdim=True).expand(batch_size, batch_size)
-        # multiply by the batch size to obtain the sum loss (kl_loss averages instead of sum)
-        return self.cross_entropy_loss(cross_side_distances, single_side_distances.to(single_side_distances.device))
+        scores = self.cosine_sim(x1,x2)
+        if self.teacher_mode == 'right':
+            labels = self.cosine_sim(x2,x2).detach()
+        elif self.teacher_mode == 'left':
+            labels = self.cosine_sim(x1,x1).detach()
+        else:
+            if np.random.randint(2):
+                labels = self.cosine_sim(x2,x2).detach()
+            else:
+                labels = self.cosine_sim(x1,x1).detach()
+        return self.loss_fn(scores, labels)
     def forward(self, x1, l1, x2, l2):
         return self._loss(x1, l1, x2, l2)

{tmnt-0.7.51b20240412.dist-info → tmnt-0.7.52.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tmnt
-Version: 0.7.51b20240412
+Version: 0.7.52
 Summary: Topic modeling neural toolkit
 Home-page: https://github.com/mitre/tmnt.git
 Author: The MITRE Corporation

{tmnt-0.7.51b20240412.dist-info → tmnt-0.7.52.dist-info}/RECORD RENAMED Viewed

@@ -2,10 +2,10 @@ tmnt/__init__.py,sha256=EPNq1H7UMyMewWT_zTGBaC7ZouvCywX_gMX4G1dtmvw,250
 tmnt/configuration.py,sha256=P8PEhzVPKO5xG0FrdTLRQ60OYWigbzPY-OSx_hzQlrY,10054
 tmnt/data_loading.py,sha256=A0tsM6x61BGhYBV6rAYdryz2NwbR__8EAYj_Q4Z-DCs,18736
 tmnt/distribution.py,sha256=Pmyc5gwDd_-jP7vLVb0vdNQaSSvF1EuiTZEWg3KfmI8,10866
-tmnt/estimator.py,sha256=i37NVmUseDuEWfk4cwZcShsRrbINLbtrqRzDAPmJUwU,77249
+tmnt/estimator.py,sha256=MERanBwrbYqUcHC872qXCIjUoqjlTKnYjOCBu6mxo90,77217
 tmnt/eval_npmi.py,sha256=ODRDMsBgDM__iCNEX399ck7bAhl7ydvgDqmpfR7Y-q4,5048
 tmnt/inference.py,sha256=Sw7GO7QiWVEtbPJKBjFB7AiKRmUOZbFZn3tCrsStzWw,17845
-tmnt/modeling.py,sha256=wKDuUsw2bvsrvJ7LkcnSXAPh8cvUSd8y3Q7eGAf_JeU,35049
+tmnt/modeling.py,sha256=UJvwQU2ujmY3hUBmUuTWOsZ5AcUFcw-kQhYFK5pICTY,34549
 tmnt/preprocess/__init__.py,sha256=gwMejkQrnqKS05i0JVsUru2hDUR5jE1hKC10dL934GU,170
 tmnt/preprocess/tokenizer.py,sha256=-ZgowfbHrM040vbNTktZM_hdl6HDTqxSJ4mDAxq3dUs,14050
 tmnt/preprocess/vectorizer.py,sha256=RkdivqP76qAJDianV09lONad9NbfBVWLZgIbU_P1-zo,15796
@@ -17,9 +17,9 @@ tmnt/utils/ngram_helpers.py,sha256=VrIzou2oQHCLBLSWODDeikN3PYat1NqqvEeYQj_GhbA,1
 tmnt/utils/pubmed_utils.py,sha256=3sHwoun7vxb0GV-arhpXLMUbAZne0huAh9xQNy6H40E,1274
 tmnt/utils/random.py,sha256=qY75WG3peWoMh9pUyCPBEo6q8IvkF6VRjeb5CqJOBF8,327
 tmnt/utils/recalibrate.py,sha256=TmpB8An8bslICZ13UTJfIvr8VoqiSedtpHxec4n8CHk,1439
-tmnt-0.7.51b20240412.dist-info/LICENSE,sha256=qFZJrfJ7Zi4IXDiyiGVrHWic_l1h2tc36tI8Z7rK9bs,11356
-tmnt-0.7.51b20240412.dist-info/METADATA,sha256=x2c3Q8FLiFfUfbE68ih_lMP7u0_i5M5RHBLisRbXRVw,1452
-tmnt-0.7.51b20240412.dist-info/NOTICE,sha256=p0kYIVAkReTFaGb4C-qPa7h5ztze6hGzOpjCMMbOipU,425
-tmnt-0.7.51b20240412.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-tmnt-0.7.51b20240412.dist-info/top_level.txt,sha256=RpYgUl187sXnqmiwKjZZdcDlHz2AALs6bGdUcukyd_E,5
-tmnt-0.7.51b20240412.dist-info/RECORD,,
+tmnt-0.7.52.dist-info/LICENSE,sha256=qFZJrfJ7Zi4IXDiyiGVrHWic_l1h2tc36tI8Z7rK9bs,11356
+tmnt-0.7.52.dist-info/METADATA,sha256=8jzdkE7tv6P_5OAMS7_pp8_iPyAtwDVhmQ9o5Eo2Zfo,1443
+tmnt-0.7.52.dist-info/NOTICE,sha256=p0kYIVAkReTFaGb4C-qPa7h5ztze6hGzOpjCMMbOipU,425
+tmnt-0.7.52.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+tmnt-0.7.52.dist-info/top_level.txt,sha256=RpYgUl187sXnqmiwKjZZdcDlHz2AALs6bGdUcukyd_E,5
+tmnt-0.7.52.dist-info/RECORD,,

{tmnt-0.7.51b20240412.dist-info → tmnt-0.7.52.dist-info}/LICENSE RENAMED Viewed

File without changes

{tmnt-0.7.51b20240412.dist-info → tmnt-0.7.52.dist-info}/NOTICE RENAMED Viewed

File without changes

{tmnt-0.7.51b20240412.dist-info → tmnt-0.7.52.dist-info}/WHEEL RENAMED Viewed

File without changes

{tmnt-0.7.51b20240412.dist-info → tmnt-0.7.52.dist-info}/top_level.txt RENAMED Viewed

File without changes

tmnt 0.7.51b20240412__py3-none-any.whl → 0.7.52__py3-none-any.whl

tmnt 0.7.51b20240412__py3-none-any.whl → 0.7.52__py3-none-any.whl