tmnt 0.7.57__py3-none-any.whl → 0.7.58__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tmnt/data_loading.py CHANGED
@@ -40,7 +40,8 @@ llm_catalog = {
     'johngiorgi/declutr-sci-base': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained),
     'BAAI/bge-base-en-v1.5': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained),
     'pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained),
-    'Alibaba-NLP/gte-base-en-v1.5': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained)
+    'Alibaba-NLP/gte-base-en-v1.5': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained),
+    'intfloat/multilingual-e5-base': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained)
     ## add more model options here ...
     }
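The only functional change in this file is the new 'intfloat/multilingual-e5-base' entry in llm_catalog. Each catalog entry maps a Hugging Face model id to the loader pair (AutoTokenizer.from_pretrained, AutoModel.from_pretrained). A minimal sketch of what this entry resolves to, using the Hugging Face loaders directly rather than tmnt's own loading helpers:

from transformers import AutoModel, AutoTokenizer

model_id = 'intfloat/multilingual-e5-base'            # the id added to llm_catalog
tokenizer = AutoTokenizer.from_pretrained(model_id)   # first element of the catalog tuple
encoder = AutoModel.from_pretrained(model_id)         # second element of the catalog tuple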
tmnt/distribution.py CHANGED
@@ -15,24 +15,19 @@ import torch
 from scipy import special as sp
 import torch
 from typing import Callable, Literal, Optional, Tuple, TypeVar, Union
+from tmnt.sparse.modeling import TopKEncoder


 __all__ = ['BaseDistribution', 'GaussianDistribution', 'GaussianUnitVarDistribution', 'LogisticGaussianDistribution',
            'VonMisesDistribution']

-
 class BaseDistribution(nn.Module):

-    def __init__(self, enc_size, n_latent, device, on_simplex=False):
+    def __init__(self, enc_size, n_latent, device, on_simplex=True):
         super(BaseDistribution, self).__init__()
         self.n_latent = n_latent
         self.enc_size = enc_size
         self.device = device
-        self.mu_encoder = nn.Linear(enc_size, n_latent).to(device)
-        self.mu_bn = nn.BatchNorm1d(n_latent, momentum = 0.8, eps=0.0001).to(device)
-        self.softmax = nn.Softmax(dim=1).to(device)
-        self.softplus = nn.Softplus().to(device)
-        self.on_simplex = on_simplex

     ## this is required by most priors
     def _get_gaussian_sample(self, mu, lv, batch_size):
@@ -47,11 +42,25 @@ class BaseDistribution(nn.Module):

     def get_mu_encoding(self, data, include_bn):
         raise NotImplemented
+
+    def freeze_pre_encoder(self) -> None:
+        raise NotImplemented
+
+    def unfreeze_pre_encoder(self) -> None:
+        raise NotImplemented


+class SimpleDistribution(BaseDistribution):
+    def __init__(self, enc_size, n_latent, device, on_simplex=False):
+        super(SimpleDistribution, self).__init__(enc_size, n_latent, device, on_simplex=on_simplex)
+        self.mu_encoder = nn.Linear(enc_size, n_latent).to(device)
+        self.mu_bn = nn.BatchNorm1d(n_latent, momentum = 0.8, eps=0.0001).to(device)
+        self.softmax = nn.Softmax(dim=1).to(device)
+        self.softplus = nn.Softplus().to(device)
+        self.on_simplex = on_simplex


-class GaussianDistribution(BaseDistribution):
+class GaussianDistribution(SimpleDistribution):
     """Gaussian latent distribution with diagnol co-variance.

     Parameters:
@@ -98,7 +107,7 @@ class GaussianDistribution(BaseDistribution):



-class GaussianUnitVarDistribution(BaseDistribution):
+class GaussianUnitVarDistribution(SimpleDistribution):
     """Gaussian latent distribution with fixed unit variance.

     Parameters:
@@ -141,7 +150,7 @@ class GaussianUnitVarDistribution(BaseDistribution):
         return mu


-class LogisticGaussianDistribution(BaseDistribution):
+class LogisticGaussianDistribution(SimpleDistribution):
     """Logistic normal/Gaussian latent distribution with specified prior

     Parameters:
@@ -198,7 +207,7 @@ class LogisticGaussianDistribution(BaseDistribution):
         return mu


-class VonMisesDistribution(BaseDistribution):
+class VonMisesDistribution(SimpleDistribution):

     def __init__(self, enc_size, n_latent, kappa=100.0, dr=0.1, device='cpu'):
         super(VonMisesDistribution, self).__init__(enc_size, n_latent, device, on_simplex=False)
@@ -238,7 +247,7 @@ class VonMisesDistribution(BaseDistribution):



-class Projection(BaseDistribution):
+class Projection(SimpleDistribution):

     def __init__(self, enc_size, n_latent, device='cpu'):
         super(Projection, self).__init__(enc_size, n_latent, device)
@@ -264,23 +273,7 @@ class Projection(BaseDistribution):
         return enc


-
-class TopK(nn.Module):
-    def __init__(
-        self, k: int, postact_fn: Callable[[torch.Tensor], torch.Tensor] = nn.ReLU()
-    ):
-        super().__init__()
-        self.k = k
-        self.postact_fn = postact_fn
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        topk = torch.topk(x, k=self.k, dim=-1)
-        values = self.postact_fn(topk.values)
-        result = torch.zeros_like(x)
-        result.scatter_(-1, topk.indices, values)
-        return result
-
-class ConceptLogisticGaussianDistribution(nn.Module):
+class ConceptLogisticGaussianDistribution(BaseDistribution):
     """Sparse concept encoding with Logistic normal/Gaussian latent distribution with specified prior

     Parameters:
@@ -289,19 +282,19 @@ class ConceptLogisticGaussianDistribution(nn.Module):
         dr (float): Dropout value for dropout applied post sample. optional (default = 0.2)
         alpha (float): Value the determines prior variance as 1/alpha - (2/n_latent) + 1/(n_latent^2)
     """
-    def __init__(self, enc_size, n_latent, n_concepts=16000, k_sparsity=32, device='cpu', dr=0.1, alpha=1.0):
-        super(ConceptLogisticGaussianDistribution, self).__init__()
+    def __init__(self, enc_size, n_latent, sparse_encoder: TopKEncoder, device='cpu', dr=0.1, alpha=1.0):
+        super(ConceptLogisticGaussianDistribution, self).__init__(enc_size, n_latent, device, on_simplex=True)
         self.n_latent = n_latent
         self.enc_size = enc_size
         self.device = device
-        self.activation = TopK(k=k_sparsity)
-        self.core_sparse = Sequential(nn.Linear(enc_size, n_concepts), self.activation).to(device)
-        self.mu_encoder = Sequential(self.core_sparse, nn.Linear(n_concepts, n_latent)).to(device)
+        self.sparse_encoder = sparse_encoder.to(device)
+        self.n_concepts = sparse_encoder.get_dict_size()
+        self.sparse_to_mu = nn.Linear(self.n_concepts, n_latent).to(device)
+        self.sparse_bn = nn.BatchNorm1d(self.n_concepts, momentum=0.8, eps=0.0001).to(device)
         self.mu_bn = nn.BatchNorm1d(n_latent, momentum = 0.8, eps=0.0001).to(device)
         self.softmax = nn.Softmax(dim=1).to(device)
         self.on_simplex = True
         self.alpha = alpha
-        self.n_concepts = n_concepts

         prior_var = 1 / self.alpha - (2.0 / n_latent) + 1 / (self.n_latent * self.n_latent)
         self.prior_var = torch.tensor([prior_var], device=device)
@@ -309,16 +302,18 @@ class ConceptLogisticGaussianDistribution(nn.Module):

         ## NOTE: the weights to model the log-variance are separate but the sparse encoder is shared
         ## between the lv_encoder and mu_encoder (above)
-        self.lv_encoder = Sequential(self.core_sparse, nn.Linear(n_concepts, n_latent)).to(device)
+        self.sparse_to_lv = nn.Linear(self.n_concepts, n_latent).to(device)
         self.lv_bn = nn.BatchNorm1d(n_latent, momentum = 0.8, eps=0.001).to(device)
         self.post_sample_dr_o = nn.Dropout(dr)


-    ## this is required by most priors
-    def _get_gaussian_sample(self, mu, lv, batch_size):
-        eps = Normal(torch.zeros(batch_size, self.n_latent),
-                     torch.ones(batch_size, self.n_latent)).sample().to(self.device)
-        return (mu + torch.exp(0.5*lv).to(self.device) * eps)
+    def freeze_pre_encoder(self):
+        self.sparse_encoder.W_enc.requires_grad = False
+        self.sparse_encoder.b_enc.requires_grad = False
+
+    def unfreeze_pre_encoder(self):
+        self.sparse_encoder.W_enc.requires_grad = True
+        self.sparse_encoder.b_enc.requires_grad = True

     def _get_kl_term(self, mu, lv):
         posterior_var = torch.exp(lv)
@@ -331,14 +326,20 @@ class ConceptLogisticGaussianDistribution(nn.Module):
     def forward(self, data, batch_size):
         """Generate a sample according to the logistic Gaussian latent distribution given the encoder outputs
         """
-        mu = self.mu_encoder(data)
+        _, sparse, _, _, _ = self.sparse_encoder(data)
+        #sparse_bn = self.sparse_bn(sparse)
+        mu = self.sparse_to_mu(sparse)
         mu_bn = self.mu_bn(mu)
-        lv = self.lv_encoder(data)
+        lv = self.sparse_to_lv(sparse)
         lv_bn = self.lv_bn(lv)
         z_p = self._get_gaussian_sample(mu_bn, lv_bn, batch_size)
         KL = self._get_kl_term(mu, lv)
         z = self.post_sample_dr_o(z_p)
         return self.softmax(z), KL
+
+    def get_sparse_encoding(self, data):
+        _, sparse, _, _, _ = self.sparse_encoder(data)
+        return sparse

     def get_mu_encoding(self, data, include_bn=True, normalize=False):
         """Provide the distribution mean as the natural result of running the full encoder
@@ -348,7 +349,8 @@ class ConceptLogisticGaussianDistribution(nn.Module):

         Returns:
             encoding (:class:`mxnet.ndarray.NDArray`): Encoding vector representing unnormalized topic proportions
         """
-        enc = self.mu_encoder(data)
+        _, sparse, _, _, _ = self.sparse_encoder(data)
+        enc = self.sparse_to_mu(sparse)
         if include_bn:
             enc = self.mu_bn(enc)
         mu = self.softmax(enc) if normalize else enc
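Summary of the refactor: the simple feed-forward encoder pieces (mu_encoder, mu_bn, softmax, softplus) move out of BaseDistribution into a new SimpleDistribution subclass, the existing distributions now inherit from SimpleDistribution, and ConceptLogisticGaussianDistribution takes a pre-built TopKEncoder from tmnt.sparse.modeling instead of constructing its own TopK activation. The sketch below is illustrative only; it uses a stand-in encoder exposing just the interface this diff relies on (W_enc, b_enc, get_dict_size(), and a forward whose second return value is the sparse code), since the real TopKEncoder's constructor and return types are not shown here.

import torch
import torch.nn as nn

class StandInTopKEncoder(nn.Module):
    """Illustrative stand-in for tmnt.sparse.modeling.TopKEncoder (interface assumed from this diff)."""
    def __init__(self, enc_size: int, dict_size: int, k: int = 32):
        super().__init__()
        self.W_enc = nn.Parameter(torch.randn(enc_size, dict_size) * 0.01)  # frozen by freeze_pre_encoder()
        self.b_enc = nn.Parameter(torch.zeros(dict_size))
        self.k = k
        self.dict_size = dict_size

    def get_dict_size(self) -> int:
        return self.dict_size

    def forward(self, x: torch.Tensor):
        pre = x @ self.W_enc + self.b_enc
        topk = torch.topk(pre, k=self.k, dim=-1)
        sparse = torch.zeros_like(pre).scatter_(-1, topk.indices, torch.relu(topk.values))
        # ConceptLogisticGaussianDistribution unpacks five values and keeps only the second (the sparse code)
        return pre, sparse, topk.indices, topk.values, None

# Hypothetical usage (assumes tmnt 0.7.58 is installed):
# from tmnt.distribution import ConceptLogisticGaussianDistribution
# dist = ConceptLogisticGaussianDistribution(768, 50, sparse_encoder=StandInTopKEncoder(768, 16000))
# topic_probs, kl = dist(torch.randn(8, 768), batch_size=8)
# concepts = dist.get_sparse_encoding(torch.randn(8, 768))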
tmnt/estimator.py CHANGED
@@ -943,6 +943,7 @@ class SeqBowEstimator(BaseEstimator):
         self._bow_matrix = None
         self.entropy_loss_coef = entropy_loss_coef
         self.pool_encoder = pool_encoder
+        self.freeze_pre_encoder_weights = False


     @classmethod
@@ -1013,6 +1014,9 @@ class SeqBowEstimator(BaseEstimator):
                    pretrained_param_file = param_file,
                    device = device)

+    def freeze_pre_encoder(self):
+        self.freeze_pre_encoder_weights = True
+


     def _get_model_bias_initialize(self, train_data):
@@ -1030,6 +1034,7 @@ class SeqBowEstimator(BaseEstimator):
                     entropy_loss_coef=self.entropy_loss_coef,
                     dropout=self.classifier_dropout)
         return model
+

     def _get_config(self):
         config = {}
@@ -1185,8 +1190,10 @@ class SeqBowEstimator(BaseEstimator):
         if self.model is None or not self.warm_start:
             self.model = self._get_model_bias_initialize(train_data)

-        model = self.model
+        if self.freeze_pre_encoder_weights:
+            self.model.freeze_pre_encoder()

+        model = self.model
         accumulate = False
         v_res = None

@@ -1268,7 +1275,8 @@ class SeqBowEstimator(BaseEstimator):
                     update_loss_details(total_ls_2, elbo_ls_2, red_ls_2, None)

                 if not accumulate or (batch_id + 1) % accumulate == 0:
-                    torch.nn.utils.clip_grad.clip_grad_value_(model.llm.parameters(), 1.0)
+                    if not self.freeze_pre_encoder_weights:
+                        torch.nn.utils.clip_grad.clip_grad_value_(model.llm.parameters(), 1.0)
                     optimizer.step()
                     dec_optimizer.step()
                     lr_scheduler.step()
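SeqBowEstimator gains a freeze_pre_encoder() switch: calling it before training sets freeze_pre_encoder_weights, the model's pre-encoder (the LLM, and the sparse encoder for concept distributions) is then frozen at fit time, and gradient clipping on model.llm.parameters() is skipped. A hedged usage sketch; only freeze_pre_encoder() comes from this diff, the surrounding calls are placeholders:

from tmnt.estimator import SeqBowEstimator

def train_with_frozen_backbone(estimator: SeqBowEstimator, train_data, dev_data=None):
    # Freeze the pre-encoder before training: during training the estimator calls
    # self.model.freeze_pre_encoder() and skips clip_grad_value_ on model.llm.parameters(),
    # so the LLM backbone no longer receives updates.
    estimator.freeze_pre_encoder()
    # The training entry point itself is unchanged by this release; invoke whichever
    # fit method you already use on SeqBowEstimator with train_data / dev_data here.
    return estimator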
tmnt/inference.py CHANGED
@@ -18,8 +18,9 @@ from tmnt.utils.recalibrate import recalibrate_scores
 from sklearn.datasets import load_svmlight_file
 from functools import partial
 from tmnt.data_loading import get_llm_tokenizer
-
 from typing import List, Tuple, Dict, Optional, Union, NoReturn
+from scipy.sparse import csr_matrix
+from tmnt.distribution import ConceptLogisticGaussianDistribution


 MAX_DESIGN_MATRIX = 250000000
@@ -347,6 +348,9 @@ class MetricSeqVEDInferencer(SeqVEDInferencer):



+
+
+



tmnt/modeling.py CHANGED
@@ -45,6 +45,9 @@ class BaseVAE(nn.Module):
         t_npmi_mat = torch.Tensor(npmi_mat).to(self.device)
         self.npmi_with_diversity_loss = NPMILossWithDiversity(t_npmi_mat, device=self.device, npmi_lambda=npmi_lambda, npmi_scale=npmi_scale)

+    def freeze_pre_encoder(self):
+        pass
+
     def get_ordered_terms(self):
         """
         Returns the top K terms for each topic based on sensitivity analysis. Terms whose
@@ -360,7 +363,7 @@ class CoherenceRegularizer(nn.Module):
 class BaseSeqBowVED(BaseVAE):
     def __init__(self,
                  llm,
-                 latent_dist,
+                 latent_dist: BaseDistribution,
                  num_classes=0,
                  dropout=0.0,
                  vocab_size=2000,
@@ -401,6 +404,11 @@ class BaseSeqBowVED(BaseVAE):
             return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
         else:
             return model_output.last_hidden_state[:,0,:]
+
+    def freeze_pre_encoder(self):
+        for p in self.llm.parameters():
+            p.requires_grad = False
+        self.latent_distribution.freeze_pre_encoder()

     def get_ordered_terms(self):
         """
@@ -447,6 +455,7 @@ class SeqBowVED(BaseSeqBowVED):
             self.classifier = torch.nn.Sequential()
             self.classifier.add_module("dr", nn.Dropout(self.dropout).to(self.device))
             self.classifier.add_module("l_out", nn.Linear(self.n_latent, self.num_classes).to(self.device))
+

     def forward(self, input_ids, attention_mask, bow=None): # pylint: disable=arguments-differ
         llm_output = self.llm(input_ids, attention_mask)
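The model-level counterpart: BaseVAE.freeze_pre_encoder() is a no-op, while BaseSeqBowVED.freeze_pre_encoder() disables gradients on every LLM parameter and delegates to the latent distribution. A small sketch for checking the effect on an already constructed model (model here stands for any SeqBowVED instance):

def count_trainable_parameters(module) -> int:
    # Number of parameters that will still receive gradient updates.
    return sum(p.numel() for p in module.parameters() if p.requires_grad)

# before = count_trainable_parameters(model)
# model.freeze_pre_encoder()   # sets requires_grad=False on model.llm parameters and,
#                              # for ConceptLogisticGaussianDistribution, on W_enc / b_enc
# after = count_trainable_parameters(model)
# assert after < before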
tmnt-0.7.57.dist-info/METADATA → tmnt-0.7.58.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.2
+Metadata-Version: 2.4
 Name: tmnt
-Version: 0.7.57
+Version: 0.7.58
 Summary: Topic modeling neural toolkit
 Home-page: https://github.com/mitre/tmnt.git
 Author: The MITRE Corporation
@@ -40,6 +40,7 @@ Dynamic: description
 Dynamic: description-content-type
 Dynamic: home-page
 Dynamic: license
+Dynamic: license-file
 Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary
tmnt-0.7.57.dist-info/RECORD → tmnt-0.7.58.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
 tmnt/__init__.py,sha256=EPNq1H7UMyMewWT_zTGBaC7ZouvCywX_gMX4G1dtmvw,250
 tmnt/configuration.py,sha256=P8PEhzVPKO5xG0FrdTLRQ60OYWigbzPY-OSx_hzQlrY,10054
-tmnt/data_loading.py,sha256=zB3wIBXgl_UKjjRLQgPwCZOVTcjHK4YahxCbsLd70RY,19238
-tmnt/distribution.py,sha256=2YBfaGIiUJc-OjKaotnKmicSEdL4OAGBx3icacbePQ8,14868
-tmnt/estimator.py,sha256=qh-pCbmhhtGpRKKQv10ANyQakuoMYaVH87NM5UIxtyM,67777
+tmnt/data_loading.py,sha256=LcVcXX00UsuAillRPILcvmqj3AsCIgzB6V_S6lfsbIY,19335
+tmnt/distribution.py,sha256=4gn1wnszVAErzICCvZXSYki0G78WC3_jyBr27N-Aj3E,15108
+tmnt/estimator.py,sha256=KnnvSNXm6cRL0GwDrGdgqqPX5ZubpCQ0WqcSXJDkUU4,68072
 tmnt/eval_npmi.py,sha256=8S-IE-bEhtQofF6oKeXs7oaUeu-7yDlaEqjMj52gmNQ,6549
-tmnt/inference.py,sha256=da8qAnjTDTuWQfPEOQewOfgikqE00XT1xGMiO2mckI4,15679
-tmnt/modeling.py,sha256=QRnHbNFp85LKp5ILYsJqTeQ3BV0jLPCwKX1Eh-Ed3Dc,29975
+tmnt/inference.py,sha256=Iwc2_w7QrS1epiVEm_Ewx5sYFNNMDfvhMJETOgJqm0E,15783
+tmnt/modeling.py,sha256=rGHQsW7ldycFUd1f9NzcnNuSRElr600vLwmYPl6YY0M,30215
 tmnt/preprocess/__init__.py,sha256=gwMejkQrnqKS05i0JVsUru2hDUR5jE1hKC10dL934GU,170
 tmnt/preprocess/tokenizer.py,sha256=-ZgowfbHrM040vbNTktZM_hdl6HDTqxSJ4mDAxq3dUs,14050
 tmnt/preprocess/vectorizer.py,sha256=RaianZ_DG3Nc-RI96FtmI4PCZPi5Nipx9a5xndLZ52M,20689
@@ -18,9 +18,9 @@ tmnt/utils/pubmed_utils.py,sha256=3sHwoun7vxb0GV-arhpXLMUbAZne0huAh9xQNy6H40E,12
 tmnt/utils/random.py,sha256=qY75WG3peWoMh9pUyCPBEo6q8IvkF6VRjeb5CqJOBF8,327
 tmnt/utils/recalibrate.py,sha256=TmpB8An8bslICZ13UTJfIvr8VoqiSedtpHxec4n8CHk,1439
 tmnt/utils/vocab.py,sha256=J6GFGLyvDgdmtVQjYlyzWjuykRD3kllCKPG1z0lI0P8,3504
-tmnt-0.7.57.dist-info/LICENSE,sha256=qFZJrfJ7Zi4IXDiyiGVrHWic_l1h2tc36tI8Z7rK9bs,11356
-tmnt-0.7.57.dist-info/METADATA,sha256=EDNrl4p3d9j2UXPwENrMAp0EgaRQuJCBGFvXdYoJTmI,1641
-tmnt-0.7.57.dist-info/NOTICE,sha256=p0kYIVAkReTFaGb4C-qPa7h5ztze6hGzOpjCMMbOipU,425
-tmnt-0.7.57.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
-tmnt-0.7.57.dist-info/top_level.txt,sha256=RpYgUl187sXnqmiwKjZZdcDlHz2AALs6bGdUcukyd_E,5
-tmnt-0.7.57.dist-info/RECORD,,
+tmnt-0.7.58.dist-info/licenses/LICENSE,sha256=qFZJrfJ7Zi4IXDiyiGVrHWic_l1h2tc36tI8Z7rK9bs,11356
+tmnt-0.7.58.dist-info/licenses/NOTICE,sha256=p0kYIVAkReTFaGb4C-qPa7h5ztze6hGzOpjCMMbOipU,425
+tmnt-0.7.58.dist-info/METADATA,sha256=drdqhfVdpDs5LD_FMAMZjPRWw_TnNqFlGsh0QGtm8QE,1663
+tmnt-0.7.58.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+tmnt-0.7.58.dist-info/top_level.txt,sha256=RpYgUl187sXnqmiwKjZZdcDlHz2AALs6bGdUcukyd_E,5
+tmnt-0.7.58.dist-info/RECORD,,
tmnt-0.7.57.dist-info/WHEEL → tmnt-0.7.58.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (76.0.0)
+Generator: setuptools (78.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any