PyPI - tmnt - Versions diffs - 0.7.44b20240123__py3-none-any.whl → 0.7.44b20240125__py3-none-any.whl - Mend

tmnt 0.7.44b20240123__py3-none-any.whl → 0.7.44b20240125__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

tmnt/data_loading.py CHANGED Viewed

@@ -38,7 +38,8 @@ llm_catalog = {
     'bert-base-uncased' : (AutoTokenizer.from_pretrained, BertModel.from_pretrained),
     'openai-gpt' : (AutoTokenizer.from_pretrained, OpenAIGPTModel.from_pretrained),
     'sentence-transformers/all-mpnet-base-v2' : (AutoTokenizer.from_pretrained, AutoModel.from_pretrained),
-    'allenai/scibert_scivocab_uncased': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained)
+    'allenai/scibert_scivocab_uncased': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained),
+    'johngiorgi/declutr-sci-base': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained)
     ## add more model options here if desired
     }

tmnt/estimator.py CHANGED Viewed

@@ -1116,6 +1116,7 @@ class SeqBowEstimator(BaseEstimator):
                  pure_classifier_objective = False,
                  validate_each_epoch = False,
                  entropy_loss_coef = 1000.0,
+                 pool_encoder = True,
                  **kwargs):
         super(SeqBowEstimator, self).__init__(*args, **kwargs)
         self.pure_classifier_objective = pure_classifier_objective
@@ -1135,6 +1136,7 @@ class SeqBowEstimator(BaseEstimator):
         self.decoder_lr = decoder_lr
         self._bow_matrix = None
         self.entropy_loss_coef = entropy_loss_coef
+        self.pool_encoder = pool_encoder
     @classmethod
@@ -1217,7 +1219,7 @@ class SeqBowEstimator(BaseEstimator):
     def _get_model(self):
         llm_base_model = get_llm_model(self.llm_model_name).to(self.device)
         model = SeqBowVED(llm_base_model, self.latent_distribution, num_classes=self.n_labels, device=self.device,
-                          vocab_size = len(self.vocabulary), use_pooling = (self.llm_model_name.startswith("sentence-transformers")),
+                          vocab_size = len(self.vocabulary), use_pooling = self.pool_encoder,
                           entropy_loss_coef=self.entropy_loss_coef,
                           dropout=self.classifier_dropout)
         return model
@@ -1443,6 +1445,9 @@ class SeqBowEstimator(BaseEstimator):
             if class_ls is not None:
                 loss_details['class_loss'] += float(class_ls.mean())
+        sc_obj = None
+        v_res  = None
         for epoch_id in range(self.epochs):
             if self.metric is not None:
                 self.metric.reset()
@@ -1570,10 +1575,12 @@ class SeqBowEstimator(BaseEstimator):
 class SeqBowMetricEstimator(SeqBowEstimator):
-    def __init__(self, *args, sdml_smoothing_factor=0.3, metric_loss_temp=0.1, non_scoring_index=-1, **kwargs):
+    def __init__(self, *args, sdml_smoothing_factor=0.3, metric_loss_temp=0.1,
+                 use_sdml=False, non_scoring_index=-1, **kwargs):
         super(SeqBowMetricEstimator, self).__init__(*args, **kwargs)
-        #self.loss_function = GeneralizedSDMLLoss(smoothing_parameter=sdml_smoothing_factor, x2_downweight_idx=non_scoring_index)
-        self.loss_function = MultiNegativeCrossEntropyLoss(smoothing_parameter=sdml_smoothing_factor, metric_loss_temp=metric_loss_temp)
+        self.loss_function = \
+            GeneralizedSDMLLoss(smoothing_parameter=sdml_smoothing_factor, x2_downweight_idx=non_scoring_index) if use_sdml \
+            else MultiNegativeCrossEntropyLoss(smoothing_parameter=sdml_smoothing_factor, metric_loss_temp=metric_loss_temp)
         self.non_scoring_index = non_scoring_index ## if >=0 this will avoid considering this label index in evaluation

tmnt/modeling.py CHANGED Viewed

@@ -473,7 +473,7 @@ class BaseSeqBowVED(BaseVAE):
                  vocab_size=2000,
                  kld=0.1,
                  device='cpu',
-                 use_pooling=False,
+                 use_pooling=True,
                  entropy_loss_coef=1000.0,
                  redundancy_reg_penalty=0.0, pre_trained_embedding = None):
         super(BaseSeqBowVED, self).__init__(device=device, vocab_size=vocab_size)
@@ -493,7 +493,9 @@ class BaseSeqBowVED(BaseVAE):
         if pre_trained_embedding is not None:
             self.embedding = nn.Linear(len(pre_trained_embedding.idx_to_vec),
                                            pre_trained_embedding.idx_to_vec[0].size, bias=False)
-        self.apply(self._init_weights)
+        #self.apply(self._init_weights)
+        self.latent_distribution.apply(self._init_weights)
+        self.decoder.apply(self._init_weights)
     def _init_weights(self, module):
         if isinstance(module, torch.nn.Linear):

{tmnt-0.7.44b20240123.dist-info → tmnt-0.7.44b20240125.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tmnt
-Version: 0.7.44b20240123
+Version: 0.7.44b20240125
 Summary: Topic modeling neural toolkit
 Home-page: https://github.com/mitre/tmnt.git
 Author: The MITRE Corporation

{tmnt-0.7.44b20240123.dist-info → tmnt-0.7.44b20240125.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,11 @@
 tmnt/__init__.py,sha256=EPNq1H7UMyMewWT_zTGBaC7ZouvCywX_gMX4G1dtmvw,250
 tmnt/configuration.py,sha256=P8PEhzVPKO5xG0FrdTLRQ60OYWigbzPY-OSx_hzQlrY,10054
-tmnt/data_loading.py,sha256=_NpAwmpeFBoQp7xtWOLb6i3WS271JoSJqDx9BMrXtKM,18207
+tmnt/data_loading.py,sha256=B47kfq5nrpw2bHYT2qEv2tpCLT7EFwqD7ZDjsoBto_Q,18303
 tmnt/distribution.py,sha256=Pmyc5gwDd_-jP7vLVb0vdNQaSSvF1EuiTZEWg3KfmI8,10866
-tmnt/estimator.py,sha256=xk4QATqqD8ukxtraOQ6BvSJrdqGTQvX52fNdcgfQ3w8,77801
+tmnt/estimator.py,sha256=IIXjtKB09qUqL_lDiDbhd5IVsW7hLuCHo82fF27xp64,77942
 tmnt/eval_npmi.py,sha256=ODRDMsBgDM__iCNEX399ck7bAhl7ydvgDqmpfR7Y-q4,5048
 tmnt/inference.py,sha256=Sw7GO7QiWVEtbPJKBjFB7AiKRmUOZbFZn3tCrsStzWw,17845
-tmnt/modeling.py,sha256=NTgjTqvi3sUsEfQa8Kq8lGW3vST905B8OkNhQmNwpwA,32841
+tmnt/modeling.py,sha256=Q-CSN0oaftf6RhM3Y3zKk4xw1Wd_WeZmPexZy8nk2Nw,32947
 tmnt/preprocess/__init__.py,sha256=gwMejkQrnqKS05i0JVsUru2hDUR5jE1hKC10dL934GU,170
 tmnt/preprocess/tokenizer.py,sha256=-ZgowfbHrM040vbNTktZM_hdl6HDTqxSJ4mDAxq3dUs,14050
 tmnt/preprocess/vectorizer.py,sha256=RkdivqP76qAJDianV09lONad9NbfBVWLZgIbU_P1-zo,15796
@@ -17,9 +17,9 @@ tmnt/utils/ngram_helpers.py,sha256=VrIzou2oQHCLBLSWODDeikN3PYat1NqqvEeYQj_GhbA,1
 tmnt/utils/pubmed_utils.py,sha256=3sHwoun7vxb0GV-arhpXLMUbAZne0huAh9xQNy6H40E,1274
 tmnt/utils/random.py,sha256=qY75WG3peWoMh9pUyCPBEo6q8IvkF6VRjeb5CqJOBF8,327
 tmnt/utils/recalibrate.py,sha256=TmpB8An8bslICZ13UTJfIvr8VoqiSedtpHxec4n8CHk,1439
-tmnt-0.7.44b20240123.dist-info/LICENSE,sha256=qFZJrfJ7Zi4IXDiyiGVrHWic_l1h2tc36tI8Z7rK9bs,11356
-tmnt-0.7.44b20240123.dist-info/METADATA,sha256=BvdBJQro8PU8RZPCKCXaK7-Ui30wTQDAfK-hNQT0qlE,1403
-tmnt-0.7.44b20240123.dist-info/NOTICE,sha256=p0kYIVAkReTFaGb4C-qPa7h5ztze6hGzOpjCMMbOipU,425
-tmnt-0.7.44b20240123.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-tmnt-0.7.44b20240123.dist-info/top_level.txt,sha256=RpYgUl187sXnqmiwKjZZdcDlHz2AALs6bGdUcukyd_E,5
-tmnt-0.7.44b20240123.dist-info/RECORD,,
+tmnt-0.7.44b20240125.dist-info/LICENSE,sha256=qFZJrfJ7Zi4IXDiyiGVrHWic_l1h2tc36tI8Z7rK9bs,11356
+tmnt-0.7.44b20240125.dist-info/METADATA,sha256=0duXA_NTiacN4bKgC10fnqMdPeOfVEHqy9EDz7EqquU,1403
+tmnt-0.7.44b20240125.dist-info/NOTICE,sha256=p0kYIVAkReTFaGb4C-qPa7h5ztze6hGzOpjCMMbOipU,425
+tmnt-0.7.44b20240125.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+tmnt-0.7.44b20240125.dist-info/top_level.txt,sha256=RpYgUl187sXnqmiwKjZZdcDlHz2AALs6bGdUcukyd_E,5
+tmnt-0.7.44b20240125.dist-info/RECORD,,

{tmnt-0.7.44b20240123.dist-info → tmnt-0.7.44b20240125.dist-info}/LICENSE RENAMED Viewed

File without changes

{tmnt-0.7.44b20240123.dist-info → tmnt-0.7.44b20240125.dist-info}/NOTICE RENAMED Viewed

File without changes

{tmnt-0.7.44b20240123.dist-info → tmnt-0.7.44b20240125.dist-info}/WHEEL RENAMED Viewed

File without changes

{tmnt-0.7.44b20240123.dist-info → tmnt-0.7.44b20240125.dist-info}/top_level.txt RENAMED Viewed

File without changes

tmnt 0.7.44b20240123__py3-none-any.whl → 0.7.44b20240125__py3-none-any.whl

tmnt 0.7.44b20240123__py3-none-any.whl → 0.7.44b20240125__py3-none-any.whl