PyPI - tmnt - Versions diffs - 0.7.44b20240127__py3-none-any.whl → 0.7.46__py3-none-any.whl - Mend

tmnt 0.7.44b20240127__py3-none-any.whl → 0.7.46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

tmnt/data_loading.py CHANGED Viewed

@@ -39,7 +39,8 @@ llm_catalog = {
     'openai-gpt' : (AutoTokenizer.from_pretrained, OpenAIGPTModel.from_pretrained),
     'sentence-transformers/all-mpnet-base-v2' : (AutoTokenizer.from_pretrained, AutoModel.from_pretrained),
     'allenai/scibert_scivocab_uncased': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained),
-    'johngiorgi/declutr-sci-base': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained)
+    'johngiorgi/declutr-sci-base': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained),
+    'BAAI/bge-base-en-v1.5': (AutoTokenizer.from_pretrained, AutoModel.from_pretrained)
     ## add more model options here if desired
     }

tmnt/estimator.py CHANGED Viewed

@@ -1115,7 +1115,7 @@ class SeqBowEstimator(BaseEstimator):
                  classifier_dropout = 0.0,
                  pure_classifier_objective = False,
                  validate_each_epoch = False,
-                 entropy_loss_coef = 1000.0,
+                 entropy_loss_coef = 0.0,
                  pool_encoder = True,
                  **kwargs):
         super(SeqBowEstimator, self).__init__(*args, **kwargs)
@@ -1527,13 +1527,6 @@ class SeqBowEstimator(BaseEstimator):
         else:
             self._output_status("Epoch [{}]. Objective = {} ==> PPL = {}. NPMI ={}. Redundancy = {}."
                                 .format(epoch_id, sc_obj, v_res['ppl'], v_res['npmi'], v_res['redundancy']))
-        #if self.reporter:
-        #if 'accuracy' in v_res:
-            #session.report({"objective": sc_obj, "coherence": v_res['npmi'], "perplexity": v_res['ppl'],
-            #                "redundancy": v_res['redundancy'], "accuracy": v_res['accuracy']})
-        #else:
-            #session.report({"objective": sc_obj, "coherence": v_res['npmi'], "perplexity": v_res['ppl'],
-            #            "redundancy": v_res['redundancy']})
         return sc_obj, v_res
@@ -1610,13 +1603,8 @@ class SeqBowMetricEstimator(SeqBowEstimator):
             bow_batch_b = seqs_b[3].to_dense()
             sums += bow_batch_a.sum(axis=0)
             sums += bow_batch_b.sum(axis=0)
-        return sums.cpu().numpy()                               #def _get_model_bias_initialize(self, train_data):
-    #    model = self._get_model()
-    #    tr_bow_matrix = self._get_bow_matrix(train_data)
-        #model.initialize_bias_terms(tr_bow_matrix.sum(axis=0))
-    #    return model
+        return sums.cpu().numpy()
     def _get_bow_matrix(self, dataloader, cache=False):
         bow_matrix = []
         for _, seqs in enumerate(dataloader):
@@ -1670,10 +1658,5 @@ class SeqBowMetricEstimator(SeqBowEstimator):
         v_res = self.validate(model, dev_data, epoch_id)
         self._output_status("Epoch [{}]. ==> elbo loss = {}; kldiv loss = {}"
                             .format(epoch_id, v_res['elbo_ls'], v_res['kl_ls']))
-        #session.report({"objective": sc_obj, "coherence": v_res['npmi'], "perplexity": v_res['ppl'],
-        #                "redundancy": v_res['redundancy']})
-        #if self.reporter:
-        #    self.reporter(epoch=epoch_id+1, objective=v_res['avg_prec'], time_step=time.time(), coherence=0.0,
-        #                  perplexity=0.0, redundancy=0.0)
         return v_res['kl_ls'], v_res

tmnt/modeling.py CHANGED Viewed

@@ -595,11 +595,11 @@ class MetricSeqBowVED(BaseSeqBowVED):
         elbo = elbo1 + elbo2
         rec_loss = rec_loss1 + rec_loss2
         KL_loss = KL_loss1 + KL_loss2
-        z_mu1 = self.latent_distribution.get_mu_encoding(enc2)
-        z_mu2 = self.latent_distribution.get_mu_encoding(enc2)
+        #z_mu1 = self.latent_distribution.get_mu_encoding(enc2)
+        #z_mu2 = self.latent_distribution.get_mu_encoding(enc2)
         redundancy_loss = entropy_loss1 + entropy_loss2 #self.get_redundancy_penalty()
-        return elbo, rec_loss, KL_loss, redundancy_loss, z_mu1, z_mu2
-        #return elbo, rec_loss, KL_loss, redundancy_loss, enc1, enc2
+        #return elbo, rec_loss, KL_loss, redundancy_loss, z_mu1, z_mu2
+        return elbo, rec_loss, KL_loss, redundancy_loss, enc1, enc2
 class GeneralizedSDMLLoss(_Loss):

{tmnt-0.7.44b20240127.dist-info → tmnt-0.7.46.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tmnt
-Version: 0.7.44b20240127
+Version: 0.7.46
 Summary: Topic modeling neural toolkit
 Home-page: https://github.com/mitre/tmnt.git
 Author: The MITRE Corporation
@@ -9,7 +9,7 @@ License: Apache
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: OS Independent
-Requires-Python: >=3.8
+Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
 License-File: NOTICE
@@ -27,7 +27,9 @@ Requires-Dist: pyOpenSSL ==18.0.0
 Requires-Dist: PySocks ==1.6.8
 Requires-Dist: sacremoses >=0.0.38
 Requires-Dist: sentence-splitter ==1.4
-Requires-Dist: umap-learn ==0.4.6
+Requires-Dist: umap-learn[plot] >=0.5.5
+Requires-Dist: numba
+Requires-Dist: scipy
 Requires-Dist: tabulate >=0.8.7
 Requires-Dist: torch >=2.1.2
 Requires-Dist: torchtext >=0.13.0
@@ -35,7 +37,7 @@ Requires-Dist: torchtext >=0.13.0
 The Topic Modeling Neural Toolkit (TMNT) is a software library that enables training
 topic models as neural network-based variational auto-encoders.
-Current stable version is: 0.7.44
+Current stable version is: 0.7.46
 Documentation can be found here: https://tmnt.readthedocs.io/en/stable/

{tmnt-0.7.44b20240127.dist-info → tmnt-0.7.46.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,11 @@
 tmnt/__init__.py,sha256=EPNq1H7UMyMewWT_zTGBaC7ZouvCywX_gMX4G1dtmvw,250
 tmnt/configuration.py,sha256=P8PEhzVPKO5xG0FrdTLRQ60OYWigbzPY-OSx_hzQlrY,10054
-tmnt/data_loading.py,sha256=B47kfq5nrpw2bHYT2qEv2tpCLT7EFwqD7ZDjsoBto_Q,18303
+tmnt/data_loading.py,sha256=IB7qgoeIY6a4i-YDB7kwWUU3LMvlCGF6_PgzlWDjkc8,18392
 tmnt/distribution.py,sha256=Pmyc5gwDd_-jP7vLVb0vdNQaSSvF1EuiTZEWg3KfmI8,10866
-tmnt/estimator.py,sha256=kQZ42MfOBBZuF0TQVdd9vBlw101ZlXk77mlws2ZvAS4,78014
+tmnt/estimator.py,sha256=cRdA3s3_PmbSU36xYc8cfano_rkqEl9j_0FM3eZ8IA8,76953
 tmnt/eval_npmi.py,sha256=ODRDMsBgDM__iCNEX399ck7bAhl7ydvgDqmpfR7Y-q4,5048
 tmnt/inference.py,sha256=Sw7GO7QiWVEtbPJKBjFB7AiKRmUOZbFZn3tCrsStzWw,17845
-tmnt/modeling.py,sha256=-fvmbT-KXr8luhELnCAOyZ-DUbTUd65cKRNRaH49EKI,33016
+tmnt/modeling.py,sha256=372eAVcnI5xcBYRwSO8N0XK_ECWHwRw7KfuIB8uz3RA,33018
 tmnt/preprocess/__init__.py,sha256=gwMejkQrnqKS05i0JVsUru2hDUR5jE1hKC10dL934GU,170
 tmnt/preprocess/tokenizer.py,sha256=-ZgowfbHrM040vbNTktZM_hdl6HDTqxSJ4mDAxq3dUs,14050
 tmnt/preprocess/vectorizer.py,sha256=RkdivqP76qAJDianV09lONad9NbfBVWLZgIbU_P1-zo,15796
@@ -17,9 +17,9 @@ tmnt/utils/ngram_helpers.py,sha256=VrIzou2oQHCLBLSWODDeikN3PYat1NqqvEeYQj_GhbA,1
 tmnt/utils/pubmed_utils.py,sha256=3sHwoun7vxb0GV-arhpXLMUbAZne0huAh9xQNy6H40E,1274
 tmnt/utils/random.py,sha256=qY75WG3peWoMh9pUyCPBEo6q8IvkF6VRjeb5CqJOBF8,327
 tmnt/utils/recalibrate.py,sha256=TmpB8An8bslICZ13UTJfIvr8VoqiSedtpHxec4n8CHk,1439
-tmnt-0.7.44b20240127.dist-info/LICENSE,sha256=qFZJrfJ7Zi4IXDiyiGVrHWic_l1h2tc36tI8Z7rK9bs,11356
-tmnt-0.7.44b20240127.dist-info/METADATA,sha256=RNb_SRd6cyvKGKSJT1NKTDdjjVVUfhDXqRuFIxmy2dE,1403
-tmnt-0.7.44b20240127.dist-info/NOTICE,sha256=p0kYIVAkReTFaGb4C-qPa7h5ztze6hGzOpjCMMbOipU,425
-tmnt-0.7.44b20240127.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-tmnt-0.7.44b20240127.dist-info/top_level.txt,sha256=RpYgUl187sXnqmiwKjZZdcDlHz2AALs6bGdUcukyd_E,5
-tmnt-0.7.44b20240127.dist-info/RECORD,,
+tmnt-0.7.46.dist-info/LICENSE,sha256=qFZJrfJ7Zi4IXDiyiGVrHWic_l1h2tc36tI8Z7rK9bs,11356
+tmnt-0.7.46.dist-info/METADATA,sha256=KLktIuJoTOtPvY1uML9pgNJwRE-Rxact3yLk092gw7I,1443
+tmnt-0.7.46.dist-info/NOTICE,sha256=p0kYIVAkReTFaGb4C-qPa7h5ztze6hGzOpjCMMbOipU,425
+tmnt-0.7.46.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+tmnt-0.7.46.dist-info/top_level.txt,sha256=RpYgUl187sXnqmiwKjZZdcDlHz2AALs6bGdUcukyd_E,5
+tmnt-0.7.46.dist-info/RECORD,,

{tmnt-0.7.44b20240127.dist-info → tmnt-0.7.46.dist-info}/LICENSE RENAMED Viewed

File without changes

{tmnt-0.7.44b20240127.dist-info → tmnt-0.7.46.dist-info}/NOTICE RENAMED Viewed

File without changes

{tmnt-0.7.44b20240127.dist-info → tmnt-0.7.46.dist-info}/WHEEL RENAMED Viewed

File without changes

{tmnt-0.7.44b20240127.dist-info → tmnt-0.7.46.dist-info}/top_level.txt RENAMED Viewed

File without changes

tmnt 0.7.44b20240127__py3-none-any.whl → 0.7.46__py3-none-any.whl

tmnt 0.7.44b20240127__py3-none-any.whl → 0.7.46__py3-none-any.whl