PyPI - torchtextclassifiers - Versions diffs - 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl - Mend

torchtextclassifiers 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

torchTextClassifiers/model/components/text_embedder.py CHANGED Viewed

@@ -23,6 +23,9 @@ class TextEmbedder(nn.Module):
         self.config = text_embedder_config
         self.attention_config = text_embedder_config.attention_config
+        if isinstance(self.attention_config, dict):
+            self.attention_config = AttentionConfig(**self.attention_config)
         if self.attention_config is not None:
             self.attention_config.n_embd = text_embedder_config.embedding_dim

torchTextClassifiers/model/lightning.py CHANGED Viewed

@@ -36,7 +36,7 @@ class TextClassificationModule(pl.LightningModule):
             scheduler_interval: Scheduler interval.
         """
         super().__init__()
-        self.save_hyperparameters(ignore=["model", "loss"])
+        self.save_hyperparameters(ignore=["model"])
         self.model = model
         self.loss = loss

torchTextClassifiers/tokenizers/__init__.py CHANGED Viewed

@@ -7,4 +7,6 @@ from .base import (
 )
 from .base import TokenizerOutput as TokenizerOutput
 from .ngram import NGramTokenizer as NGramTokenizer
-from .WordPiece import WordPieceTokenizer as WordPieceTokenizer
+if HAS_HF:
+    from .WordPiece import WordPieceTokenizer as WordPieceTokenizer

torchTextClassifiers/torchTextClassifiers.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import logging
+import pickle
 import time
 from dataclasses import asdict, dataclass, field
+from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple, Type, Union
 try:
@@ -75,6 +77,7 @@ class TrainingConfig:
     trainer_params: Optional[dict] = None
     optimizer_params: Optional[dict] = None
     scheduler_params: Optional[dict] = None
+    save_path: Optional[str] = "my_ttc"
     def to_dict(self) -> Dict[str, Any]:
         data = asdict(self)
@@ -362,6 +365,7 @@ class torchTextClassifiers:
             logger.info(f"Training completed in {end - start:.2f} seconds.")
         best_model_path = trainer.checkpoint_callback.best_model_path
+        self.checkpoint_path = best_model_path
         self.lightning_module = TextClassificationModule.load_from_checkpoint(
             best_model_path,
@@ -372,6 +376,9 @@ class torchTextClassifiers:
         self.pytorch_model = self.lightning_module.model.to(self.device)
+        self.save_path = training_config.save_path
+        self.save(self.save_path)
         self.lightning_module.eval()
     def _check_XY(self, X: np.ndarray, Y: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
@@ -576,6 +583,122 @@ class torchTextClassifiers:
                 "confidence": confidence,
             }
+    def save(self, path: Union[str, Path]) -> None:
+        """Save the complete torchTextClassifiers instance to disk.
+        This saves:
+        - Model configuration
+        - Tokenizer state
+        - PyTorch Lightning checkpoint (if trained)
+        - All other instance attributes
+        Args:
+            path: Directory path where the model will be saved
+        Example:
+            >>> ttc = torchTextClassifiers(tokenizer, model_config)
+            >>> ttc.train(X_train, y_train, training_config)
+            >>> ttc.save("my_model")
+        """
+        path = Path(path)
+        path.mkdir(parents=True, exist_ok=True)
+        # Save the checkpoint if model has been trained
+        checkpoint_path = None
+        if hasattr(self, "lightning_module"):
+            checkpoint_path = path / "model_checkpoint.ckpt"
+            # Save the current state as a checkpoint
+            trainer = pl.Trainer()
+            trainer.strategy.connect(self.lightning_module)
+            trainer.save_checkpoint(checkpoint_path)
+        # Prepare metadata to save
+        metadata = {
+            "model_config": self.model_config.to_dict(),
+            "ragged_multilabel": self.ragged_multilabel,
+            "vocab_size": self.vocab_size,
+            "embedding_dim": self.embedding_dim,
+            "categorical_vocabulary_sizes": self.categorical_vocabulary_sizes,
+            "num_classes": self.num_classes,
+            "checkpoint_path": str(checkpoint_path) if checkpoint_path else None,
+            "device": str(self.device) if hasattr(self, "device") else None,
+        }
+        # Save metadata
+        with open(path / "metadata.pkl", "wb") as f:
+            pickle.dump(metadata, f)
+        # Save tokenizer
+        tokenizer_path = path / "tokenizer.pkl"
+        with open(tokenizer_path, "wb") as f:
+            pickle.dump(self.tokenizer, f)
+        logger.info(f"Model saved successfully to {path}")
+    @classmethod
+    def load(cls, path: Union[str, Path], device: str = "auto") -> "torchTextClassifiers":
+        """Load a torchTextClassifiers instance from disk.
+        Args:
+            path: Directory path where the model was saved
+            device: Device to load the model on ('auto', 'cpu', 'cuda', etc.)
+        Returns:
+            Loaded torchTextClassifiers instance
+        Example:
+            >>> loaded_ttc = torchTextClassifiers.load("my_model")
+            >>> predictions = loaded_ttc.predict(X_test)
+        """
+        path = Path(path)
+        if not path.exists():
+            raise FileNotFoundError(f"Model directory not found: {path}")
+        # Load metadata
+        with open(path / "metadata.pkl", "rb") as f:
+            metadata = pickle.load(f)
+        # Load tokenizer
+        with open(path / "tokenizer.pkl", "rb") as f:
+            tokenizer = pickle.load(f)
+        # Reconstruct model_config
+        model_config = ModelConfig.from_dict(metadata["model_config"])
+        # Create instance
+        instance = cls(
+            tokenizer=tokenizer,
+            model_config=model_config,
+            ragged_multilabel=metadata["ragged_multilabel"],
+        )
+        # Set device
+        if device == "auto":
+            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        else:
+            device = torch.device(device)
+        instance.device = device
+        # Load checkpoint if it exists
+        if metadata["checkpoint_path"]:
+            checkpoint_path = path / "model_checkpoint.ckpt"
+            if checkpoint_path.exists():
+                # Load the checkpoint with weights_only=False since it's our own trusted checkpoint
+                instance.lightning_module = TextClassificationModule.load_from_checkpoint(
+                    str(checkpoint_path),
+                    model=instance.pytorch_model,
+                    weights_only=False,
+                )
+                instance.pytorch_model = instance.lightning_module.model.to(device)
+                instance.checkpoint_path = str(checkpoint_path)
+                logger.info(f"Model checkpoint loaded from {checkpoint_path}")
+            else:
+                logger.warning(f"Checkpoint file not found at {checkpoint_path}")
+        logger.info(f"Model loaded successfully from {path}")
+        return instance
     def __repr__(self):
         model_type = (
             self.lightning_module.__repr__()

{torchtextclassifiers-1.0.1.dist-info → torchtextclassifiers-1.0.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: torchtextclassifiers
-Version: 1.0.1
+Version: 1.0.2
 Summary: A text classification toolkit to easily build, train and evaluate deep learning text classifiers using PyTorch.
 Keywords: fastText,text classification,NLP,automatic coding,deep learning
 Author: Cédric Couralet, Meilame Tayebjee

{torchtextclassifiers-1.0.1.dist-info → torchtextclassifiers-1.0.2.dist-info}/RECORD RENAMED Viewed

@@ -6,16 +6,16 @@ torchTextClassifiers/model/components/__init__.py,sha256=-IT_6fCHZkRw6Hu7GdVeCt6
 torchTextClassifiers/model/components/attention.py,sha256=hhSMh_CvpR-hiP8hoCg4Fr_TovGlJpC_RHs3iW-Pnpc,4199
 torchTextClassifiers/model/components/categorical_var_net.py,sha256=no0QDidKCw1rlbJzD7S-Srhzn5P6vETGRT5Er-gzMnM,5699
 torchTextClassifiers/model/components/classification_head.py,sha256=myuEc5wFQ5gw_f519cUZ1Z7AMuQF7Vshq_B3aRt5xRE,2501
-torchTextClassifiers/model/components/text_embedder.py,sha256=tY2pXAt4IvayyvRpjiKGg5vGz_Q2-p_TOL6Jg2p8hYE,9058
-torchTextClassifiers/model/lightning.py,sha256=dOJzyGbqwFxriAtrIjC14E1f107YMtpiR65-OJy_Pc4,5367
+torchTextClassifiers/model/components/text_embedder.py,sha256=qInHVQfjxN1zBGSNNv_9Ku4EwjntWLazjasoHhFn_yI,9188
+torchTextClassifiers/model/lightning.py,sha256=dJEH_cPPh089v4hwLuyZuXe2QxIwWOqecsXqEYrsIHU,5359
 torchTextClassifiers/model/model.py,sha256=jjGjvK7C2Wly0e4S6gTC8Ty8y-o8reU-aniBqYS73Cc,6100
 torchTextClassifiers/tokenizers/WordPiece.py,sha256=HMHYV2SiwShlhWMQ6LXH4MtZE5GSsaNA2DlD340ABGE,3289
-torchTextClassifiers/tokenizers/__init__.py,sha256=I8IQ2-t85RVlZFwLjDFF_Te2S9uiwlymQDWx-3GeF-Y,334
+torchTextClassifiers/tokenizers/__init__.py,sha256=rWWIDIQnAL9vS33ygNlZju3A6lpzC8zDiL1GBT_2TWc,350
 torchTextClassifiers/tokenizers/base.py,sha256=OY6GIhI4KTdvvKq3VZowf64H7lAmdQyq4scZ10HxP3A,7570
 torchTextClassifiers/tokenizers/ngram.py,sha256=lHI8dtuCGWh0o7V58TJx_mTVIHm8udl6XuWccxgJPew,16375
-torchTextClassifiers/torchTextClassifiers.py,sha256=ru1gAp3IaNNiV1aMzU_TYxfm81buJLu-NkvrRwUGbEU,23053
+torchTextClassifiers/torchTextClassifiers.py,sha256=_2PpE9OEuNNskwJwMc1Dqu_DP5yp6T-H-C2VOKoKn2I,27683
 torchTextClassifiers/utilities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 torchTextClassifiers/utilities/plot_explainability.py,sha256=uSN6NbbVnnCd7Zy7zCDVM0iBbhx03tXlON6TlNk0tNU,7248
-torchtextclassifiers-1.0.1.dist-info/WHEEL,sha256=xDCZ-UyfvkGuEHPeI7BcJzYKIZzdqN8A8o1M5Om8IyA,79
-torchtextclassifiers-1.0.1.dist-info/METADATA,sha256=Nwp2MD_jexz6zQdwPXIsiLO7GDwTL3qVYK6D57aYMF4,3666
-torchtextclassifiers-1.0.1.dist-info/RECORD,,
+torchtextclassifiers-1.0.2.dist-info/WHEEL,sha256=xDCZ-UyfvkGuEHPeI7BcJzYKIZzdqN8A8o1M5Om8IyA,79
+torchtextclassifiers-1.0.2.dist-info/METADATA,sha256=ztc5fj_-smNTKq6j8CeLU39QRdk8Li8CzgxX1snispU,3666
+torchtextclassifiers-1.0.2.dist-info/RECORD,,

{torchtextclassifiers-1.0.1.dist-info → torchtextclassifiers-1.0.2.dist-info}/WHEEL RENAMED Viewed

File without changes

torchtextclassifiers 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl

torchtextclassifiers 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl