PyPI - torchtextclassifiers - Versions diffs - 1.0.0__tar.gz → 1.0.1__tar.gz - Mend

torchtextclassifiers 1.0.0.tar.gz → 1.0.1.tar.gz

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (21) hide show

{torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: torchtextclassifiers
-Version: 1.0.0
+Version: 1.0.1
 Summary: A text classification toolkit to easily build, train and evaluate deep learning text classifiers using PyTorch.
 Keywords: fastText,text classification,NLP,automatic coding,deep learning
 Author: Cédric Couralet, Meilame Tayebjee

{torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/pyproject.toml RENAMED Viewed

@@ -18,7 +18,7 @@ dependencies = [
     "pytorch-lightning>=2.4.0",
 ]
 requires-python = ">=3.11"
-version="1.0.0"
+version="1.0.1"
 [dependency-groups]

{torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/torchTextClassifiers.py RENAMED Viewed

@@ -198,6 +198,12 @@ class torchTextClassifiers:
         - Model training with early stopping
         - Best model loading after training
+        Note on Checkpoints:
+            After training, the best model checkpoint is automatically loaded.
+            This checkpoint contains the full training state (model weights,
+            optimizer, and scheduler state). Loading uses weights_only=False
+            as the checkpoint is self-generated and trusted.
         Args:
             X_train: Training input data
             y_train: Training labels
@@ -361,6 +367,7 @@ class torchTextClassifiers:
             best_model_path,
             model=self.pytorch_model,
             loss=training_config.loss,
+            weights_only=False,  # Required: checkpoint contains optimizer/scheduler state
         )
         self.pytorch_model = self.lightning_module.model.to(self.device)

{torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/utilities/plot_explainability.py RENAMED Viewed

@@ -53,8 +53,18 @@ def map_attributions_to_char(attributions, offsets, text):
         np.exp(attributions_per_char), axis=1, keepdims=True
     )  # softmax normalization
+def get_id_to_word(text, word_ids, offsets):
+    words = {}
+    for idx, word_id in enumerate(word_ids):
+        if word_id is None:
+            continue
+        start, end = offsets[idx]
+        words[int(word_id)] = text[start:end]
+    return words
-def map_attributions_to_word(attributions, word_ids):
+def map_attributions_to_word(attributions, text, word_ids, offsets):
     """
     Maps token-level attributions to word-level attributions based on word IDs.
     Args:
@@ -69,8 +79,9 @@ def map_attributions_to_word(attributions, word_ids):
         np.ndarray: Array of shape (top_k, num_words) containing word-level attributions.
             num_words is the number of unique words in the original text.
     """
     word_ids = np.array(word_ids)
+    words = get_id_to_word(text, word_ids, offsets)
     # Convert None to -1 for easier processing (PAD tokens)
     word_ids_int = np.array([x if x is not None else -1 for x in word_ids], dtype=int)
@@ -99,7 +110,7 @@ def map_attributions_to_word(attributions, word_ids):
         )  # zero-out non-matching tokens and sum attributions for all tokens belonging to the same word
     # assert word_attributions.sum(axis=1) == attributions.sum(axis=1), "Sum of word attributions per top_k must equal sum of token attributions per top_k."
-    return np.exp(word_attributions) / np.sum(
+    return words, np.exp(word_attributions) / np.sum(
         np.exp(word_attributions), axis=1, keepdims=True
     )  # softmax normalization
@@ -131,7 +142,7 @@ def plot_attributions_at_char(
         fig, ax = plt.subplots(figsize=figsize)
         ax.bar(range(len(text)), attributions_per_char[i])
         ax.set_xticks(np.arange(len(text)))
-        ax.set_xticklabels(list(text), rotation=90)
+        ax.set_xticklabels(list(text), rotation=45)
         title = titles[i] if titles is not None else f"Attributions for Top {i+1} Prediction"
         ax.set_title(title)
         ax.set_xlabel("Characters in Text")
@@ -142,7 +153,7 @@ def plot_attributions_at_char(
 def plot_attributions_at_word(
-    text, attributions_per_word, figsize=(10, 2), titles: Optional[List[str]] = None
+    text, words, attributions_per_word, figsize=(10, 2), titles: Optional[List[str]] = None
 ):
     """
     Plots word-level attributions as a heatmap.
@@ -159,14 +170,13 @@ def plot_attributions_at_word(
             "matplotlib is required for plotting. Please install it to use this function."
         )
-    words = text.split()
     top_k = attributions_per_word.shape[0]
     all_plots = []
     for i in range(top_k):
         fig, ax = plt.subplots(figsize=figsize)
         ax.bar(range(len(words)), attributions_per_word[i])
         ax.set_xticks(np.arange(len(words)))
-        ax.set_xticklabels(words, rotation=90)
+        ax.set_xticklabels(words, rotation=45)
         title = titles[i] if titles is not None else f"Attributions for Top {i+1} Prediction"
         ax.set_title(title)
         ax.set_xlabel("Words in Text")