torchtextclassifiers-1.0.0.tar.gz → torchtextclassifiers-1.0.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/PKG-INFO +1 -1
  2. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/pyproject.toml +1 -1
  3. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/torchTextClassifiers.py +7 -0
  4. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/utilities/plot_explainability.py +17 -7
  5. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/README.md +0 -0
  6. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/__init__.py +0 -0
  7. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/dataset/__init__.py +0 -0
  8. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/dataset/dataset.py +0 -0
  9. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/model/__init__.py +0 -0
  10. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/model/components/__init__.py +0 -0
  11. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/model/components/attention.py +0 -0
  12. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/model/components/categorical_var_net.py +0 -0
  13. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/model/components/classification_head.py +0 -0
  14. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/model/components/text_embedder.py +0 -0
  15. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/model/lightning.py +0 -0
  16. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/model/model.py +0 -0
  17. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/tokenizers/WordPiece.py +0 -0
  18. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/tokenizers/__init__.py +0 -0
  19. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/tokenizers/base.py +0 -0
  20. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/tokenizers/ngram.py +0 -0
  21. {torchtextclassifiers-1.0.0 → torchtextclassifiers-1.0.1}/torchTextClassifiers/utilities/__init__.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: torchtextclassifiers
-Version: 1.0.0
+Version: 1.0.1
 Summary: A text classification toolkit to easily build, train and evaluate deep learning text classifiers using PyTorch.
 Keywords: fastText,text classification,NLP,automatic coding,deep learning
 Author: Cédric Couralet, Meilame Tayebjee
pyproject.toml
@@ -18,7 +18,7 @@ dependencies = [
     "pytorch-lightning>=2.4.0",
 ]
 requires-python = ">=3.11"
-version="1.0.0"
+version="1.0.1"
 
 
 [dependency-groups]
torchTextClassifiers/torchTextClassifiers.py
@@ -198,6 +198,12 @@ class torchTextClassifiers:
         - Model training with early stopping
         - Best model loading after training
 
+        Note on Checkpoints:
+            After training, the best model checkpoint is automatically loaded.
+            This checkpoint contains the full training state (model weights,
+            optimizer, and scheduler state). Loading uses weights_only=False
+            as the checkpoint is self-generated and trusted.
+
         Args:
             X_train: Training input data
             y_train: Training labels
@@ -361,6 +367,7 @@ class torchTextClassifiers:
             best_model_path,
             model=self.pytorch_model,
             loss=training_config.loss,
+            weights_only=False,  # Required: checkpoint contains optimizer/scheduler state
         )
 
         self.pytorch_model = self.lightning_module.model.to(self.device)
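
Why weights_only=False is needed: PyTorch 2.6 changed the default of torch.load to weights_only=True, which only unpickles plain tensors and containers and therefore rejects the optimizer and LR-scheduler objects stored in a full Lightning checkpoint. A minimal sketch of the behavior, with a hypothetical checkpoint path:

    import torch

    # Under PyTorch >= 2.6 this would fail with an UnpicklingError, because a
    # full Lightning checkpoint contains pickled optimizer/scheduler state:
    # ckpt = torch.load("best.ckpt")

    # Opting out is reasonable here only because the checkpoint is produced by
    # the library's own training run, not fetched from an untrusted source.
    ckpt = torch.load("best.ckpt", map_location="cpu", weights_only=False)
    print(sorted(ckpt))  # typically: epoch, global_step, lr_schedulers, optimizer_states, state_dict, ...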
torchTextClassifiers/utilities/plot_explainability.py
@@ -53,8 +53,18 @@ def map_attributions_to_char(attributions, offsets, text):
         np.exp(attributions_per_char), axis=1, keepdims=True
     )  # softmax normalization
 
+def get_id_to_word(text, word_ids, offsets):
+    words = {}
+    for idx, word_id in enumerate(word_ids):
+        if word_id is None:
+            continue
+        start, end = offsets[idx]
+        words[int(word_id)] = text[start:end]
+
+    return words
+
 
-def map_attributions_to_word(attributions, word_ids):
+def map_attributions_to_word(attributions, text, word_ids, offsets):
     """
     Maps token-level attributions to word-level attributions based on word IDs.
     Args:
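
A toy illustration of the new get_id_to_word helper, with made-up tokenizer output: word_ids maps each token to its word index (None for padding/special tokens) and offsets gives each token's character span in the text.

    text = "hello world"
    word_ids = [0, 1, None]                # one token per word; None = padding
    offsets = [(0, 5), (6, 11), (0, 0)]    # character spans into `text`
    get_id_to_word(text, word_ids, offsets)  # -> {0: "hello", 1: "world"}

    # When a word spans several subword tokens, the last span wins:
    # word_ids=[0, 0], offsets=[(0, 3), (3, 5)] yields {0: "lo"}, not {0: "hello"}.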
@@ -69,8 +79,9 @@ def map_attributions_to_word(attributions, word_ids):
         np.ndarray: Array of shape (top_k, num_words) containing word-level attributions.
                     num_words is the number of unique words in the original text.
     """
-
+
     word_ids = np.array(word_ids)
+    words = get_id_to_word(text, word_ids, offsets)
 
     # Convert None to -1 for easier processing (PAD tokens)
     word_ids_int = np.array([x if x is not None else -1 for x in word_ids], dtype=int)
@@ -99,7 +110,7 @@ def map_attributions_to_word(attributions, word_ids):
     )  # zero-out non-matching tokens and sum attributions for all tokens belonging to the same word
 
     # assert word_attributions.sum(axis=1) == attributions.sum(axis=1), "Sum of word attributions per top_k must equal sum of token attributions per top_k."
-    return np.exp(word_attributions) / np.sum(
+    return words, np.exp(word_attributions) / np.sum(
         np.exp(word_attributions), axis=1, keepdims=True
     )  # softmax normalization
 
@@ -131,7 +142,7 @@ def plot_attributions_at_char(
         fig, ax = plt.subplots(figsize=figsize)
         ax.bar(range(len(text)), attributions_per_char[i])
         ax.set_xticks(np.arange(len(text)))
-        ax.set_xticklabels(list(text), rotation=90)
+        ax.set_xticklabels(list(text), rotation=45)
         title = titles[i] if titles is not None else f"Attributions for Top {i+1} Prediction"
         ax.set_title(title)
         ax.set_xlabel("Characters in Text")
@@ -142,7 +153,7 @@ def plot_attributions_at_char(
 
 
 def plot_attributions_at_word(
-    text, attributions_per_word, figsize=(10, 2), titles: Optional[List[str]] = None
+    text, words, attributions_per_word, figsize=(10, 2), titles: Optional[List[str]] = None
 ):
     """
     Plots word-level attributions as a heatmap.
@@ -159,14 +170,13 @@ def plot_attributions_at_word(
             "matplotlib is required for plotting. Please install it to use this function."
         )
 
-    words = text.split()
     top_k = attributions_per_word.shape[0]
    all_plots = []
     for i in range(top_k):
         fig, ax = plt.subplots(figsize=figsize)
         ax.bar(range(len(words)), attributions_per_word[i])
         ax.set_xticks(np.arange(len(words)))
-        ax.set_xticklabels(words, rotation=90)
+        ax.set_xticklabels(words, rotation=45)
         title = titles[i] if titles is not None else f"Attributions for Top {i+1} Prediction"
         ax.set_title(title)
         ax.set_xlabel("Words in Text")