wolof-translate 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/PKG-INFO +1 -1
  2. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/setup.py +1 -1
  3. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/bucket_iterator.py +40 -45
  4. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate.egg-info/PKG-INFO +1 -1
  5. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/setup.cfg +0 -0
  6. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/__init__.py +0 -0
  7. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/__pycache__/__init__.cpython-310.pyc +0 -0
  8. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/__pycache__/__init__.cpython-311.pyc +0 -0
  9. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/__pycache__/dataset_v1.cpython-310.pyc +0 -0
  10. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/__pycache__/sent_transformers.cpython-310.pyc +0 -0
  11. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/__init__.py +0 -0
  12. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/__pycache__/__init__.cpython-310.pyc +0 -0
  13. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/__pycache__/__init__.cpython-311.pyc +0 -0
  14. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/__pycache__/dataset_v1.cpython-310.pyc +0 -0
  15. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/__pycache__/dataset_v2.cpython-310.pyc +0 -0
  16. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/__pycache__/dataset_v3.cpython-310.pyc +0 -0
  17. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/__pycache__/dataset_v3.cpython-311.pyc +0 -0
  18. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/__pycache__/dataset_v4.cpython-310.pyc +0 -0
  19. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/__pycache__/dataset_v4.cpython-311.pyc +0 -0
  20. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/dataset_v1.py +0 -0
  21. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/dataset_v2.py +0 -0
  22. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/dataset_v3.py +0 -0
  23. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/dataset_v3_2.py +0 -0
  24. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/dataset_v4.py +0 -0
  25. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/data/dataset_v5.py +0 -0
  26. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/__init__.py +0 -0
  27. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/__pycache__/__init__.cpython-310.pyc +0 -0
  28. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/__pycache__/__init__.cpython-311.pyc +0 -0
  29. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/french/tfidfaug_w2idf.txt +0 -0
  30. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/french/tfidfaug_w2tfidf.txt +0 -0
  31. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/__init__.py +0 -0
  32. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/__pycache__/__init__.cpython-310.pyc +0 -0
  33. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/__pycache__/__init__.cpython-311.pyc +0 -0
  34. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/__pycache__/main.cpython-310.pyc +0 -0
  35. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/__pycache__/main.cpython-311.pyc +0 -0
  36. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/__pycache__/optimization.cpython-310.pyc +0 -0
  37. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/__pycache__/optimization.cpython-311.pyc +0 -0
  38. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/__pycache__/position.cpython-310.pyc +0 -0
  39. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/__pycache__/position.cpython-311.pyc +0 -0
  40. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/__pycache__/size.cpython-310.pyc +0 -0
  41. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/__pycache__/size.cpython-311.pyc +0 -0
  42. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/main.py +0 -0
  43. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/main_2.py +0 -0
  44. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/optimization.py +0 -0
  45. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/position.py +0 -0
  46. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/transformers/size.py +0 -0
  47. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/wolof/tfidfaug_w2idf.txt +0 -0
  48. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/models/wolof/tfidfaug_w2tfidf.txt +0 -0
  49. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/pipe/__init__.py +0 -0
  50. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/pipe/__pycache__/__init__.cpython-310.pyc +0 -0
  51. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/pipe/__pycache__/nlp_pipeline.cpython-310.pyc +0 -0
  52. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/pipe/nlp_pipeline.py +0 -0
  53. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/__init__.py +0 -0
  54. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/__pycache__/__init__.cpython-310.pyc +0 -0
  55. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/adverse_tokenizer.json +0 -0
  56. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/bart_tokenizers/tokenizer_v3.json +0 -0
  57. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/bart_tokenizers/tokenizer_v3_2.json +0 -0
  58. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/bart_tokenizers/tokenizer_v5.json +0 -0
  59. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.model +0 -0
  60. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v3.vocab +0 -0
  61. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v4.model +0 -0
  62. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v4.vocab +0 -0
  63. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v5.model +0 -0
  64. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v5.vocab +0 -0
  65. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v6.model +0 -0
  66. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v6.vocab +0 -0
  67. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v7.model +0 -0
  68. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v7.vocab +0 -0
  69. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v8.model +0 -0
  70. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v8.vocab +0 -0
  71. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v9.model +0 -0
  72. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers/tokenizer_v9.vocab +0 -0
  73. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/t5_tokenizers.zip +0 -0
  74. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/tokenizer_v1.json +0 -0
  75. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/tokenizer_v3_2.json +0 -0
  76. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/trax/sentencepiece_tokenizer_v4.subwords +0 -0
  77. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/trax/sentencepiece_tokenizer_v5.subwords +0 -0
  78. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/trax/sentencepiece_tokenizer_v6.subwords +0 -0
  79. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/tokenizers/trax/sentencepiece_tokenizer_v7.subwords +0 -0
  80. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/__init__.py +0 -0
  81. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/__pycache__/__init__.cpython-310.pyc +0 -0
  82. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/__pycache__/__init__.cpython-311.pyc +0 -0
  83. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/__pycache__/transformer_trainer.cpython-310.pyc +0 -0
  84. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/__pycache__/transformer_trainer.cpython-311.pyc +0 -0
  85. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/__pycache__/transformer_trainer_custom.cpython-310.pyc +0 -0
  86. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/__pycache__/transformer_trainer_ml.cpython-310.pyc +0 -0
  87. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/__pycache__/transformer_trainer_ml_.cpython-310.pyc +0 -0
  88. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/transformer_trainer.py +0 -0
  89. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/transformer_trainer_custom.py +0 -0
  90. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/transformer_trainer_ml.py +0 -0
  91. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/trainers/transformer_trainer_ml_.py +0 -0
  92. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__init__.py +0 -0
  93. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/__init__.cpython-310.pyc +0 -0
  94. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/__init__.cpython-311.pyc +0 -0
  95. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/bucket_iterator.cpython-310.pyc +0 -0
  96. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/database_manager.cpython-310.pyc +0 -0
  97. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/display_predictions.cpython-310.pyc +0 -0
  98. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/download_model.cpython-310.pyc +0 -0
  99. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/evaluate_custom.cpython-310.pyc +0 -0
  100. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/evaluation.cpython-310.pyc +0 -0
  101. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/evaluation.cpython-311.pyc +0 -0
  102. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/extract_new_sentences.cpython-310.pyc +0 -0
  103. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/recuperate_datasets.cpython-310.pyc +0 -0
  104. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/sent_corrections.cpython-310.pyc +0 -0
  105. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/sent_corrections.cpython-311.pyc +0 -0
  106. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/sent_transformers.cpython-310.pyc +0 -0
  107. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/sent_transformers.cpython-311.pyc +0 -0
  108. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/sent_unification.cpython-310.pyc +0 -0
  109. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/split_with_valid.cpython-310.pyc +0 -0
  110. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/split_with_valid.cpython-311.pyc +0 -0
  111. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/tokenize_text.cpython-310.pyc +0 -0
  112. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/__pycache__/training.cpython-310.pyc +0 -0
  113. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/database_manager.py +0 -0
  114. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/display_predictions.py +0 -0
  115. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/download_model.py +0 -0
  116. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/evaluate_custom.py +0 -0
  117. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/evaluation.py +0 -0
  118. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/extract_new_sentences.py +0 -0
  119. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/extract_poems.py +0 -0
  120. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/extract_sentences.py +0 -0
  121. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/improvements/__init__.py +0 -0
  122. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/improvements/__pycache__/__init__.cpython-310.pyc +0 -0
  123. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/improvements/__pycache__/__init__.cpython-311.pyc +0 -0
  124. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/improvements/__pycache__/end_marks.cpython-310.pyc +0 -0
  125. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/improvements/__pycache__/end_marks.cpython-311.pyc +0 -0
  126. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/improvements/end_marks.py +0 -0
  127. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/recuperate_datasets.py +0 -0
  128. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/recuperate_datasets_trunc.py +0 -0
  129. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/send_model.py +0 -0
  130. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/sent_corrections.py +0 -0
  131. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/sent_transformers.py +0 -0
  132. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/sent_unification.py +0 -0
  133. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/split_with_valid.py +0 -0
  134. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/tokenize_text.py +0 -0
  135. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/training.py +0 -0
  136. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate/utils/trunc_hg_training.py +0 -0
  137. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate.egg-info/SOURCES.txt +0 -0
  138. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate.egg-info/dependency_links.txt +0 -0
  139. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate.egg-info/requires.txt +0 -0
  140. {wolof_translate-0.0.1 → wolof_translate-0.0.2}/wolof_translate.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wolof_translate
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: Contain function and classes to process corpora for making translation between wolof text and other languages.
5
5
  Author: Oumar Kane
6
6
  Author-email: oumar.kane@univ-thies.sn
@@ -2,7 +2,7 @@ from setuptools import setup
2
2
 
3
3
  setup(
4
4
  name="wolof_translate",
5
- version="0.0.1",
5
+ version="0.0.2",
6
6
  author="Oumar Kane",
7
7
  author_email="oumar.kane@univ-thies.sn",
8
8
  description="Contain function and classes to process corpora for making translation between wolof text and other languages.",
@@ -1,84 +1,79 @@
1
-
2
1
  import torch
3
2
  import numpy as np
4
- from typing import *
3
+ from typing import Optional, List, Iterator
5
4
  from torch.utils.data import Sampler
6
- from torch.nn.utils.rnn import pad_sequence
7
5
  from math import ceil
8
6
 
9
- class SequenceLengthBatchSampler(Sampler):
10
- def __init__(self, dataset, boundaries, batch_sizes, input_key = None, label_key = None, drop_unique = True):
7
+ class SequenceLengthBatchSampler(Sampler[List[int]]):
8
+ def __init__(
9
+ self,
10
+ dataset,
11
+ boundaries: List[int],
12
+ batch_sizes: List[int],
13
+ input_key: Optional[int] = None,
14
+ label_key: Optional[int] = None,
15
+ drop_unique: bool = True,
16
+ ):
11
17
  self.dataset = dataset
12
18
  self.boundaries = boundaries
13
19
  self.batch_sizes = batch_sizes
14
- self.data_info = {}
15
20
  self.drop_unique = drop_unique
21
+ self.data_info = {}
16
22
 
17
- # Initialize dictionary with indices and element lengths
23
+ # Extract lengths
18
24
  for i in range(len(dataset)):
19
25
  data = dataset[i]
20
- length = (
21
- max(len(data[0]), len(data[2]))
22
- if (input_key is None and label_key is None)
23
- else max(len(data[input_key]), len(data[label_key]))
24
- )
26
+ if input_key is None or label_key is None:
27
+ length = max(len(data[0]), len(data[2]))
28
+ else:
29
+ length = max(len(data[input_key]), len(data[label_key]))
25
30
  self.data_info[i] = {"index": i, "length": length}
26
-
31
+
27
32
  self.calculate_length()
28
33
 
29
34
  def calculate_length(self):
30
35
  self.batches = []
31
-
32
- # Sort indices based on element length
33
36
  sorted_indices = sorted(self.data_info.keys(), key=lambda i: self.data_info[i]["length"])
34
-
35
- # Group indices into batches of sequences with the same length
37
+
38
+ prev_boundary = 0
36
39
  for boundary in self.boundaries:
37
- batch = [i for i in sorted_indices if self.data_info[i]["length"] <= boundary] # Filter indices based on length boundary
40
+ batch = [i for i in sorted_indices if prev_boundary < self.data_info[i]["length"] <= boundary]
38
41
  self.batches.append(batch)
39
- sorted_indices = [i for i in sorted_indices if i not in batch] # Remove processed indices
42
+ sorted_indices = [i for i in sorted_indices if i not in batch]
43
+ prev_boundary = boundary
40
44
 
41
- # Add remaining indices to the last batch
45
+ # Remaining sequences > last boundary
42
46
  self.batches.append(sorted_indices)
43
47
 
44
- # Calculate the total length of the data loader
45
- self.length = sum(ceil(len(batch) / batch_size) for batch, batch_size in zip(self.batches, self.batch_sizes) if len(batch) % batch_size != 1 or not self.drop_unique)
46
-
47
- def __iter__(self):
48
- # indices = list(self.data_info.keys()) # Get indices from the data_info dictionary
49
- # np.random.shuffle(indices) # Shuffle the indices
48
+ total_batches = 0
49
+ for batch, batch_size in zip(self.batches, self.batch_sizes):
50
+ n_full_batches = len(batch) // batch_size
51
+ leftover = len(batch) % batch_size
52
+ total_batches += n_full_batches
53
+ if leftover > 0 and (leftover != 1 or not self.drop_unique):
54
+ total_batches += 1
55
+ self.length = total_batches
50
56
 
51
- # Yield batches with the corresponding batch sizes
57
+ def __iter__(self) -> Iterator[List[int]]:
52
58
  for batch_indices, batch_size in zip(self.batches, self.batch_sizes):
53
59
  num_batches = len(batch_indices) // batch_size
54
60
 
55
61
  for i in range(num_batches):
56
- # Recuperate the current bucket
57
62
  current_bucket = batch_indices[i * batch_size: (i + 1) * batch_size]
58
-
59
- # Shuffle the current bucket
60
63
  np.random.shuffle(current_bucket)
64
+ yield [self.data_info[idx]["index"] for idx in current_bucket]
61
65
 
62
- # Yield the current bucket
63
- yield [self.data_info[i]["index"] for i in current_bucket]
64
-
65
- remaining_indices = len(batch_indices) % batch_size
66
-
67
- if remaining_indices > 0 and remaining_indices != 1 or not self.drop_unique:
68
-
69
- # Recuperate the current bucket
70
- current_bucket = batch_indices[-remaining_indices:]
71
-
72
- # Shuffle the current bucket
66
+ remaining = len(batch_indices) % batch_size
67
+ if remaining > 0 and (remaining != 1 or not self.drop_unique):
68
+ current_bucket = batch_indices[-remaining:]
73
69
  np.random.shuffle(current_bucket)
70
+ yield [self.data_info[idx]["index"] for idx in current_bucket]
74
71
 
75
- # Yield the current bucket
76
- yield [self.data_info[i]["index"] for i in batch_indices[-remaining_indices:]]
77
-
78
- def __len__(self):
72
+ def __len__(self) -> int:
79
73
  return self.length
80
74
 
81
75
 
76
+
82
77
  class BucketSampler(Sampler):
83
78
  def __init__(self, dataset, batch_size, sort_key=lambda x, index_1, index_2: max(len(x[index_1]), len(x[index_2])), input_key: Union[str, int] = 0, label_key: Union[str, int] = 1):
84
79
  self.dataset = dataset
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wolof-translate
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: Contain function and classes to process corpora for making translation between wolof text and other languages.
5
5
  Author: Oumar Kane
6
6
  Author-email: oumar.kane@univ-thies.sn