wolof-translate 0.0.1 (wolof_translate-0.0.1-py3-none-any.whl)

This diff shows the content of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package exactly as it appears in the public registry; since 0.0.1 is the first published version, every file appears as an addition.
Files changed (49)
  1. wolof_translate/__init__.py +73 -0
  2. wolof_translate/data/__init__.py +0 -0
  3. wolof_translate/data/dataset_v1.py +151 -0
  4. wolof_translate/data/dataset_v2.py +187 -0
  5. wolof_translate/data/dataset_v3.py +187 -0
  6. wolof_translate/data/dataset_v3_2.py +187 -0
  7. wolof_translate/data/dataset_v4.py +202 -0
  8. wolof_translate/data/dataset_v5.py +65 -0
  9. wolof_translate/models/__init__.py +0 -0
  10. wolof_translate/models/transformers/__init__.py +0 -0
  11. wolof_translate/models/transformers/main.py +865 -0
  12. wolof_translate/models/transformers/main_2.py +362 -0
  13. wolof_translate/models/transformers/optimization.py +41 -0
  14. wolof_translate/models/transformers/position.py +46 -0
  15. wolof_translate/models/transformers/size.py +44 -0
  16. wolof_translate/pipe/__init__.py +1 -0
  17. wolof_translate/pipe/nlp_pipeline.py +512 -0
  18. wolof_translate/tokenizers/__init__.py +0 -0
  19. wolof_translate/trainers/__init__.py +0 -0
  20. wolof_translate/trainers/transformer_trainer.py +760 -0
  21. wolof_translate/trainers/transformer_trainer_custom.py +882 -0
  22. wolof_translate/trainers/transformer_trainer_ml.py +925 -0
  23. wolof_translate/trainers/transformer_trainer_ml_.py +1042 -0
  24. wolof_translate/utils/__init__.py +1 -0
  25. wolof_translate/utils/bucket_iterator.py +143 -0
  26. wolof_translate/utils/database_manager.py +116 -0
  27. wolof_translate/utils/display_predictions.py +162 -0
  28. wolof_translate/utils/download_model.py +40 -0
  29. wolof_translate/utils/evaluate_custom.py +147 -0
  30. wolof_translate/utils/evaluation.py +74 -0
  31. wolof_translate/utils/extract_new_sentences.py +810 -0
  32. wolof_translate/utils/extract_poems.py +60 -0
  33. wolof_translate/utils/extract_sentences.py +562 -0
  34. wolof_translate/utils/improvements/__init__.py +0 -0
  35. wolof_translate/utils/improvements/end_marks.py +45 -0
  36. wolof_translate/utils/recuperate_datasets.py +94 -0
  37. wolof_translate/utils/recuperate_datasets_trunc.py +85 -0
  38. wolof_translate/utils/send_model.py +26 -0
  39. wolof_translate/utils/sent_corrections.py +169 -0
  40. wolof_translate/utils/sent_transformers.py +27 -0
  41. wolof_translate/utils/sent_unification.py +97 -0
  42. wolof_translate/utils/split_with_valid.py +72 -0
  43. wolof_translate/utils/tokenize_text.py +46 -0
  44. wolof_translate/utils/training.py +213 -0
  45. wolof_translate/utils/trunc_hg_training.py +196 -0
  46. wolof_translate-0.0.1.dist-info/METADATA +31 -0
  47. wolof_translate-0.0.1.dist-info/RECORD +49 -0
  48. wolof_translate-0.0.1.dist-info/WHEEL +5 -0
  49. wolof_translate-0.0.1.dist-info/top_level.txt +1 -0
wolof_translate/utils/training.py
@@ -0,0 +1,213 @@
+ from wolof_translate import *
+ import warnings
+
+
+ def train(config: dict):
+
+     # ---------------------------------------
+     # add distribution if necessary (https://github.com/aws/amazon-sagemaker-examples/blob/main/sagemaker-python-sdk/pytorch_mnist/mnist.py)
+
+     logger = config["logger"]
+
+     is_distributed = len(config["hosts"]) > 1 and config["backend"] is not None
+
+     use_cuda = config["num_gpus"] > 0
+
+     config.update({"num_workers": 1, "pin_memory": True} if use_cuda else {})
+
+     if not logger is None:
+
+         logger.debug("Distributed training - {}".format(is_distributed))
+
+         logger.debug("Number of gpus available - {}".format(config["num_gpus"]))
+
+     if is_distributed:
+         # Initialize the distributed environment.
+         world_size = len(config["hosts"])
+
+         os.environ["WORLD_SIZE"] = str(world_size)
+
+         host_rank = config["hosts"].index(config["current_host"])
+
+         os.environ["RANK"] = str(host_rank)
+
+         dist.init_process_group(
+             backend=config["backend"], rank=host_rank, world_size=world_size
+         )
+
+         if not logger is None:
+             logger.info(
+                 "Initialized the distributed environment: '{}' backend on {} nodes. ".format(
+                     config["backend"], dist.get_world_size()
+                 )
+                 + "Current host rank is {}. Number of gpus: {}".format(
+                     dist.get_rank(), config["num_gpus"]
+                 )
+             )
+     # ---------------------------------------
+
+     # split the data
+     if config["include_split"]:
+         split_data(
+             config["random_state"], config["data_directory"], config["data_file"]
+         )
+
+     # recuperate the tokenizer
+     tokenizer = T5TokenizerFast(config["tokenizer_path"])
+
+     # recuperate train and test set
+     train_dataset, test_dataset = recuperate_datasets(
+         config["char_p"],
+         config["word_p"],
+         config["max_len"],
+         config["end_mark"],
+         tokenizer,
+         config["corpus_1"],
+         config["corpus_2"],
+         config["train_file"],
+         config["test_file"],
+     )
+
+     # initialize the evaluation object
+     evaluation = TranslationEvaluation(tokenizer, train_dataset.decode)
+
+     # let us initialize the trainer
+     trainer = ModelRunner(
+         model=Transformer,
+         version=config["version"],
+         seed=0,
+         evaluation=evaluation,
+         optimizer=Adafactor,
+     )
+
+     # initialize the encoder and the decoder layers
+     encoder_layer = nn.TransformerEncoderLayer(
+         config["d_model"],
+         config["n_head"],
+         config["dim_ff"],
+         config["drop_out_rate"],
+         batch_first=True,
+     )
+
+     decoder_layer = nn.TransformerDecoderLayer(
+         config["d_model"],
+         config["n_head"],
+         config["dim_ff"],
+         config["drop_out_rate"],
+         batch_first=True,
+     )
+
+     # let us initialize the encoder and the decoder
+     encoder = nn.TransformerEncoder(encoder_layer, config["n_encoders"])
+
+     decoder = nn.TransformerDecoder(decoder_layer, config["n_decoders"])
+
+     # -------------------------------------
+     # in the case when the linear learning rate scheduler with warmup is used
+
+     # let us calculate the appropriate warmup steps (let us take a max epoch of 100)
+     # length = len(train_dataset)
+
+     # n_steps = length // config['batch_size']
+
+     # num_steps = config['max_epoch'] * n_steps
+
+     # warmup_steps = (config['max_epoch'] * n_steps) * config['warmup_ratio']
+
+     # Initialize the scheduler parameters
+     # scheduler_args = {'num_warmup_steps': warmup_steps, 'num_training_steps': num_steps}
+     # -------------------------------------
+
+     # Initialize the transformer parameters
+     model_args = {
+         "vocab_size": len(tokenizer),
+         "encoder": encoder,
+         "decoder": decoder,
+         "class_criterion": nn.CrossEntropyLoss(
+             label_smoothing=config["label_smoothing"]
+         ),
+         "max_len": config["max_len"],
+     }
+
+     # Initialize the optimizer parameters
+     optimizer_args = {
+         "lr": config["learning_rate"],
+         "weight_decay": config["weight_decay"],
+         # 'betas': (0.9, 0.98),
+         "warmup_init": config["warmup_init"],
+         "relative_step": config["relative_step"],
+     }
+
+     # ----------------------------
+     # initialize the bucket samplers for distributed environment
+     boundaries = config["boundaries"]
+     batch_sizes = config["batch_sizes"]
+
+     train_sampler = SequenceLengthBatchSampler(
+         train_dataset, boundaries=boundaries, batch_sizes=batch_sizes
+     )
+
+     test_sampler = SequenceLengthBatchSampler(
+         test_dataset, boundaries=boundaries, batch_sizes=batch_sizes
+     )
+
+     # ------------------------------
+     # initialize a bucket sampler with fixed batch size in the case of single machine
+     # with parallelization on multiple gpus
+     # train_sampler = BucketSampler(train_dataset, config['batch_size'])
+
+     # test_sampler = BucketSampler(test_dataset, config['batch_size'])
+
+     # ------------------------------
+
+     # Initialize the loaders parameters
+     train_loader_args = {
+         "batch_sampler": train_sampler,
+         "collate_fn": collate_fn,
+         "num_workers": config["num_workers"],
+         "pin_memory": config["pin_memory"],
+     }
+
+     test_loader_args = {
+         "batch_sampler": test_sampler,
+         "collate_fn": collate_fn,
+         "num_workers": config["num_workers"],
+         "pin_memory": config["pin_memory"],
+     }
+
+     # Add the datasets and hyperparameters to trainer
+     trainer.compile(
+         train_dataset,
+         test_dataset,
+         tokenizer,
+         train_loader_args,
+         test_loader_args,
+         optimizer_kwargs=optimizer_args,
+         model_kwargs=model_args,
+         # lr_scheduler=get_linear_schedule_with_warmup,
+         # lr_scheduler_kwargs=scheduler_args,
+         predict_with_generate=True,
+         is_distributed=is_distributed,
+         logging_dir=config["logging_dir"],
+         dist=dist,
+     )
+
+     # load the model
+     trainer.load(config["model_dir"], load_best=not config["continue"])
+
+     # Train the model
+     trainer.train(
+         config["epochs"] - trainer.current_epoch,
+         auto_save=True,
+         log_step=config["log_step"],
+         saving_directory=config["new_model_dir"],
+         save_best=config["save_best"],
+         metric_for_best_model=config["metric_for_best_model"],
+         metric_objective=config["metric_objective"],
+     )
+
+     if config["return_trainer"]:
+
+         return trainer
+
+     return None
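
For orientation, here is a minimal sketch of how this entry point might be invoked. The config keys mirror the config[...] lookups in the hunk above; every value below (hosts, paths, hyperparameters, metric names such as "bleu") is an illustrative placeholder assumed for this sketch, not a setting shipped with the package, and the import path wolof_translate.utils.training is taken from the RECORD listing further down.

import logging

from wolof_translate.utils.training import train

# Illustrative configuration only: the keys are the ones the function reads,
# but all values are assumptions made for this example.
config = {
    # logging / (optional) distributed setup
    "logger": logging.getLogger("wolof_translate"),
    "hosts": ["algo-1"],           # single host, so no process group is initialized
    "current_host": "algo-1",
    "backend": None,
    "num_gpus": 1,
    "num_workers": 1,              # read back when building the data loaders
    "pin_memory": True,
    # data preparation
    "include_split": False,
    "random_state": 0,
    "data_directory": "data/",
    "data_file": "corpora.csv",
    "tokenizer_path": "tokenizers/t5_tokenizer.model",
    "char_p": 0.0,
    "word_p": 0.0,
    "max_len": 50,
    "end_mark": 0,
    "corpus_1": "french",
    "corpus_2": "wolof",
    "train_file": "train.csv",
    "test_file": "test.csv",
    # model architecture
    "version": 1,
    "d_model": 512,
    "n_head": 8,
    "dim_ff": 2048,
    "drop_out_rate": 0.1,
    "n_encoders": 6,
    "n_decoders": 6,
    "label_smoothing": 0.1,
    # optimization (Adafactor)
    "learning_rate": 1e-3,
    "weight_decay": 0.0,
    "warmup_init": False,
    "relative_step": False,
    # length-bucketed batching
    "boundaries": [10, 20, 30, 40],
    "batch_sizes": [64, 32, 16, 8, 4],
    # training loop / checkpointing
    "logging_dir": "logs/",
    "model_dir": "checkpoints/",
    "new_model_dir": "checkpoints/",
    "continue": True,              # trainer.load uses load_best=not config["continue"]
    "epochs": 100,
    "log_step": 100,
    "save_best": True,
    "metric_for_best_model": "bleu",
    "metric_objective": "maximize",
    "return_trainer": True,
}

trainer = train(config)
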
wolof_translate/utils/trunc_hg_training.py
@@ -0,0 +1,196 @@
+ from wolof_translate import *
+ import warnings
+
+
+ def train(config: dict):
+
+     # ---------------------------------------
+     # add distribution if necessary (https://github.com/aws/amazon-sagemaker-examples/blob/main/sagemaker-python-sdk/pytorch_mnist/mnist.py)
+
+     logger = config["logger"]
+
+     is_distributed = len(config["hosts"]) > 1 and config["backend"] is not None
+
+     use_cuda = config["num_gpus"] > 0
+
+     config.update({"num_workers": 1, "pin_memory": True} if use_cuda else {})
+
+     if not logger is None:
+
+         logger.debug("Distributed training - {}".format(is_distributed))
+
+         logger.debug("Number of gpus available - {}".format(config["num_gpus"]))
+
+     if is_distributed:
+         # Initialize the distributed environment.
+         world_size = len(config["hosts"])
+
+         os.environ["WORLD_SIZE"] = str(world_size)
+
+         host_rank = config["hosts"].index(config["current_host"])
+
+         os.environ["RANK"] = str(host_rank)
+
+         dist.init_process_group(
+             backend=config["backend"], rank=host_rank, world_size=world_size
+         )
+
+         if not logger is None:
+             logger.info(
+                 "Initialized the distributed environment: '{}' backend on {} nodes. ".format(
+                     config["backend"], dist.get_world_size()
+                 )
+                 + "Current host rank is {}. Number of gpus: {}".format(
+                     dist.get_rank(), config["num_gpus"]
+                 )
+             )
+     # ---------------------------------------
+
+     # split the data
+     if config["include_split"]:
+         split_data(
+             config["random_state"], config["data_directory"], config["data_file"]
+         )
+
+     # recuperate the tokenizer
+     tokenizer = T5TokenizerFast(config["tokenizer_path"])
+
+     # Initialize the model name
+     model_name = "t5-small"
+
+     # import the model with its pre-trained weights
+     model = T5ForConditionalGeneration.from_pretrained(model_name)
+
+     # resize the token embeddings
+     model.resize_token_embeddings(len(tokenizer))
+
+     # recuperate train and test set
+     train_dataset, test_dataset = recuperate_datasets(
+         config["char_p"],
+         config["word_p"],
+         50,
+         config["end_mark"],
+         tokenizer,
+         config["corpus_1"],
+         config["corpus_2"],
+         config["train_file"],
+         config["test_file"],
+     )
+
+     # initialize the evaluation object
+     evaluation = TranslationEvaluation(tokenizer, train_dataset.decode)
+
+     # let us initialize the trainer
+     trainer = ModelRunner(
+         model=model,
+         version=config["version"],
+         seed=0,
+         evaluation=evaluation,
+         optimizer=Adafactor,
+     )
+
+     # -------------------------------------
+     # in the case when the linear learning rate scheduler with warmup is used
+
+     # let us calculate the appropriate warmup steps (let us take a max epoch of 100)
+     # length = len(train_dataset)
+
+     # n_steps = length // config['batch_size']
+
+     # num_steps = config['max_epoch'] * n_steps
+
+     # warmup_steps = (config['max_epoch'] * n_steps) * config['warmup_ratio']
+
+     # Initialize the scheduler parameters
+     # scheduler_args = {'num_warmup_steps': warmup_steps, 'num_training_steps': num_steps}
+     # -------------------------------------
+
+     # Initialize the optimizer parameters
+     optimizer_args = {
+         "lr": config["learning_rate"],
+         "weight_decay": config["weight_decay"],
+         # 'betas': (0.9, 0.98),
+         "warmup_init": config["warmup_init"],
+         "relative_step": config["relative_step"],
+     }
+
+     # ----------------------------
+     # initialize the bucket samplers for distributed environment
+     # boundaries = config['boundaries']
+     # batch_sizes = config['batch_sizes']
+
+     # train_sampler = SequenceLengthBatchSampler(train_dataset,
+     # boundaries = boundaries,
+     # batch_sizes = batch_sizes)
+
+     # test_sampler = SequenceLengthBatchSampler(test_dataset,
+     # boundaries = boundaries,
+     # batch_sizes = batch_sizes)
+
+     # ------------------------------
+     # initialize a bucket sampler with fixed batch size in the case of single machine
+     # with parallelization on multiple gpus
+     train_sampler = BucketSampler(train_dataset, config["batch_size"])
+
+     test_sampler = BucketSampler(test_dataset, config["batch_size"])
+
+     # ------------------------------
+
+     # Initialize the loaders parameters
+     train_loader_args = {
+         "batch_sampler": train_sampler,
+         "collate_fn": partial(
+             collate_fn_trunc,
+             max_len=train_dataset.max_len,
+             eos_token_id=tokenizer.eos_token_id,
+             pad_token_id=tokenizer.pad_token_id,
+         ),
+     }
+
+     test_loader_args = {
+         "batch_sampler": test_sampler,
+         "collate_fn": partial(
+             collate_fn_trunc,
+             max_len=train_dataset.max_len,
+             eos_token_id=tokenizer.eos_token_id,
+             pad_token_id=tokenizer.pad_token_id,
+         ),
+     }
+
+     # Add the datasets and hyperparameters to trainer
+     trainer.compile(
+         train_dataset,
+         test_dataset,
+         tokenizer,
+         train_loader_args,
+         test_loader_args,
+         optimizer_kwargs=optimizer_args,
+         # lr_scheduler=get_linear_schedule_with_warmup,
+         # lr_scheduler_kwargs=scheduler_args,
+         predict_with_generate=True,
+         stopping_patience=config["stopping_patience"],
+         hugging_face=True,
+         is_distributed=is_distributed,
+         logging_dir=config["logging_dir"],
+         dist=dist,
+     )
+
+     # load the model
+     trainer.load(config["model_dir"], load_best=not config["continue"])
+
+     # Train the model
+     trainer.train(
+         config["epochs"] - trainer.current_epoch,
+         auto_save=True,
+         log_step=config["log_step"],
+         saving_directory=config["new_model_dir"],
+         save_best=config["save_best"],
+         metric_for_best_model=config["metric_for_best_model"],
+         metric_objective=config["metric_objective"],
+     )
+
+     if config["return_trainer"]:
+
+         return trainer
+
+     return None
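
This second entry point reads largely the same configuration keys as wolof_translate/utils/training.py, but fine-tunes a pre-trained t5-small checkpoint, hard-codes the sequence length to 50, batches with the fixed-size BucketSampler, and binds the truncating collate function with functools.partial. The sketch below shows only the delta relative to the previous example; the two new keys and their values are placeholders, and the import path comes from the RECORD listing further down.

from wolof_translate.utils.trunc_hg_training import train as train_t5

# Reusing the illustrative `config` dict from the previous sketch; only the deltas matter here.
config.update(
    {
        "batch_size": 16,        # fixed batch size handed to BucketSampler
        "stopping_patience": 5,  # passed to trainer.compile (presumably early stopping)
    }
)
# Keys such as d_model, n_head, dim_ff, drop_out_rate, n_encoders, n_decoders,
# label_smoothing, boundaries, and batch_sizes are not read by this entry point,
# and max_len is fixed at 50 inside the function.

trainer = train_t5(config)
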
wolof_translate-0.0.1.dist-info/METADATA
@@ -0,0 +1,31 @@
+ Metadata-Version: 2.1
+ Name: wolof-translate
+ Version: 0.0.1
+ Summary: Contain function and classes to process corpora for making translation between wolof text and other languages.
+ Author: Oumar Kane
+ Author-email: oumar.kane@univ-thies.sn
+ Requires-Dist: accelerate
+ Requires-Dist: torch
+ Requires-Dist: spacy
+ Requires-Dist: nltk
+ Requires-Dist: gensim
+ Requires-Dist: furo
+ Requires-Dist: streamlit
+ Requires-Dist: tokenizers
+ Requires-Dist: tensorboard
+ Requires-Dist: evaluate
+ Requires-Dist: transformers
+ Requires-Dist: pandas
+ Requires-Dist: numpy
+ Requires-Dist: scikit-learn
+ Requires-Dist: matplotlib
+ Requires-Dist: plotly
+ Requires-Dist: sacrebleu
+ Requires-Dist: nlpaug
+ Requires-Dist: wandb
+ Requires-Dist: pytorch-lightning
+ Requires-Dist: selenium
+ Requires-Dist: sentencepiece
+ Requires-Dist: peft
+ Requires-Dist: rouge-score
+
wolof_translate-0.0.1.dist-info/RECORD
@@ -0,0 +1,49 @@
+ wolof_translate/__init__.py,sha256=qHFFSR2P3SpUh62FuZghk3KWNCeo2At_SJnRb3wRRpU,2509
+ wolof_translate/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wolof_translate/data/dataset_v1.py,sha256=7BWW4jv8qR1Wau_-GT007hrZpnHxbPwnvDca8C0_eHU,4264
+ wolof_translate/data/dataset_v2.py,sha256=nTYW88yP6mcky-T43mh-1CeYU30-Xlq4DxOpd3FZ0OQ,5613
+ wolof_translate/data/dataset_v3.py,sha256=SD0VGr2oMl_4TUafD_5ZKqiZAnbwuFD44sajI2gGl2Y,5628
+ wolof_translate/data/dataset_v3_2.py,sha256=-gq31O7dt41Zi196m6uHKV6PHtODAeZq95ICc9NJXRA,5628
+ wolof_translate/data/dataset_v4.py,sha256=0c97Pkjb7TBC5G91-1bFvVzafLQpruX3lj_PDOzmaAE,6271
+ wolof_translate/data/dataset_v5.py,sha256=JZMacAn4jdfCOJ3PH2tWnQ4qDN_OzYYquKHBTOe_ZdM,2050
+ wolof_translate/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wolof_translate/models/transformers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wolof_translate/models/transformers/main.py,sha256=Ua8AG0CRhkJ0At7Ma8nt7lcl9226DGg-PgrR8MOTgPo,30276
+ wolof_translate/models/transformers/main_2.py,sha256=ApUMR5LxSYHfF48wOkAgfEgnUm-4fYD9JFrq_llx_0U,12015
+ wolof_translate/models/transformers/optimization.py,sha256=2YJ66NUBJBM86CNNx8mCng1ryFAVbcS08fUK7f3OYOk,1401
+ wolof_translate/models/transformers/position.py,sha256=44Z-qSfB-NGhqXOXKbaztC8Bpc17BHcOX69zMssjbeg,1672
+ wolof_translate/models/transformers/size.py,sha256=UI4I30cwJMZ5RDs31X63HWW60_Zgy542FMC9sguQHqc,1317
+ wolof_translate/pipe/__init__.py,sha256=n2k4pXK1y-xxVnX13E9H1-hkYtpHbCAJVs-LMARWlvI,81
+ wolof_translate/pipe/nlp_pipeline.py,sha256=jmC5xXb1pAZ9uWXFepGCcYjIIzMk3Hpqu1cBMaMN2KE,15610
+ wolof_translate/tokenizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wolof_translate/trainers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wolof_translate/trainers/transformer_trainer.py,sha256=yK6GtQUXRdcG9XxR5QeEOgId8T6Oh3WJtFVmiyFmI_U,26691
+ wolof_translate/trainers/transformer_trainer_custom.py,sha256=hHUBcU4YK6wuRUMiwX5xG0oTaoDLt9bVSZbzIFRuces,31926
+ wolof_translate/trainers/transformer_trainer_ml.py,sha256=WgggaugkVHSJlwIAZT-QwI90Fl-_zT8Clhb-7M0m8gM,33561
+ wolof_translate/trainers/transformer_trainer_ml_.py,sha256=QaN9DB5pqhBxV4WlFmJCmUyfwlX-UyAzKRwL6rVEr4Q,38199
+ wolof_translate/utils/__init__.py,sha256=Nl3300H-Xd3uTHDR8y-rYa-UUR9FqbqZPwUKJUpQOb4,64
+ wolof_translate/utils/bucket_iterator.py,sha256=Hglii1Hj6H_K51JunTjUAxuLd4ehPb6LeeMVhsmhNxQ,6248
+ wolof_translate/utils/database_manager.py,sha256=7yhgBN1LvVFNEQikxCjSCva82h5nX44Nx2zh8cpFWyA,3543
+ wolof_translate/utils/display_predictions.py,sha256=y5H5lfgIODl6E5Zfb1YIwiAxIlHUxRBoChfQR5kjh24,5145
+ wolof_translate/utils/download_model.py,sha256=x92KpfVPvNK8Suen1qnOcPtZOlB4kXTfqWgoVuuMUEM,1241
+ wolof_translate/utils/evaluate_custom.py,sha256=cmcGfRAjhTuP9ekeJ0cioNoE1cQ7fQ7mZTh6_1IAaXM,3587
+ wolof_translate/utils/evaluation.py,sha256=Taxv4UAgg5q3WxC73pp84srr_wX6Kw9Ub9MloYrUmLs,1838
+ wolof_translate/utils/extract_new_sentences.py,sha256=li9UDgLa4nI6DSdB5oH0_m8xek3EEoVBL3CrVKTxGrc,22861
+ wolof_translate/utils/extract_poems.py,sha256=9Pf1PluUq257vcS2iinGPi6azGjgmHU7Q57uwQkHfAs,1314
+ wolof_translate/utils/extract_sentences.py,sha256=-PDBmceKUqiTdV9ieezSIITfADAnv_7OsNY8zdJi0To,15713
+ wolof_translate/utils/recuperate_datasets.py,sha256=4yTNXPOIfTokon0Bke50SdB8MT_Ojmu1aTmYv_K_w64,2644
+ wolof_translate/utils/recuperate_datasets_trunc.py,sha256=82T7mHbxruYJUw0L0ZUUoPHxO2Yr65rApakmIhe034M,2500
+ wolof_translate/utils/send_model.py,sha256=v_dQJDDpk3ak_DutbhwSqKF8-Q_-Gx9zezZsTot6Onk,797
+ wolof_translate/utils/sent_corrections.py,sha256=5iqdS4j78ayag0GxnCEl_dBUs4zbBAWAOac2h0ECv4c,3534
+ wolof_translate/utils/sent_transformers.py,sha256=kbbc5H-zPkxSM1uOghGeZa9fCAcm2GwTSuiRHM0asgI,574
+ wolof_translate/utils/sent_unification.py,sha256=UD9uZ--NREj5Z462n5hs-UjMPNhUN8Nr_6ZmR2w-B6Y,2104
+ wolof_translate/utils/split_with_valid.py,sha256=7-e6EfvPbLpTYrZOXJVYYqm_nV7n6yUYOaWkn8hsJJw,2424
+ wolof_translate/utils/tokenize_text.py,sha256=LZNsYmpchlkNsul00yb3HQToC-L7XSYuPHGCRCfbz9Y,1226
+ wolof_translate/utils/training.py,sha256=5vPVuqHL6_gqLkh4PTxXqW4UvAJBWNWVDDXC9Fk7IQI,6732
+ wolof_translate/utils/trunc_hg_training.py,sha256=mMGrU7Mjr9vYd7eLc8nbFRhRXwSWMKyg35lGf0L6RtQ,6418
+ wolof_translate/utils/improvements/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ wolof_translate/utils/improvements/end_marks.py,sha256=scmhMMYguZmrZTPozx1ZovizKrrPfPpMLXbU2-IOdGs,1194
+ wolof_translate-0.0.1.dist-info/METADATA,sha256=itQMCA-zGM3gSDiKco5dMtY5qfpPHvKNJbw9KtffxzI,818
+ wolof_translate-0.0.1.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+ wolof_translate-0.0.1.dist-info/top_level.txt,sha256=YG-kBnOwUZyQ7SofNvMxNYjzCreH2PVcW2UaEg1-Reg,16
+ wolof_translate-0.0.1.dist-info/RECORD,,
wolof_translate-0.0.1.dist-info/WHEEL
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: bdist_wheel (0.37.1)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
wolof_translate-0.0.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+ wolof_translate
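
Taken together, METADATA, RECORD, and top_level.txt above indicate that the distribution installs under the name wolof-translate and is imported as wolof_translate. Assuming the wheel is actually available from a pip-compatible index (for example via pip install wolof-translate==0.0.1), a quick post-install import check might look like this:

# Illustrative check only; module paths are taken from the RECORD listing above.
import wolof_translate
from wolof_translate.utils.training import train                       # transformer trained from scratch
from wolof_translate.utils.trunc_hg_training import train as train_t5  # t5-small fine-tuning

print(wolof_translate.__file__)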