spark-nlp 2.6.3rc1__py2.py3-none-any.whl → 6.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. com/johnsnowlabs/ml/__init__.py +0 -0
  2. com/johnsnowlabs/ml/ai/__init__.py +10 -0
  3. com/johnsnowlabs/nlp/__init__.py +4 -2
  4. spark_nlp-6.2.1.dist-info/METADATA +362 -0
  5. spark_nlp-6.2.1.dist-info/RECORD +292 -0
  6. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/WHEEL +1 -1
  7. sparknlp/__init__.py +281 -27
  8. sparknlp/annotation.py +137 -6
  9. sparknlp/annotation_audio.py +61 -0
  10. sparknlp/annotation_image.py +82 -0
  11. sparknlp/annotator/__init__.py +93 -0
  12. sparknlp/annotator/audio/__init__.py +16 -0
  13. sparknlp/annotator/audio/hubert_for_ctc.py +188 -0
  14. sparknlp/annotator/audio/wav2vec2_for_ctc.py +161 -0
  15. sparknlp/annotator/audio/whisper_for_ctc.py +251 -0
  16. sparknlp/annotator/chunk2_doc.py +85 -0
  17. sparknlp/annotator/chunker.py +137 -0
  18. sparknlp/annotator/classifier_dl/__init__.py +61 -0
  19. sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
  20. sparknlp/annotator/classifier_dl/albert_for_question_answering.py +172 -0
  21. sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +201 -0
  22. sparknlp/annotator/classifier_dl/albert_for_token_classification.py +179 -0
  23. sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
  24. sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
  25. sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +161 -0
  26. sparknlp/annotator/classifier_dl/bert_for_question_answering.py +168 -0
  27. sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +202 -0
  28. sparknlp/annotator/classifier_dl/bert_for_token_classification.py +177 -0
  29. sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +212 -0
  30. sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +168 -0
  31. sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +205 -0
  32. sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +173 -0
  33. sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
  34. sparknlp/annotator/classifier_dl/classifier_dl.py +320 -0
  35. sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +168 -0
  36. sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +198 -0
  37. sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +175 -0
  38. sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +193 -0
  39. sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +168 -0
  40. sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +201 -0
  41. sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +175 -0
  42. sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +211 -0
  43. sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
  44. sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +168 -0
  45. sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +201 -0
  46. sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +176 -0
  47. sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +148 -0
  48. sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +188 -0
  49. sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
  50. sparknlp/annotator/classifier_dl/multi_classifier_dl.py +395 -0
  51. sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
  52. sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +168 -0
  53. sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +201 -0
  54. sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +189 -0
  55. sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +225 -0
  56. sparknlp/annotator/classifier_dl/sentiment_dl.py +378 -0
  57. sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +170 -0
  58. sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
  59. sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +168 -0
  60. sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +201 -0
  61. sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +173 -0
  62. sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +225 -0
  63. sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +201 -0
  64. sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +176 -0
  65. sparknlp/annotator/cleaners/__init__.py +15 -0
  66. sparknlp/annotator/cleaners/cleaner.py +202 -0
  67. sparknlp/annotator/cleaners/extractor.py +191 -0
  68. sparknlp/annotator/coref/__init__.py +1 -0
  69. sparknlp/annotator/coref/spanbert_coref.py +221 -0
  70. sparknlp/annotator/cv/__init__.py +29 -0
  71. sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
  72. sparknlp/annotator/cv/clip_for_zero_shot_classification.py +193 -0
  73. sparknlp/annotator/cv/convnext_for_image_classification.py +269 -0
  74. sparknlp/annotator/cv/florence2_transformer.py +180 -0
  75. sparknlp/annotator/cv/gemma3_for_multimodal.py +346 -0
  76. sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
  77. sparknlp/annotator/cv/janus_for_multimodal.py +351 -0
  78. sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
  79. sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
  80. sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
  81. sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
  82. sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
  83. sparknlp/annotator/cv/smolvlm_transformer.py +426 -0
  84. sparknlp/annotator/cv/swin_for_image_classification.py +242 -0
  85. sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +240 -0
  86. sparknlp/annotator/cv/vit_for_image_classification.py +217 -0
  87. sparknlp/annotator/dataframe_optimizer.py +216 -0
  88. sparknlp/annotator/date2_chunk.py +88 -0
  89. sparknlp/annotator/dependency/__init__.py +17 -0
  90. sparknlp/annotator/dependency/dependency_parser.py +294 -0
  91. sparknlp/annotator/dependency/typed_dependency_parser.py +318 -0
  92. sparknlp/annotator/document_character_text_splitter.py +228 -0
  93. sparknlp/annotator/document_normalizer.py +235 -0
  94. sparknlp/annotator/document_token_splitter.py +175 -0
  95. sparknlp/annotator/document_token_splitter_test.py +85 -0
  96. sparknlp/annotator/embeddings/__init__.py +45 -0
  97. sparknlp/annotator/embeddings/albert_embeddings.py +230 -0
  98. sparknlp/annotator/embeddings/auto_gguf_embeddings.py +539 -0
  99. sparknlp/annotator/embeddings/bert_embeddings.py +208 -0
  100. sparknlp/annotator/embeddings/bert_sentence_embeddings.py +224 -0
  101. sparknlp/annotator/embeddings/bge_embeddings.py +199 -0
  102. sparknlp/annotator/embeddings/camembert_embeddings.py +210 -0
  103. sparknlp/annotator/embeddings/chunk_embeddings.py +149 -0
  104. sparknlp/annotator/embeddings/deberta_embeddings.py +208 -0
  105. sparknlp/annotator/embeddings/distil_bert_embeddings.py +221 -0
  106. sparknlp/annotator/embeddings/doc2vec.py +352 -0
  107. sparknlp/annotator/embeddings/e5_embeddings.py +195 -0
  108. sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
  109. sparknlp/annotator/embeddings/elmo_embeddings.py +251 -0
  110. sparknlp/annotator/embeddings/instructor_embeddings.py +204 -0
  111. sparknlp/annotator/embeddings/longformer_embeddings.py +211 -0
  112. sparknlp/annotator/embeddings/minilm_embeddings.py +189 -0
  113. sparknlp/annotator/embeddings/mpnet_embeddings.py +192 -0
  114. sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
  115. sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
  116. sparknlp/annotator/embeddings/roberta_embeddings.py +225 -0
  117. sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +191 -0
  118. sparknlp/annotator/embeddings/sentence_embeddings.py +134 -0
  119. sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
  120. sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
  121. sparknlp/annotator/embeddings/universal_sentence_encoder.py +211 -0
  122. sparknlp/annotator/embeddings/word2vec.py +353 -0
  123. sparknlp/annotator/embeddings/word_embeddings.py +385 -0
  124. sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +225 -0
  125. sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +194 -0
  126. sparknlp/annotator/embeddings/xlnet_embeddings.py +227 -0
  127. sparknlp/annotator/er/__init__.py +16 -0
  128. sparknlp/annotator/er/entity_ruler.py +267 -0
  129. sparknlp/annotator/graph_extraction.py +368 -0
  130. sparknlp/annotator/keyword_extraction/__init__.py +16 -0
  131. sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +270 -0
  132. sparknlp/annotator/ld_dl/__init__.py +16 -0
  133. sparknlp/annotator/ld_dl/language_detector_dl.py +199 -0
  134. sparknlp/annotator/lemmatizer.py +250 -0
  135. sparknlp/annotator/matcher/__init__.py +20 -0
  136. sparknlp/annotator/matcher/big_text_matcher.py +272 -0
  137. sparknlp/annotator/matcher/date_matcher.py +303 -0
  138. sparknlp/annotator/matcher/multi_date_matcher.py +109 -0
  139. sparknlp/annotator/matcher/regex_matcher.py +221 -0
  140. sparknlp/annotator/matcher/text_matcher.py +290 -0
  141. sparknlp/annotator/n_gram_generator.py +141 -0
  142. sparknlp/annotator/ner/__init__.py +21 -0
  143. sparknlp/annotator/ner/ner_approach.py +94 -0
  144. sparknlp/annotator/ner/ner_converter.py +148 -0
  145. sparknlp/annotator/ner/ner_crf.py +397 -0
  146. sparknlp/annotator/ner/ner_dl.py +591 -0
  147. sparknlp/annotator/ner/ner_dl_graph_checker.py +293 -0
  148. sparknlp/annotator/ner/ner_overwriter.py +166 -0
  149. sparknlp/annotator/ner/zero_shot_ner_model.py +173 -0
  150. sparknlp/annotator/normalizer.py +230 -0
  151. sparknlp/annotator/openai/__init__.py +16 -0
  152. sparknlp/annotator/openai/openai_completion.py +349 -0
  153. sparknlp/annotator/openai/openai_embeddings.py +106 -0
  154. sparknlp/annotator/param/__init__.py +17 -0
  155. sparknlp/annotator/param/classifier_encoder.py +98 -0
  156. sparknlp/annotator/param/evaluation_dl_params.py +130 -0
  157. sparknlp/annotator/pos/__init__.py +16 -0
  158. sparknlp/annotator/pos/perceptron.py +263 -0
  159. sparknlp/annotator/sentence/__init__.py +17 -0
  160. sparknlp/annotator/sentence/sentence_detector.py +290 -0
  161. sparknlp/annotator/sentence/sentence_detector_dl.py +467 -0
  162. sparknlp/annotator/sentiment/__init__.py +17 -0
  163. sparknlp/annotator/sentiment/sentiment_detector.py +208 -0
  164. sparknlp/annotator/sentiment/vivekn_sentiment.py +242 -0
  165. sparknlp/annotator/seq2seq/__init__.py +35 -0
  166. sparknlp/annotator/seq2seq/auto_gguf_model.py +304 -0
  167. sparknlp/annotator/seq2seq/auto_gguf_reranker.py +334 -0
  168. sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +336 -0
  169. sparknlp/annotator/seq2seq/bart_transformer.py +420 -0
  170. sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
  171. sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
  172. sparknlp/annotator/seq2seq/gpt2_transformer.py +363 -0
  173. sparknlp/annotator/seq2seq/llama2_transformer.py +343 -0
  174. sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
  175. sparknlp/annotator/seq2seq/m2m100_transformer.py +392 -0
  176. sparknlp/annotator/seq2seq/marian_transformer.py +374 -0
  177. sparknlp/annotator/seq2seq/mistral_transformer.py +348 -0
  178. sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
  179. sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
  180. sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
  181. sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
  182. sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
  183. sparknlp/annotator/seq2seq/qwen_transformer.py +340 -0
  184. sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
  185. sparknlp/annotator/seq2seq/t5_transformer.py +425 -0
  186. sparknlp/annotator/similarity/__init__.py +0 -0
  187. sparknlp/annotator/similarity/document_similarity_ranker.py +379 -0
  188. sparknlp/annotator/spell_check/__init__.py +18 -0
  189. sparknlp/annotator/spell_check/context_spell_checker.py +911 -0
  190. sparknlp/annotator/spell_check/norvig_sweeting.py +358 -0
  191. sparknlp/annotator/spell_check/symmetric_delete.py +299 -0
  192. sparknlp/annotator/stemmer.py +79 -0
  193. sparknlp/annotator/stop_words_cleaner.py +190 -0
  194. sparknlp/annotator/tf_ner_dl_graph_builder.py +179 -0
  195. sparknlp/annotator/token/__init__.py +19 -0
  196. sparknlp/annotator/token/chunk_tokenizer.py +118 -0
  197. sparknlp/annotator/token/recursive_tokenizer.py +205 -0
  198. sparknlp/annotator/token/regex_tokenizer.py +208 -0
  199. sparknlp/annotator/token/tokenizer.py +561 -0
  200. sparknlp/annotator/token2_chunk.py +76 -0
  201. sparknlp/annotator/ws/__init__.py +16 -0
  202. sparknlp/annotator/ws/word_segmenter.py +429 -0
  203. sparknlp/base/__init__.py +30 -0
  204. sparknlp/base/audio_assembler.py +95 -0
  205. sparknlp/base/doc2_chunk.py +169 -0
  206. sparknlp/base/document_assembler.py +164 -0
  207. sparknlp/base/embeddings_finisher.py +201 -0
  208. sparknlp/base/finisher.py +217 -0
  209. sparknlp/base/gguf_ranking_finisher.py +234 -0
  210. sparknlp/base/graph_finisher.py +125 -0
  211. sparknlp/base/has_recursive_fit.py +24 -0
  212. sparknlp/base/has_recursive_transform.py +22 -0
  213. sparknlp/base/image_assembler.py +172 -0
  214. sparknlp/base/light_pipeline.py +429 -0
  215. sparknlp/base/multi_document_assembler.py +164 -0
  216. sparknlp/base/prompt_assembler.py +207 -0
  217. sparknlp/base/recursive_pipeline.py +107 -0
  218. sparknlp/base/table_assembler.py +145 -0
  219. sparknlp/base/token_assembler.py +124 -0
  220. sparknlp/common/__init__.py +26 -0
  221. sparknlp/common/annotator_approach.py +41 -0
  222. sparknlp/common/annotator_model.py +47 -0
  223. sparknlp/common/annotator_properties.py +114 -0
  224. sparknlp/common/annotator_type.py +38 -0
  225. sparknlp/common/completion_post_processing.py +37 -0
  226. sparknlp/common/coverage_result.py +22 -0
  227. sparknlp/common/match_strategy.py +33 -0
  228. sparknlp/common/properties.py +1298 -0
  229. sparknlp/common/read_as.py +33 -0
  230. sparknlp/common/recursive_annotator_approach.py +35 -0
  231. sparknlp/common/storage.py +149 -0
  232. sparknlp/common/utils.py +39 -0
  233. sparknlp/functions.py +315 -5
  234. sparknlp/internal/__init__.py +1199 -0
  235. sparknlp/internal/annotator_java_ml.py +32 -0
  236. sparknlp/internal/annotator_transformer.py +37 -0
  237. sparknlp/internal/extended_java_wrapper.py +63 -0
  238. sparknlp/internal/params_getters_setters.py +71 -0
  239. sparknlp/internal/recursive.py +70 -0
  240. sparknlp/logging/__init__.py +15 -0
  241. sparknlp/logging/comet.py +467 -0
  242. sparknlp/partition/__init__.py +16 -0
  243. sparknlp/partition/partition.py +244 -0
  244. sparknlp/partition/partition_properties.py +902 -0
  245. sparknlp/partition/partition_transformer.py +200 -0
  246. sparknlp/pretrained/__init__.py +17 -0
  247. sparknlp/pretrained/pretrained_pipeline.py +158 -0
  248. sparknlp/pretrained/resource_downloader.py +216 -0
  249. sparknlp/pretrained/utils.py +35 -0
  250. sparknlp/reader/__init__.py +15 -0
  251. sparknlp/reader/enums.py +19 -0
  252. sparknlp/reader/pdf_to_text.py +190 -0
  253. sparknlp/reader/reader2doc.py +124 -0
  254. sparknlp/reader/reader2image.py +136 -0
  255. sparknlp/reader/reader2table.py +44 -0
  256. sparknlp/reader/reader_assembler.py +159 -0
  257. sparknlp/reader/sparknlp_reader.py +461 -0
  258. sparknlp/training/__init__.py +20 -0
  259. sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  260. sparknlp/training/_tf_graph_builders/graph_builders.py +299 -0
  261. sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  262. sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +41 -0
  263. sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +78 -0
  264. sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +521 -0
  265. sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +62 -0
  266. sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +28 -0
  267. sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +36 -0
  268. sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +385 -0
  269. sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +183 -0
  270. sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +235 -0
  271. sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +665 -0
  272. sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +245 -0
  273. sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +4006 -0
  274. sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  275. sparknlp/training/_tf_graph_builders_1x/graph_builders.py +277 -0
  276. sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  277. sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +34 -0
  278. sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +78 -0
  279. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +532 -0
  280. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +62 -0
  281. sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +28 -0
  282. sparknlp/training/conll.py +150 -0
  283. sparknlp/training/conllu.py +103 -0
  284. sparknlp/training/pos.py +103 -0
  285. sparknlp/training/pub_tator.py +76 -0
  286. sparknlp/training/spacy_to_annotation.py +57 -0
  287. sparknlp/training/tfgraphs.py +5 -0
  288. sparknlp/upload_to_hub.py +149 -0
  289. sparknlp/util.py +51 -5
  290. com/__init__.pyc +0 -0
  291. com/__pycache__/__init__.cpython-36.pyc +0 -0
  292. com/johnsnowlabs/__init__.pyc +0 -0
  293. com/johnsnowlabs/__pycache__/__init__.cpython-36.pyc +0 -0
  294. com/johnsnowlabs/nlp/__init__.pyc +0 -0
  295. com/johnsnowlabs/nlp/__pycache__/__init__.cpython-36.pyc +0 -0
  296. spark_nlp-2.6.3rc1.dist-info/METADATA +0 -36
  297. spark_nlp-2.6.3rc1.dist-info/RECORD +0 -48
  298. sparknlp/__init__.pyc +0 -0
  299. sparknlp/__pycache__/__init__.cpython-36.pyc +0 -0
  300. sparknlp/__pycache__/annotation.cpython-36.pyc +0 -0
  301. sparknlp/__pycache__/annotator.cpython-36.pyc +0 -0
  302. sparknlp/__pycache__/base.cpython-36.pyc +0 -0
  303. sparknlp/__pycache__/common.cpython-36.pyc +0 -0
  304. sparknlp/__pycache__/embeddings.cpython-36.pyc +0 -0
  305. sparknlp/__pycache__/functions.cpython-36.pyc +0 -0
  306. sparknlp/__pycache__/internal.cpython-36.pyc +0 -0
  307. sparknlp/__pycache__/pretrained.cpython-36.pyc +0 -0
  308. sparknlp/__pycache__/storage.cpython-36.pyc +0 -0
  309. sparknlp/__pycache__/training.cpython-36.pyc +0 -0
  310. sparknlp/__pycache__/util.cpython-36.pyc +0 -0
  311. sparknlp/annotation.pyc +0 -0
  312. sparknlp/annotator.py +0 -3006
  313. sparknlp/annotator.pyc +0 -0
  314. sparknlp/base.py +0 -347
  315. sparknlp/base.pyc +0 -0
  316. sparknlp/common.py +0 -193
  317. sparknlp/common.pyc +0 -0
  318. sparknlp/embeddings.py +0 -40
  319. sparknlp/embeddings.pyc +0 -0
  320. sparknlp/internal.py +0 -288
  321. sparknlp/internal.pyc +0 -0
  322. sparknlp/pretrained.py +0 -123
  323. sparknlp/pretrained.pyc +0 -0
  324. sparknlp/storage.py +0 -32
  325. sparknlp/storage.pyc +0 -0
  326. sparknlp/training.py +0 -62
  327. sparknlp/training.pyc +0 -0
  328. sparknlp/util.pyc +0 -0
  329. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,235 @@
1
+ # Copyright 2017-2022 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Contains classes for the DocumentNormalizer"""
15
+ from sparknlp.common import *
16
+
17
+
18
+ class DocumentNormalizer(AnnotatorModel):
19
+ """Annotator which normalizes raw text from tagged text, e.g. scraped web
20
+ pages or xml documents, from document type columns into Sentence.
21
+
22
+ Removes all dirty characters from text following one or more input regex
23
+ patterns. Can apply not wanted character removal with a specific policy.
24
+ Can apply lower case normalization.
25
+
26
+ For extended examples of usage, see the `Examples <https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/text/english/document-normalizer/document_normalizer_notebook.ipynb
27
+ >`__.
28
+
29
+ ====================== ======================
30
+ Input Annotation types Output Annotation type
31
+ ====================== ======================
32
+ ``DOCUMENT`` ``DOCUMENT``
33
+ ====================== ======================
34
+
35
+ Parameters
36
+ ----------
37
+ action
38
+ action to perform before applying regex patterns on text, by default
39
+ "clean"
40
+ patterns
41
+ normalization regex patterns which match will be removed from document,
42
+ by default ['<[^>]*>']
43
+ replacement
44
+ replacement string to apply when regexes match, by default " "
45
+ lowercase
46
+ whether to convert strings to lowercase, by default False
47
+ policy
48
+ policy to remove pattern from text, by default "pretty_all"
49
+ encoding
50
+ file encoding to apply on normalized documents, by default "UTF-8"
51
+
52
+ Examples
53
+ --------
54
+ >>> import sparknlp
55
+ >>> from sparknlp.base import *
56
+ >>> from sparknlp.annotator import *
57
+ >>> from pyspark.ml import Pipeline
58
+ >>> documentAssembler = DocumentAssembler() \\
59
+ ... .setInputCol("text") \\
60
+ ... .setOutputCol("document")
61
+ >>> cleanUpPatterns = ["<[^>]>"]
62
+ >>> documentNormalizer = DocumentNormalizer() \\
63
+ ... .setInputCols("document") \\
64
+ ... .setOutputCol("normalizedDocument") \\
65
+ ... .setAction("clean") \\
66
+ ... .setPatterns(cleanUpPatterns) \\
67
+ ... .setReplacement(" ") \\
68
+ ... .setPolicy("pretty_all") \\
69
+ ... .setLowercase(True)
70
+ >>> pipeline = Pipeline().setStages([
71
+ ... documentAssembler,
72
+ ... documentNormalizer
73
+ ... ])
74
+ >>> text = \"\"\"
75
+ ... <div id="theworldsgreatest" class='my-right my-hide-small my-wide toptext' style="font-family:'Segoe UI',Arial,sans-serif">
76
+ ... THE WORLD'S LARGEST WEB DEVELOPER SITE
77
+ ... <h1 style="font-size:300%;">THE WORLD'S LARGEST WEB DEVELOPER SITE</h1>
78
+ ... <p style="font-size:160%;">Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum..</p>
79
+ ... </div>
80
+ ... </div>\"\"\"
81
+ >>> data = spark.createDataFrame([[text]]).toDF("text")
82
+ >>> pipelineModel = pipeline.fit(data)
83
+ >>> result = pipelineModel.transform(data)
84
+ >>> result.selectExpr("normalizedDocument.result").show(truncate=False)
85
+ +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
86
+ |result |
87
+ +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
88
+ |[ the world's largest web developer site the world's largest web developer site lorem ipsum is simply dummy text of the printing and typesetting industry. lorem ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. it has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. it was popularised in the 1960s with the release of letraset sheets containing lorem ipsum passages, and more recently with desktop publishing software like aldus pagemaker including versions of lorem ipsum..]|
89
+ +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
90
+ """
91
+ inputAnnotatorTypes = [AnnotatorType.DOCUMENT]
92
+
93
+ outputAnnotatorType = AnnotatorType.DOCUMENT
94
+
95
+ action = Param(Params._dummy(),
96
+ "action",
97
+ "action to perform applying regex patterns on text",
98
+ typeConverter=TypeConverters.toString)
99
+
100
+ patterns = Param(Params._dummy(),
101
+ "patterns",
102
+ "normalization regex patterns which match will be removed from document. Defaults is <[^>]*>",
103
+ typeConverter=TypeConverters.toListString)
104
+
105
+ replacement = Param(Params._dummy(),
106
+ "replacement",
107
+ "replacement string to apply when regexes match",
108
+ typeConverter=TypeConverters.toString)
109
+
110
+ lowercase = Param(Params._dummy(),
111
+ "lowercase",
112
+ "whether to convert strings to lowercase",
113
+ typeConverter=TypeConverters.toBoolean)
114
+
115
+ policy = Param(Params._dummy(),
116
+ "policy",
117
+ "policy to remove pattern from text",
118
+ typeConverter=TypeConverters.toString)
119
+
120
+ encoding = Param(Params._dummy(),
121
+ "encoding",
122
+ "file encoding to apply on normalized documents",
123
+ typeConverter=TypeConverters.toString)
124
+
125
+ presetPattern = Param(
126
+ Params._dummy(),
127
+ "presetPattern",
128
+ "Selects a single text cleaning function from the functional presets (e.g., 'CLEAN_BULLETS', 'CLEAN_DASHES', etc.).",
129
+ typeConverter=TypeConverters.toString
130
+ )
131
+
132
+ autoMode = Param(
133
+ Params._dummy(),
134
+ "autoMode",
135
+ "Enables a predefined cleaning mode combining multiple text cleaner functions (e.g., 'light_clean', 'document_clean', 'html_clean', 'full_auto').",
136
+ typeConverter=TypeConverters.toString
137
+ )
138
+
139
+
140
+ @keyword_only
141
+ def __init__(self):
142
+ super(DocumentNormalizer, self).__init__(classname="com.johnsnowlabs.nlp.annotators.DocumentNormalizer")
143
+ self._setDefault(
144
+ action="clean",
145
+ patterns=["<[^>]*>"],
146
+ replacement=" ",
147
+ lowercase=False,
148
+ policy="pretty_all",
149
+ encoding="UTF-8"
150
+ )
151
+
152
+ def setAction(self, value):
153
+ """Sets action to perform before applying regex patterns on text, by
154
+ default "clean".
155
+
156
+ Parameters
157
+ ----------
158
+ value : str
159
+ Action to perform before applying regex patterns
160
+ """
161
+ return self._set(action=value)
162
+
163
+ def setPatterns(self, value):
164
+ """Sets normalization regex patterns which match will be removed from
165
+ document, by default ['<[^>]*>'].
166
+
167
+ Parameters
168
+ ----------
169
+ value : List[str]
170
+ Normalization regex patterns which match will be removed from
171
+ document
172
+ """
173
+ return self._set(patterns=value)
174
+
175
+ def setReplacement(self, value):
176
+ """Sets replacement string to apply when regexes match, by default " ".
177
+
178
+ Parameters
179
+ ----------
180
+ value : str
181
+ Replacement string to apply when regexes match
182
+ """
183
+ return self._set(replacement=value)
184
+
185
+ def setLowercase(self, value):
186
+ """Sets whether to convert strings to lowercase, by default False.
187
+
188
+ Parameters
189
+ ----------
190
+ value : bool
191
+ Whether to convert strings to lowercase, by default False
192
+ """
193
+ return self._set(lowercase=value)
194
+
195
+ def setPolicy(self, value):
196
+ """Sets policy to remove pattern from text, by default "pretty_all".
197
+
198
+ Parameters
199
+ ----------
200
+ value : str
201
+ Policy to remove pattern from text, by default "pretty_all"
202
+ """
203
+ return self._set(policy=value)
204
+
205
+ def setEncoding(self, value):
206
+ """Sets file encoding to apply on normalized documents, by default
207
+ "UTF-8".
208
+
209
+ Parameters
210
+ ----------
211
+ value : str
212
+ File encoding to apply on normalized documents, by default "UTF-8"
213
+ """
214
+ return self._set(encoding=value)
215
+
216
+ def setPresetPattern(self, value):
217
+ """Sets a single text cleaning preset pattern.
218
+
219
+ Parameters
220
+ ----------
221
+ value : str
222
+ Preset cleaning pattern name, e.g., 'CLEAN_BULLETS', 'CLEAN_DASHES'.
223
+ """
224
+ return self._set(presetPattern=value)
225
+
226
+
227
+ def setAutoMode(self, value):
228
+ """Sets an automatic text cleaning mode using predefined groups of cleaning functions.
229
+
230
+ Parameters
231
+ ----------
232
+ value : str
233
+ Auto cleaning mode, e.g., 'light_clean', 'document_clean', 'social_clean', 'html_clean', 'full_auto'.
234
+ """
235
+ return self._set(autoMode=value)
@@ -0,0 +1,175 @@
1
+ # Copyright 2017-2022 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Contains classes for the DocumentTokenSplitter"""
15
+ from sparknlp.common import *
16
+
17
+
18
+ class DocumentTokenSplitter(AnnotatorModel):
19
+ """Annotator that splits large documents into smaller documents based on the number of tokens in
20
+ the text.
21
+
22
+ Currently, DocumentTokenSplitter splits the text by whitespaces to create the tokens. The
23
+ number of these tokens will then be used as a measure of the text length. In the future, other
24
+ tokenization techniques will be supported.
25
+
26
+ For example, given 3 tokens and overlap 1:
27
+
28
+ .. code-block:: python
29
+
30
+ He was, I take it, the most perfect reasoning and observing machine that the world has seen.
31
+
32
+ ["He was, I", "I take it,", "it, the most", "most perfect reasoning", "reasoning and observing", "observing machine that", "that the world", "world has seen."]
33
+
34
+
35
+ Additionally, you can set
36
+
37
+ - whether to trim whitespaces with setTrimWhitespace
38
+ - whether to explode the splits to individual rows with setExplodeSplits
39
+
40
+ For extended examples of usage, see the
41
+ `DocumentTokenSplitterTest <https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/annotators/DocumentTokenSplitterTest.scala>`__.
42
+
43
+ ====================== ======================
44
+ Input Annotation types Output Annotation type
45
+ ====================== ======================
46
+ ``DOCUMENT`` ``DOCUMENT``
47
+ ====================== ======================
48
+
49
+ Parameters
50
+ ----------
51
+
52
+ numTokens
53
+ Limit of the number of tokens in a text
54
+ tokenOverlap
55
+ Length of the token overlap between text chunks, by default `0`.
56
+ explodeSplits
57
+ Whether to explode split chunks to separate rows, by default `False`.
58
+ trimWhitespace
59
+ Whether to trim whitespaces of extracted chunks, by default `True`.
60
+
61
+ Examples
62
+ --------
63
+ >>> import sparknlp
64
+ >>> from sparknlp.base import *
65
+ >>> from sparknlp.annotator import *
66
+ >>> from pyspark.ml import Pipeline
67
+ >>> textDF = spark.read.text(
68
+ ... "sherlockholmes.txt",
69
+ ... wholetext=True
70
+ ... ).toDF("text")
71
+ >>> documentAssembler = DocumentAssembler().setInputCol("text")
72
+ >>> textSplitter = DocumentTokenSplitter() \\
73
+ ... .setInputCols(["document"]) \\
74
+ ... .setOutputCol("splits") \\
75
+ ... .setNumTokens(512) \\
76
+ ... .setTokenOverlap(10) \\
77
+ ... .setExplodeSplits(True)
78
+ >>> pipeline = Pipeline().setStages([documentAssembler, textSplitter])
79
+ >>> result = pipeline.fit(textDF).transform(textDF)
80
+ >>> result.selectExpr(
81
+ ... "splits.result as result",
82
+ ... "splits[0].begin as begin",
83
+ ... "splits[0].end as end",
84
+ ... "splits[0].end - splits[0].begin as length",
85
+ ... "splits[0].metadata.numTokens as tokens") \\
86
+ ... .show(8, truncate = 80)
87
+ +--------------------------------------------------------------------------------+-----+-----+------+------+
88
+ | result|begin| end|length|tokens|
89
+ +--------------------------------------------------------------------------------+-----+-----+------+------+
90
+ |[ Project Gutenberg's The Adventures of Sherlock Holmes, by Arthur Conan Doyl...| 0| 3018| 3018| 512|
91
+ |[study of crime, and occupied his\\nimmense faculties and extraordinary powers...| 2950| 5707| 2757| 512|
92
+ |[but as I have changed my clothes I can't imagine how you\\ndeduce it. As to M...| 5659| 8483| 2824| 512|
93
+ |[quarters received. Be in your chamber then at that hour, and do\\nnot take it...| 8427|11241| 2814| 512|
94
+ |[a pity\\nto miss it."\\n\\n"But your client--"\\n\\n"Never mind him. I may want y...|11188|13970| 2782| 512|
95
+ |[person who employs me wishes his agent to be unknown to\\nyou, and I may conf...|13918|16898| 2980| 512|
96
+ |[letters back."\\n\\n"Precisely so. But how--"\\n\\n"Was there a secret marriage?...|16836|19744| 2908| 512|
97
+ |[seven hundred in\\nnotes," he said.\\n\\nHolmes scribbled a receipt upon a shee...|19683|22551| 2868| 512|
98
+ +--------------------------------------------------------------------------------+-----+-----+------+------+
99
+
100
+ """
101
+
102
+ inputAnnotatorTypes = [AnnotatorType.DOCUMENT]
103
+
104
+ outputAnnotatorType = AnnotatorType.DOCUMENT
105
+
106
+ numTokens = Param(Params._dummy(),
107
+ "numTokens",
108
+ "Limit of the number of tokens in a text",
109
+ typeConverter=TypeConverters.toInt)
110
+ tokenOverlap = Param(Params._dummy(),
111
+ "tokenOverlap",
112
+ "Length of the token overlap between text chunks",
113
+ typeConverter=TypeConverters.toInt)
114
+ explodeSplits = Param(Params._dummy(),
115
+ "explodeSplits",
116
+ "Whether to explode split chunks to separate rows",
117
+ typeConverter=TypeConverters.toBoolean)
118
+ trimWhitespace = Param(Params._dummy(),
119
+ "trimWhitespace",
120
+ "Whether to trim whitespaces of extracted chunks",
121
+ typeConverter=TypeConverters.toBoolean)
122
+
123
+ @keyword_only
124
+ def __init__(self):
125
+ super(DocumentTokenSplitter, self).__init__(
126
+ classname="com.johnsnowlabs.nlp.annotators.DocumentTokenSplitter")
127
+ self._setDefault(
128
+ tokenOverlap=0,
129
+ explodeSplits=False,
130
+ trimWhitespace=True
131
+ )
132
+
133
+ def setNumTokens(self, value):
134
+ """Sets the limit of the number of tokens in a text.
135
+
136
+ Parameters
137
+ ----------
138
+ value : int
139
+ Number of tokens in a text
140
+ """
141
+ if value < 1:
142
+ raise ValueError("Number of tokens should be larger than 0.")
143
+ return self._set(numTokens=value)
144
+
145
+ def setTokenOverlap(self, value):
146
+ """Sets the length of the token overlap between text chunks, by default `0`.
147
+
148
+ Parameters
149
+ ----------
150
+ value : int
151
+ Length of the token overlap between text chunks. Must not be larger than numTokens, so set numTokens first.
152
+ """
153
+ if value > self.getOrDefault(self.numTokens):
154
+ raise ValueError("Token overlap can't be larger than number of tokens.")
155
+ return self._set(tokenOverlap=value)
156
+
157
+ def setExplodeSplits(self, value):
158
+ """Sets whether to explode split chunks to separate rows, by default `False`.
159
+
160
+ Parameters
161
+ ----------
162
+ value : bool
163
+ Whether to explode split chunks to separate rows
164
+ """
165
+ return self._set(explodeSplits=value)
166
+
167
+ def setTrimWhitespace(self, value):
168
+ """Sets whether to trim whitespaces of extracted chunks, by default `True`.
169
+
170
+ Parameters
171
+ ----------
172
+ value : bool
173
+ Whether to trim whitespaces of extracted chunks
174
+ """
175
+ return self._set(trimWhitespace=value)
@@ -0,0 +1,85 @@
1
+ # Copyright 2017-2022 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import unittest
15
+
16
+ import pytest
17
+
18
+ from sparknlp.annotator import *
19
+ from sparknlp.base import *
20
+ from test.util import SparkSessionForTest
21
+
22
+
23
+ @pytest.mark.fast
24
+ class DocumentTokenSplitterTestSpec(unittest.TestCase):
25
+ def setUp(self):
26
+ self.data = SparkSessionForTest.spark.createDataFrame(
27
+ [
28
+ [
29
+ (
30
+ "All emotions, and that\none particularly, were abhorrent to his cold, precise"
31
+ " but\nadmirably balanced mind.\n\nHe was, I take it, the most perfect\nreasoning"
32
+ " and observing machine that the world has seen."
33
+ )
34
+ ]
35
+ ]
36
+ ).toDF("text")
37
+
38
+ def test_run(self):
39
+ df = self.data
40
+
41
+ document_assembler = (
42
+ DocumentAssembler().setInputCol("text").setOutputCol("document")
43
+ )
44
+
45
+ document_token_splitter = (
46
+ DocumentTokenSplitter()
47
+ .setInputCols("document")
48
+ .setOutputCol("splits")
49
+ .setNumTokens(3)
50
+ .setTokenOverlap(1)
51
+ .setExplodeSplits(True)
52
+ .setTrimWhitespace(True)
53
+ )
54
+
55
+ pipeline = Pipeline().setStages([document_assembler, document_token_splitter])
56
+
57
+ pipeline_df = pipeline.fit(df).transform(df)
58
+
59
+ results = pipeline_df.select("splits").collect()
60
+
61
+ splits = [
62
+ row["splits"][0].result.replace("\n\n", " ").replace("\n", " ")
63
+ for row in results
64
+ ]
65
+
66
+ expected = [
67
+ "All emotions, and",
68
+ "and that one",
69
+ "one particularly, were",
70
+ "were abhorrent to",
71
+ "to his cold,",
72
+ "cold, precise but",
73
+ "but admirably balanced",
74
+ "balanced mind. He",
75
+ "He was, I",
76
+ "I take it,",
77
+ "it, the most",
78
+ "most perfect reasoning",
79
+ "reasoning and observing",
80
+ "observing machine that",
81
+ "that the world",
82
+ "world has seen.",
83
+ ]
84
+
85
+ assert splits == expected
@@ -0,0 +1,45 @@
1
+ # Copyright 2017-2022 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Module of annotators for text embeddings."""
16
+ from sparknlp.annotator.embeddings.albert_embeddings import *
17
+ from sparknlp.annotator.embeddings.bert_embeddings import *
18
+ from sparknlp.annotator.embeddings.bert_sentence_embeddings import *
19
+ from sparknlp.annotator.embeddings.camembert_embeddings import *
20
+ from sparknlp.annotator.embeddings.chunk_embeddings import *
21
+ from sparknlp.annotator.embeddings.deberta_embeddings import *
22
+ from sparknlp.annotator.embeddings.distil_bert_embeddings import *
23
+ from sparknlp.annotator.embeddings.doc2vec import *
24
+ from sparknlp.annotator.embeddings.elmo_embeddings import *
25
+ from sparknlp.annotator.embeddings.e5_embeddings import *
26
+ from sparknlp.annotator.embeddings.instructor_embeddings import *
27
+ from sparknlp.annotator.embeddings.longformer_embeddings import *
28
+ from sparknlp.annotator.embeddings.minilm_embeddings import *
29
+ from sparknlp.annotator.embeddings.mpnet_embeddings import *
30
+ from sparknlp.annotator.embeddings.roberta_embeddings import *
31
+ from sparknlp.annotator.embeddings.roberta_sentence_embeddings import *
32
+ from sparknlp.annotator.embeddings.sentence_embeddings import *
33
+ from sparknlp.annotator.embeddings.universal_sentence_encoder import *
34
+ from sparknlp.annotator.embeddings.word2vec import *
35
+ from sparknlp.annotator.embeddings.word_embeddings import *
36
+ from sparknlp.annotator.embeddings.xlm_roberta_embeddings import *
37
+ from sparknlp.annotator.embeddings.xlm_roberta_sentence_embeddings import *
38
+ from sparknlp.annotator.embeddings.xlnet_embeddings import *
39
+ from sparknlp.annotator.embeddings.bge_embeddings import *
40
+ from sparknlp.annotator.embeddings.uae_embeddings import *
41
+ from sparknlp.annotator.embeddings.mxbai_embeddings import *
42
+ from sparknlp.annotator.embeddings.snowflake_embeddings import *
43
+ from sparknlp.annotator.embeddings.nomic_embeddings import *
44
+ from sparknlp.annotator.embeddings.auto_gguf_embeddings import *
45
+ from sparknlp.annotator.embeddings.e5v_embeddings import *