spark-nlp 2.6.3rc1__py2.py3-none-any.whl → 6.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329)
  1. com/johnsnowlabs/ml/__init__.py +0 -0
  2. com/johnsnowlabs/ml/ai/__init__.py +10 -0
  3. com/johnsnowlabs/nlp/__init__.py +4 -2
  4. spark_nlp-6.2.1.dist-info/METADATA +362 -0
  5. spark_nlp-6.2.1.dist-info/RECORD +292 -0
  6. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/WHEEL +1 -1
  7. sparknlp/__init__.py +281 -27
  8. sparknlp/annotation.py +137 -6
  9. sparknlp/annotation_audio.py +61 -0
  10. sparknlp/annotation_image.py +82 -0
  11. sparknlp/annotator/__init__.py +93 -0
  12. sparknlp/annotator/audio/__init__.py +16 -0
  13. sparknlp/annotator/audio/hubert_for_ctc.py +188 -0
  14. sparknlp/annotator/audio/wav2vec2_for_ctc.py +161 -0
  15. sparknlp/annotator/audio/whisper_for_ctc.py +251 -0
  16. sparknlp/annotator/chunk2_doc.py +85 -0
  17. sparknlp/annotator/chunker.py +137 -0
  18. sparknlp/annotator/classifier_dl/__init__.py +61 -0
  19. sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
  20. sparknlp/annotator/classifier_dl/albert_for_question_answering.py +172 -0
  21. sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +201 -0
  22. sparknlp/annotator/classifier_dl/albert_for_token_classification.py +179 -0
  23. sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
  24. sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
  25. sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +161 -0
  26. sparknlp/annotator/classifier_dl/bert_for_question_answering.py +168 -0
  27. sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +202 -0
  28. sparknlp/annotator/classifier_dl/bert_for_token_classification.py +177 -0
  29. sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +212 -0
  30. sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +168 -0
  31. sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +205 -0
  32. sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +173 -0
  33. sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
  34. sparknlp/annotator/classifier_dl/classifier_dl.py +320 -0
  35. sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +168 -0
  36. sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +198 -0
  37. sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +175 -0
  38. sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +193 -0
  39. sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +168 -0
  40. sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +201 -0
  41. sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +175 -0
  42. sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +211 -0
  43. sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
  44. sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +168 -0
  45. sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +201 -0
  46. sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +176 -0
  47. sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +148 -0
  48. sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +188 -0
  49. sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
  50. sparknlp/annotator/classifier_dl/multi_classifier_dl.py +395 -0
  51. sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
  52. sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +168 -0
  53. sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +201 -0
  54. sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +189 -0
  55. sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +225 -0
  56. sparknlp/annotator/classifier_dl/sentiment_dl.py +378 -0
  57. sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +170 -0
  58. sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
  59. sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +168 -0
  60. sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +201 -0
  61. sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +173 -0
  62. sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +225 -0
  63. sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +201 -0
  64. sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +176 -0
  65. sparknlp/annotator/cleaners/__init__.py +15 -0
  66. sparknlp/annotator/cleaners/cleaner.py +202 -0
  67. sparknlp/annotator/cleaners/extractor.py +191 -0
  68. sparknlp/annotator/coref/__init__.py +1 -0
  69. sparknlp/annotator/coref/spanbert_coref.py +221 -0
  70. sparknlp/annotator/cv/__init__.py +29 -0
  71. sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
  72. sparknlp/annotator/cv/clip_for_zero_shot_classification.py +193 -0
  73. sparknlp/annotator/cv/convnext_for_image_classification.py +269 -0
  74. sparknlp/annotator/cv/florence2_transformer.py +180 -0
  75. sparknlp/annotator/cv/gemma3_for_multimodal.py +346 -0
  76. sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
  77. sparknlp/annotator/cv/janus_for_multimodal.py +351 -0
  78. sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
  79. sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
  80. sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
  81. sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
  82. sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
  83. sparknlp/annotator/cv/smolvlm_transformer.py +426 -0
  84. sparknlp/annotator/cv/swin_for_image_classification.py +242 -0
  85. sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +240 -0
  86. sparknlp/annotator/cv/vit_for_image_classification.py +217 -0
  87. sparknlp/annotator/dataframe_optimizer.py +216 -0
  88. sparknlp/annotator/date2_chunk.py +88 -0
  89. sparknlp/annotator/dependency/__init__.py +17 -0
  90. sparknlp/annotator/dependency/dependency_parser.py +294 -0
  91. sparknlp/annotator/dependency/typed_dependency_parser.py +318 -0
  92. sparknlp/annotator/document_character_text_splitter.py +228 -0
  93. sparknlp/annotator/document_normalizer.py +235 -0
  94. sparknlp/annotator/document_token_splitter.py +175 -0
  95. sparknlp/annotator/document_token_splitter_test.py +85 -0
  96. sparknlp/annotator/embeddings/__init__.py +45 -0
  97. sparknlp/annotator/embeddings/albert_embeddings.py +230 -0
  98. sparknlp/annotator/embeddings/auto_gguf_embeddings.py +539 -0
  99. sparknlp/annotator/embeddings/bert_embeddings.py +208 -0
  100. sparknlp/annotator/embeddings/bert_sentence_embeddings.py +224 -0
  101. sparknlp/annotator/embeddings/bge_embeddings.py +199 -0
  102. sparknlp/annotator/embeddings/camembert_embeddings.py +210 -0
  103. sparknlp/annotator/embeddings/chunk_embeddings.py +149 -0
  104. sparknlp/annotator/embeddings/deberta_embeddings.py +208 -0
  105. sparknlp/annotator/embeddings/distil_bert_embeddings.py +221 -0
  106. sparknlp/annotator/embeddings/doc2vec.py +352 -0
  107. sparknlp/annotator/embeddings/e5_embeddings.py +195 -0
  108. sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
  109. sparknlp/annotator/embeddings/elmo_embeddings.py +251 -0
  110. sparknlp/annotator/embeddings/instructor_embeddings.py +204 -0
  111. sparknlp/annotator/embeddings/longformer_embeddings.py +211 -0
  112. sparknlp/annotator/embeddings/minilm_embeddings.py +189 -0
  113. sparknlp/annotator/embeddings/mpnet_embeddings.py +192 -0
  114. sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
  115. sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
  116. sparknlp/annotator/embeddings/roberta_embeddings.py +225 -0
  117. sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +191 -0
  118. sparknlp/annotator/embeddings/sentence_embeddings.py +134 -0
  119. sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
  120. sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
  121. sparknlp/annotator/embeddings/universal_sentence_encoder.py +211 -0
  122. sparknlp/annotator/embeddings/word2vec.py +353 -0
  123. sparknlp/annotator/embeddings/word_embeddings.py +385 -0
  124. sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +225 -0
  125. sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +194 -0
  126. sparknlp/annotator/embeddings/xlnet_embeddings.py +227 -0
  127. sparknlp/annotator/er/__init__.py +16 -0
  128. sparknlp/annotator/er/entity_ruler.py +267 -0
  129. sparknlp/annotator/graph_extraction.py +368 -0
  130. sparknlp/annotator/keyword_extraction/__init__.py +16 -0
  131. sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +270 -0
  132. sparknlp/annotator/ld_dl/__init__.py +16 -0
  133. sparknlp/annotator/ld_dl/language_detector_dl.py +199 -0
  134. sparknlp/annotator/lemmatizer.py +250 -0
  135. sparknlp/annotator/matcher/__init__.py +20 -0
  136. sparknlp/annotator/matcher/big_text_matcher.py +272 -0
  137. sparknlp/annotator/matcher/date_matcher.py +303 -0
  138. sparknlp/annotator/matcher/multi_date_matcher.py +109 -0
  139. sparknlp/annotator/matcher/regex_matcher.py +221 -0
  140. sparknlp/annotator/matcher/text_matcher.py +290 -0
  141. sparknlp/annotator/n_gram_generator.py +141 -0
  142. sparknlp/annotator/ner/__init__.py +21 -0
  143. sparknlp/annotator/ner/ner_approach.py +94 -0
  144. sparknlp/annotator/ner/ner_converter.py +148 -0
  145. sparknlp/annotator/ner/ner_crf.py +397 -0
  146. sparknlp/annotator/ner/ner_dl.py +591 -0
  147. sparknlp/annotator/ner/ner_dl_graph_checker.py +293 -0
  148. sparknlp/annotator/ner/ner_overwriter.py +166 -0
  149. sparknlp/annotator/ner/zero_shot_ner_model.py +173 -0
  150. sparknlp/annotator/normalizer.py +230 -0
  151. sparknlp/annotator/openai/__init__.py +16 -0
  152. sparknlp/annotator/openai/openai_completion.py +349 -0
  153. sparknlp/annotator/openai/openai_embeddings.py +106 -0
  154. sparknlp/annotator/param/__init__.py +17 -0
  155. sparknlp/annotator/param/classifier_encoder.py +98 -0
  156. sparknlp/annotator/param/evaluation_dl_params.py +130 -0
  157. sparknlp/annotator/pos/__init__.py +16 -0
  158. sparknlp/annotator/pos/perceptron.py +263 -0
  159. sparknlp/annotator/sentence/__init__.py +17 -0
  160. sparknlp/annotator/sentence/sentence_detector.py +290 -0
  161. sparknlp/annotator/sentence/sentence_detector_dl.py +467 -0
  162. sparknlp/annotator/sentiment/__init__.py +17 -0
  163. sparknlp/annotator/sentiment/sentiment_detector.py +208 -0
  164. sparknlp/annotator/sentiment/vivekn_sentiment.py +242 -0
  165. sparknlp/annotator/seq2seq/__init__.py +35 -0
  166. sparknlp/annotator/seq2seq/auto_gguf_model.py +304 -0
  167. sparknlp/annotator/seq2seq/auto_gguf_reranker.py +334 -0
  168. sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +336 -0
  169. sparknlp/annotator/seq2seq/bart_transformer.py +420 -0
  170. sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
  171. sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
  172. sparknlp/annotator/seq2seq/gpt2_transformer.py +363 -0
  173. sparknlp/annotator/seq2seq/llama2_transformer.py +343 -0
  174. sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
  175. sparknlp/annotator/seq2seq/m2m100_transformer.py +392 -0
  176. sparknlp/annotator/seq2seq/marian_transformer.py +374 -0
  177. sparknlp/annotator/seq2seq/mistral_transformer.py +348 -0
  178. sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
  179. sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
  180. sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
  181. sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
  182. sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
  183. sparknlp/annotator/seq2seq/qwen_transformer.py +340 -0
  184. sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
  185. sparknlp/annotator/seq2seq/t5_transformer.py +425 -0
  186. sparknlp/annotator/similarity/__init__.py +0 -0
  187. sparknlp/annotator/similarity/document_similarity_ranker.py +379 -0
  188. sparknlp/annotator/spell_check/__init__.py +18 -0
  189. sparknlp/annotator/spell_check/context_spell_checker.py +911 -0
  190. sparknlp/annotator/spell_check/norvig_sweeting.py +358 -0
  191. sparknlp/annotator/spell_check/symmetric_delete.py +299 -0
  192. sparknlp/annotator/stemmer.py +79 -0
  193. sparknlp/annotator/stop_words_cleaner.py +190 -0
  194. sparknlp/annotator/tf_ner_dl_graph_builder.py +179 -0
  195. sparknlp/annotator/token/__init__.py +19 -0
  196. sparknlp/annotator/token/chunk_tokenizer.py +118 -0
  197. sparknlp/annotator/token/recursive_tokenizer.py +205 -0
  198. sparknlp/annotator/token/regex_tokenizer.py +208 -0
  199. sparknlp/annotator/token/tokenizer.py +561 -0
  200. sparknlp/annotator/token2_chunk.py +76 -0
  201. sparknlp/annotator/ws/__init__.py +16 -0
  202. sparknlp/annotator/ws/word_segmenter.py +429 -0
  203. sparknlp/base/__init__.py +30 -0
  204. sparknlp/base/audio_assembler.py +95 -0
  205. sparknlp/base/doc2_chunk.py +169 -0
  206. sparknlp/base/document_assembler.py +164 -0
  207. sparknlp/base/embeddings_finisher.py +201 -0
  208. sparknlp/base/finisher.py +217 -0
  209. sparknlp/base/gguf_ranking_finisher.py +234 -0
  210. sparknlp/base/graph_finisher.py +125 -0
  211. sparknlp/base/has_recursive_fit.py +24 -0
  212. sparknlp/base/has_recursive_transform.py +22 -0
  213. sparknlp/base/image_assembler.py +172 -0
  214. sparknlp/base/light_pipeline.py +429 -0
  215. sparknlp/base/multi_document_assembler.py +164 -0
  216. sparknlp/base/prompt_assembler.py +207 -0
  217. sparknlp/base/recursive_pipeline.py +107 -0
  218. sparknlp/base/table_assembler.py +145 -0
  219. sparknlp/base/token_assembler.py +124 -0
  220. sparknlp/common/__init__.py +26 -0
  221. sparknlp/common/annotator_approach.py +41 -0
  222. sparknlp/common/annotator_model.py +47 -0
  223. sparknlp/common/annotator_properties.py +114 -0
  224. sparknlp/common/annotator_type.py +38 -0
  225. sparknlp/common/completion_post_processing.py +37 -0
  226. sparknlp/common/coverage_result.py +22 -0
  227. sparknlp/common/match_strategy.py +33 -0
  228. sparknlp/common/properties.py +1298 -0
  229. sparknlp/common/read_as.py +33 -0
  230. sparknlp/common/recursive_annotator_approach.py +35 -0
  231. sparknlp/common/storage.py +149 -0
  232. sparknlp/common/utils.py +39 -0
  233. sparknlp/functions.py +315 -5
  234. sparknlp/internal/__init__.py +1199 -0
  235. sparknlp/internal/annotator_java_ml.py +32 -0
  236. sparknlp/internal/annotator_transformer.py +37 -0
  237. sparknlp/internal/extended_java_wrapper.py +63 -0
  238. sparknlp/internal/params_getters_setters.py +71 -0
  239. sparknlp/internal/recursive.py +70 -0
  240. sparknlp/logging/__init__.py +15 -0
  241. sparknlp/logging/comet.py +467 -0
  242. sparknlp/partition/__init__.py +16 -0
  243. sparknlp/partition/partition.py +244 -0
  244. sparknlp/partition/partition_properties.py +902 -0
  245. sparknlp/partition/partition_transformer.py +200 -0
  246. sparknlp/pretrained/__init__.py +17 -0
  247. sparknlp/pretrained/pretrained_pipeline.py +158 -0
  248. sparknlp/pretrained/resource_downloader.py +216 -0
  249. sparknlp/pretrained/utils.py +35 -0
  250. sparknlp/reader/__init__.py +15 -0
  251. sparknlp/reader/enums.py +19 -0
  252. sparknlp/reader/pdf_to_text.py +190 -0
  253. sparknlp/reader/reader2doc.py +124 -0
  254. sparknlp/reader/reader2image.py +136 -0
  255. sparknlp/reader/reader2table.py +44 -0
  256. sparknlp/reader/reader_assembler.py +159 -0
  257. sparknlp/reader/sparknlp_reader.py +461 -0
  258. sparknlp/training/__init__.py +20 -0
  259. sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  260. sparknlp/training/_tf_graph_builders/graph_builders.py +299 -0
  261. sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  262. sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +41 -0
  263. sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +78 -0
  264. sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +521 -0
  265. sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +62 -0
  266. sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +28 -0
  267. sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +36 -0
  268. sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +385 -0
  269. sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +183 -0
  270. sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +235 -0
  271. sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +665 -0
  272. sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +245 -0
  273. sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +4006 -0
  274. sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  275. sparknlp/training/_tf_graph_builders_1x/graph_builders.py +277 -0
  276. sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  277. sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +34 -0
  278. sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +78 -0
  279. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +532 -0
  280. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +62 -0
  281. sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +28 -0
  282. sparknlp/training/conll.py +150 -0
  283. sparknlp/training/conllu.py +103 -0
  284. sparknlp/training/pos.py +103 -0
  285. sparknlp/training/pub_tator.py +76 -0
  286. sparknlp/training/spacy_to_annotation.py +57 -0
  287. sparknlp/training/tfgraphs.py +5 -0
  288. sparknlp/upload_to_hub.py +149 -0
  289. sparknlp/util.py +51 -5
  290. com/__init__.pyc +0 -0
  291. com/__pycache__/__init__.cpython-36.pyc +0 -0
  292. com/johnsnowlabs/__init__.pyc +0 -0
  293. com/johnsnowlabs/__pycache__/__init__.cpython-36.pyc +0 -0
  294. com/johnsnowlabs/nlp/__init__.pyc +0 -0
  295. com/johnsnowlabs/nlp/__pycache__/__init__.cpython-36.pyc +0 -0
  296. spark_nlp-2.6.3rc1.dist-info/METADATA +0 -36
  297. spark_nlp-2.6.3rc1.dist-info/RECORD +0 -48
  298. sparknlp/__init__.pyc +0 -0
  299. sparknlp/__pycache__/__init__.cpython-36.pyc +0 -0
  300. sparknlp/__pycache__/annotation.cpython-36.pyc +0 -0
  301. sparknlp/__pycache__/annotator.cpython-36.pyc +0 -0
  302. sparknlp/__pycache__/base.cpython-36.pyc +0 -0
  303. sparknlp/__pycache__/common.cpython-36.pyc +0 -0
  304. sparknlp/__pycache__/embeddings.cpython-36.pyc +0 -0
  305. sparknlp/__pycache__/functions.cpython-36.pyc +0 -0
  306. sparknlp/__pycache__/internal.cpython-36.pyc +0 -0
  307. sparknlp/__pycache__/pretrained.cpython-36.pyc +0 -0
  308. sparknlp/__pycache__/storage.cpython-36.pyc +0 -0
  309. sparknlp/__pycache__/training.cpython-36.pyc +0 -0
  310. sparknlp/__pycache__/util.cpython-36.pyc +0 -0
  311. sparknlp/annotation.pyc +0 -0
  312. sparknlp/annotator.py +0 -3006
  313. sparknlp/annotator.pyc +0 -0
  314. sparknlp/base.py +0 -347
  315. sparknlp/base.pyc +0 -0
  316. sparknlp/common.py +0 -193
  317. sparknlp/common.pyc +0 -0
  318. sparknlp/embeddings.py +0 -40
  319. sparknlp/embeddings.pyc +0 -0
  320. sparknlp/internal.py +0 -288
  321. sparknlp/internal.pyc +0 -0
  322. sparknlp/pretrained.py +0 -123
  323. sparknlp/pretrained.pyc +0 -0
  324. sparknlp/storage.py +0 -32
  325. sparknlp/storage.pyc +0 -0
  326. sparknlp/training.py +0 -62
  327. sparknlp/training.pyc +0 -0
  328. sparknlp/util.pyc +0 -0
  329. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,173 @@
1
+ # Copyright 2017-2022 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Contains classes for XlmRoBertaForTokenClassification."""
15
+
16
+ from sparknlp.common import *
17
+
18
+
19
+ class XlmRoBertaForTokenClassification(AnnotatorModel,
20
+ HasCaseSensitiveProperties,
21
+ HasBatchedAnnotate,
22
+ HasEngine,
23
+ HasMaxSentenceLengthLimit):
24
+ """XlmRoBertaForTokenClassification can load XLM-RoBERTa Models with a token
25
+ classification head on top (a linear layer on top of the hidden-states
26
+ output) e.g. for Named-Entity-Recognition (NER) tasks.
27
+
28
+ Pretrained models can be loaded with :meth:`.pretrained` of the companion
29
+ object:
30
+
31
+ >>> token_classifier = XlmRoBertaForTokenClassification.pretrained() \\
32
+ ... .setInputCols(["token", "document"]) \\
33
+ ... .setOutputCol("label")
34
+ The default model is ``"mpnet_base_token_classifier"``, if no
35
+ name is provided.
36
+
37
+ For available pretrained models please see the `Models Hub
38
+ <https://sparknlp.org/models?task=Named+Entity+Recognition>`__.
39
+ To see which models are compatible and how to import them see
40
+ `Import Transformers into Spark NLP 🚀
41
+ <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
42
+
43
+ ====================== ======================
44
+ Input Annotation types Output Annotation type
45
+ ====================== ======================
46
+ ``DOCUMENT, TOKEN`` ``NAMED_ENTITY``
47
+ ====================== ======================
48
+
49
+ Parameters
50
+ ----------
51
+ batchSize
52
+ Batch size. Large values allows faster processing but requires more
53
+ memory, by default 8
54
+ caseSensitive
55
+ Whether to ignore case in tokens for embeddings matching, by default
56
+ True
57
+ configProtoBytes
58
+ ConfigProto from tensorflow, serialized into byte array.
59
+ maxSentenceLength
60
+ Max sentence length to process, by default 128
61
+
62
+ Examples
63
+ --------
64
+ >>> import sparknlp
65
+ >>> from sparknlp.base import *
66
+ >>> from sparknlp.annotator import *
67
+ >>> from pyspark.ml import Pipeline
68
+ >>> documentAssembler = DocumentAssembler() \\
69
+ ... .setInputCol("text") \\
70
+ ... .setOutputCol("document")
71
+ >>> tokenizer = Tokenizer() \\
72
+ ... .setInputCols(["document"]) \\
73
+ ... .setOutputCol("token")
74
+ >>> tokenClassifier = XlmRoBertaForTokenClassification.pretrained() \\
75
+ ... .setInputCols(["token", "document"]) \\
76
+ ... .setOutputCol("label") \\
77
+ ... .setCaseSensitive(True)
78
+ >>> pipeline = Pipeline().setStages([
79
+ ... documentAssembler,
80
+ ... tokenizer,
81
+ ... tokenClassifier
82
+ ... ])
83
+ >>> data = spark.createDataFrame([["John Lenon was born in London and lived in Paris. My name is Sarah and I live in London"]]).toDF("text")
84
+ >>> result = pipeline.fit(data).transform(data)
85
+ >>> result.select("label.result").show(truncate=False)
86
+ +------------------------------------------------------------------------------------+
87
+ |result |
88
+ +------------------------------------------------------------------------------------+
89
+ |[B-PER, I-PER, O, O, O, B-LOC, O, O, O, B-LOC, O, O, O, O, B-PER, O, O, O, O, B-LOC]|
90
+ +------------------------------------------------------------------------------------+
91
+ """
92
+ name = "XlmRoBertaForTokenClassification"
93
+
94
+ inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]
95
+
96
+ outputAnnotatorType = AnnotatorType.NAMED_ENTITY
97
+
98
+ configProtoBytes = Param(Params._dummy(),
99
+ "configProtoBytes",
100
+ "ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()",
101
+ TypeConverters.toListInt)
102
+
103
+ def getClasses(self):
104
+ """
105
+ Returns labels used to train this model
106
+ """
107
+ return self._call_java("getClasses")
108
+
109
+ def setConfigProtoBytes(self, b):
110
+ """Sets configProto from tensorflow, serialized into byte array.
111
+
112
+ Parameters
113
+ ----------
114
+ b : List[int]
115
+ ConfigProto from tensorflow, serialized into byte array
116
+ """
117
+ return self._set(configProtoBytes=b)
118
+
119
+ @keyword_only
120
+ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.XlmRoBertaForTokenClassification",
121
+ java_model=None):
122
+ super(XlmRoBertaForTokenClassification, self).__init__(
123
+ classname=classname,
124
+ java_model=java_model
125
+ )
126
+ self._setDefault(
127
+ batchSize=8,
128
+ maxSentenceLength=128,
129
+ caseSensitive=True
130
+ )
131
+
132
+ @staticmethod
133
+ def loadSavedModel(folder, spark_session):
134
+ """Loads a locally saved model.
135
+
136
+ Parameters
137
+ ----------
138
+ folder : str
139
+ Folder of the saved model
140
+ spark_session : pyspark.sql.SparkSession
141
+ The current SparkSession
142
+
143
+ Returns
144
+ -------
145
+ XlmRoBertaForTokenClassification
146
+ The restored model
147
+ """
148
+ from sparknlp.internal import _XlmRoBertaTokenClassifierLoader
149
+ jModel = _XlmRoBertaTokenClassifierLoader(folder, spark_session._jsparkSession)._java_obj
150
+ return XlmRoBertaForTokenClassification(java_model=jModel)
151
+
152
+ @staticmethod
153
+ def pretrained(name="mpnet_base_token_classifier", lang="en", remote_loc=None):
154
+ """Downloads and loads a pretrained model.
155
+
156
+ Parameters
157
+ ----------
158
+ name : str, optional
159
+ Name of the pretrained model, by default
160
+ "mpnet_base_token_classifier"
161
+ lang : str, optional
162
+ Language of the pretrained model, by default "en"
163
+ remote_loc : str, optional
164
+ Optional remote address of the resource, by default None. Will use
165
+ Spark NLPs repositories otherwise.
166
+
167
+ Returns
168
+ -------
169
+ XlmRoBertaForTokenClassification
170
+ The restored model
171
+ """
172
+ from sparknlp.pretrained import ResourceDownloader
173
+ return ResourceDownloader.downloadModel(XlmRoBertaForTokenClassification, name, lang, remote_loc)
@@ -0,0 +1,225 @@
1
+ # Copyright 2017-2023 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Contains classes for XlmRoBertaForZeroShotClassification."""
15
+
16
+ from sparknlp.common import *
17
+
18
+
19
class XlmRoBertaForZeroShotClassification(AnnotatorModel,
                                          HasCaseSensitiveProperties,
                                          HasBatchedAnnotate,
                                          HasClassifierActivationProperties,
                                          HasCandidateLabelsProperties,
                                          HasEngine):
    """XlmRoBertaForZeroShotClassification using a `ModelForSequenceClassification` trained on NLI (natural language
    inference) tasks. Equivalent of `XlmRoBertaForSequenceClassification` models, but these models don't require a hardcoded
    number of potential classes, they can be chosen at runtime. It usually means it's slower but it is much more
    flexible.

    Note that the model will loop through all provided labels. So the more labels you have, the
    longer this process will take.

    Any combination of sequences and labels can be passed and each combination will be posed as a premise/hypothesis
    pair and passed to the pretrained model.

    Pretrained models can be loaded with :meth:`.pretrained` of the companion
    object:

    >>> sequenceClassifier = XlmRoBertaForZeroShotClassification.pretrained() \\
    ...     .setInputCols(["token", "document"]) \\
    ...     .setOutputCol("label")

    The default model is ``"xlm_roberta_large_zero_shot_classifier_xnli_anli"``, if no name is
    provided.

    For available pretrained models please see the `Models Hub
    <https://sparknlp.org/models?task=Text+Classification>`__.

    To see which models are compatible and how to import them see
    `Import Transformers into Spark NLP 🚀
    <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.

    ====================== ======================
    Input Annotation types Output Annotation type
    ====================== ======================
    ``DOCUMENT, TOKEN``    ``CATEGORY``
    ====================== ======================

    Parameters
    ----------
    batchSize
        Batch size. Large values allows faster processing but requires more
        memory, by default 8
    caseSensitive
        Whether to ignore case in tokens for embeddings matching, by default
        True
    configProtoBytes
        ConfigProto from tensorflow, serialized into byte array.
    maxSentenceLength
        Max sentence length to process, by default 128
    coalesceSentences
        Instead of 1 class per sentence (if inputCols is `sentence`) output 1
        class per document by averaging probabilities in all sentences, by
        default False
    activation
        Whether to calculate logits via Softmax or Sigmoid, by default
        `"softmax"`.

    Examples
    --------
    >>> import sparknlp
    >>> from sparknlp.base import *
    >>> from sparknlp.annotator import *
    >>> from pyspark.ml import Pipeline
    >>> documentAssembler = DocumentAssembler() \\
    ...     .setInputCol("text") \\
    ...     .setOutputCol("document")
    >>> tokenizer = Tokenizer() \\
    ...     .setInputCols(["document"]) \\
    ...     .setOutputCol("token")
    >>> sequenceClassifier = XlmRoBertaForZeroShotClassification.pretrained() \\
    ...     .setInputCols(["token", "document"]) \\
    ...     .setOutputCol("label") \\
    ...     .setCaseSensitive(True)
    >>> pipeline = Pipeline().setStages([
    ...     documentAssembler,
    ...     tokenizer,
    ...     sequenceClassifier
    ... ])
    >>> data = spark.createDataFrame([["I loved this movie when I was a child.", "It was pretty boring."]]).toDF("text")
    >>> result = pipeline.fit(data).transform(data)
    >>> result.select("label.result").show(truncate=False)
    +------+
    |result|
    +------+
    |[pos] |
    |[neg] |
    +------+
    """
    name = "XlmRoBertaForZeroShotClassification"

    inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

    outputAnnotatorType = AnnotatorType.CATEGORY

    maxSentenceLength = Param(Params._dummy(),
                              "maxSentenceLength",
                              "Max sentence length to process",
                              typeConverter=TypeConverters.toInt)

    configProtoBytes = Param(Params._dummy(),
                             "configProtoBytes",
                             "ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()",
                             TypeConverters.toListInt)

    coalesceSentences = Param(Params._dummy(), "coalesceSentences",
                              "Instead of 1 class per sentence (if inputCols is '''sentence''') output 1 class per document by averaging probabilities in all sentences.",
                              TypeConverters.toBoolean)

    def getClasses(self):
        """
        Returns labels used to train this model
        """
        return self._call_java("getClasses")

    def setConfigProtoBytes(self, b):
        """Sets configProto from tensorflow, serialized into byte array.

        Parameters
        ----------
        b : List[int]
            ConfigProto from tensorflow, serialized into byte array
        """
        return self._set(configProtoBytes=b)

    def setMaxSentenceLength(self, value):
        """Sets max sentence length to process, by default 128.

        Parameters
        ----------
        value : int
            Max sentence length to process
        """
        return self._set(maxSentenceLength=value)

    def setCoalesceSentences(self, value):
        """Instead of 1 class per sentence (if inputCols is '''sentence''') output 1 class per document by averaging
        probabilities in all sentences. Due to max sequence length limit in almost all transformer models such as XlmRoBerta
        (512 tokens), this parameter helps to feed all the sentences into the model and averaging all the probabilities
        for the entire document instead of probabilities per sentence. (Default: ``False``)

        Parameters
        ----------
        value : bool
            If the output of all sentences will be averaged to one output
        """
        return self._set(coalesceSentences=value)

    @keyword_only
    def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.XlmRoBertaForZeroShotClassification",
                 java_model=None):
        super(XlmRoBertaForZeroShotClassification, self).__init__(
            classname=classname,
            java_model=java_model
        )
        # Defaults mirror the JVM-side annotator defaults.
        self._setDefault(
            batchSize=8,
            maxSentenceLength=128,
            caseSensitive=True,
            coalesceSentences=False,
            activation="softmax"
        )

    @staticmethod
    def loadSavedModel(folder, spark_session):
        """Loads a locally saved model.

        Parameters
        ----------
        folder : str
            Folder of the saved model
        spark_session : pyspark.sql.SparkSession
            The current SparkSession

        Returns
        -------
        XlmRoBertaForZeroShotClassification
            The restored model
        """
        from sparknlp.internal import _XlmRoBertaForZeroShotClassification
        jModel = _XlmRoBertaForZeroShotClassification(folder, spark_session._jsparkSession)._java_obj
        return XlmRoBertaForZeroShotClassification(java_model=jModel)

    @staticmethod
    def pretrained(name="xlm_roberta_large_zero_shot_classifier_xnli_anli", lang="xx", remote_loc=None):
        """Downloads and loads a pretrained model.

        Parameters
        ----------
        name : str, optional
            Name of the pretrained model, by default
            "xlm_roberta_large_zero_shot_classifier_xnli_anli"
        lang : str, optional
            Language of the pretrained model, by default "xx"
        remote_loc : str, optional
            Optional remote address of the resource, by default None. Will use
            Spark NLPs repositories otherwise.

        Returns
        -------
        XlmRoBertaForZeroShotClassification
            The restored model
        """
        from sparknlp.pretrained import ResourceDownloader
        return ResourceDownloader.downloadModel(XlmRoBertaForZeroShotClassification, name, lang, remote_loc)
@@ -0,0 +1,201 @@
1
+ # Copyright 2017-2022 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Contains classes for XlnetForSequenceClassification."""
15
+
16
+ from sparknlp.common import *
17
+
18
+
19
+ class XlnetForSequenceClassification(AnnotatorModel,
20
+ HasCaseSensitiveProperties,
21
+ HasBatchedAnnotate,
22
+ HasClassifierActivationProperties,
23
+ HasEngine,
24
+ HasMaxSentenceLengthLimit):
25
+ """XlnetForSequenceClassification can load XLNet Models with sequence classification/regression head on
26
+ top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.
27
+
28
+ Pretrained models can be loaded with :meth:`.pretrained` of the companion
29
+ object:
30
+
31
+ >>> sequenceClassifier = XlnetForSequenceClassification.pretrained() \\
32
+ ... .setInputCols(["token", "document"]) \\
33
+ ... .setOutputCol("label")
34
+
35
+ The default model is ``"xlnet_base_sequence_classifier_imdb"``, if no name is
36
+ provided.
37
+
38
+ For available pretrained models please see the `Models Hub
39
+ <https://sparknlp.org/models?task=Text+Classification>`__.
40
+
41
+ To see which models are compatible and how to import them see
42
+ `Import Transformers into Spark NLP 🚀
43
+ <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
44
+
45
+ ====================== ======================
46
+ Input Annotation types Output Annotation type
47
+ ====================== ======================
48
+ ``DOCUMENT, TOKEN`` ``CATEGORY``
49
+ ====================== ======================
50
+
51
+ Parameters
52
+ ----------
53
+ batchSize
54
+ Batch size. Large values allows faster processing but requires more
55
+ memory, by default 8
56
+ caseSensitive
57
+ Whether to ignore case in tokens for embeddings matching, by default
58
+ True
59
+ configProtoBytes
60
+ ConfigProto from tensorflow, serialized into byte array.
61
+ maxSentenceLength
62
+ Max sentence length to process, by default 128
63
+ coalesceSentences
64
+ Instead of 1 class per sentence (if inputCols is `sentence`) output
65
+ 1 class per document by averaging probabilities in all sentences, by
66
+ default False.
67
+ activation
68
+ Whether to calculate logits via Softmax or Sigmoid, by default
69
+ `"softmax"`.
70
+
71
+ Examples
72
+ --------
73
+ >>> import sparknlp
74
+ >>> from sparknlp.base import *
75
+ >>> from sparknlp.annotator import *
76
+ >>> from pyspark.ml import Pipeline
77
+ >>> documentAssembler = DocumentAssembler() \\
78
+ ... .setInputCol("text") \\
79
+ ... .setOutputCol("document")
80
+ >>> tokenizer = Tokenizer() \\
81
+ ... .setInputCols(["document"]) \\
82
+ ... .setOutputCol("token")
83
+ >>> sequenceClassifier = XlnetForSequenceClassification.pretrained() \\
84
+ ... .setInputCols(["token", "document"]) \\
85
+ ... .setOutputCol("label") \\
86
+ ... .setCaseSensitive(True)
87
+ >>> pipeline = Pipeline().setStages([
88
+ ... documentAssembler,
89
+ ... tokenizer,
90
+ ... sequenceClassifier
91
+ ... ])
92
+ >>> data = spark.createDataFrame([["I loved this movie when I was a child.", "It was pretty boring."]]).toDF("text")
93
+ >>> result = pipeline.fit(data).transform(data)
94
+ >>> result.select("label.result").show(truncate=False)
95
+ +------+
96
+ |result|
97
+ +------+
98
+ |[pos] |
99
+ |[neg] |
100
+ +------+
101
+ """
102
+ name = "XlnetForSequenceClassification"
103
+
104
+ inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]
105
+
106
+ outputAnnotatorType = AnnotatorType.CATEGORY
107
+
108
+ configProtoBytes = Param(Params._dummy(),
109
+ "configProtoBytes",
110
+ "ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()",
111
+ TypeConverters.toListInt)
112
+
113
+ coalesceSentences = Param(Params._dummy(), "coalesceSentences",
114
+ "Instead of 1 class per sentence (if inputCols is '''sentence''') output 1 class per document by averaging probabilities in all sentences.",
115
+ TypeConverters.toBoolean)
116
+
117
+ def getClasses(self):
118
+ """
119
+ Returns labels used to train this model
120
+ """
121
+ return self._call_java("getClasses")
122
+
123
+ def setConfigProtoBytes(self, b):
124
+ """Sets configProto from tensorflow, serialized into byte array.
125
+
126
+ Parameters
127
+ ----------
128
+ b : List[int]
129
+ ConfigProto from tensorflow, serialized into byte array
130
+ """
131
+ return self._set(configProtoBytes=b)
132
+
133
+ def setCoalesceSentences(self, value):
134
+ """Instead of 1 class per sentence (if inputCols is '''sentence''') output 1 class per document by averaging probabilities in all sentences.
135
+ Due to max sequence length limit in almost all transformer models such as BERT (512 tokens), this parameter helps feeding all the sentences
136
+ into the model and averaging all the probabilities for the entire document instead of probabilities per sentence. (Default: true)
137
+
138
+ Parameters
139
+ ----------
140
+ value : bool
141
+ If the output of all sentences will be averaged to one output
142
+ """
143
+ return self._set(coalesceSentences=value)
144
+
145
+ @keyword_only
146
+ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.XlnetForSequenceClassification",
147
+ java_model=None):
148
+ super(XlnetForSequenceClassification, self).__init__(
149
+ classname=classname,
150
+ java_model=java_model
151
+ )
152
+ self._setDefault(
153
+ batchSize=8,
154
+ maxSentenceLength=128,
155
+ caseSensitive=True,
156
+ coalesceSentences=False,
157
+ activation="softmax"
158
+ )
159
+
160
+ @staticmethod
161
+ def loadSavedModel(folder, spark_session):
162
+ """Loads a locally saved model.
163
+
164
+ Parameters
165
+ ----------
166
+ folder : str
167
+ Folder of the saved model
168
+ spark_session : pyspark.sql.SparkSession
169
+ The current SparkSession
170
+
171
+ Returns
172
+ -------
173
+ XlnetForSequenceClassification
174
+ The restored model
175
+ """
176
+ from sparknlp.internal import _XlnetSequenceClassifierLoader
177
+ jModel = _XlnetSequenceClassifierLoader(folder, spark_session._jsparkSession)._java_obj
178
+ return XlnetForSequenceClassification(java_model=jModel)
179
+
180
+ @staticmethod
181
+ def pretrained(name="xlnet_base_sequence_classifier_imdb", lang="en", remote_loc=None):
182
+ """Downloads and loads a pretrained model.
183
+
184
+ Parameters
185
+ ----------
186
+ name : str, optional
187
+ Name of the pretrained model, by default
188
+ "xlnet_base_sequence_classifier_imdb"
189
+ lang : str, optional
190
+ Language of the pretrained model, by default "en"
191
+ remote_loc : str, optional
192
+ Optional remote address of the resource, by default None. Will use
193
+ Spark NLPs repositories otherwise.
194
+
195
+ Returns
196
+ -------
197
+ XlnetForSequenceClassification
198
+ The restored model
199
+ """
200
+ from sparknlp.pretrained import ResourceDownloader
201
+ return ResourceDownloader.downloadModel(XlnetForSequenceClassification, name, lang, remote_loc)