spark-nlp 2.6.3rc1__py2.py3-none-any.whl → 6.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329)
  1. com/johnsnowlabs/ml/__init__.py +0 -0
  2. com/johnsnowlabs/ml/ai/__init__.py +10 -0
  3. com/johnsnowlabs/nlp/__init__.py +4 -2
  4. spark_nlp-6.2.1.dist-info/METADATA +362 -0
  5. spark_nlp-6.2.1.dist-info/RECORD +292 -0
  6. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/WHEEL +1 -1
  7. sparknlp/__init__.py +281 -27
  8. sparknlp/annotation.py +137 -6
  9. sparknlp/annotation_audio.py +61 -0
  10. sparknlp/annotation_image.py +82 -0
  11. sparknlp/annotator/__init__.py +93 -0
  12. sparknlp/annotator/audio/__init__.py +16 -0
  13. sparknlp/annotator/audio/hubert_for_ctc.py +188 -0
  14. sparknlp/annotator/audio/wav2vec2_for_ctc.py +161 -0
  15. sparknlp/annotator/audio/whisper_for_ctc.py +251 -0
  16. sparknlp/annotator/chunk2_doc.py +85 -0
  17. sparknlp/annotator/chunker.py +137 -0
  18. sparknlp/annotator/classifier_dl/__init__.py +61 -0
  19. sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
  20. sparknlp/annotator/classifier_dl/albert_for_question_answering.py +172 -0
  21. sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +201 -0
  22. sparknlp/annotator/classifier_dl/albert_for_token_classification.py +179 -0
  23. sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
  24. sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
  25. sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +161 -0
  26. sparknlp/annotator/classifier_dl/bert_for_question_answering.py +168 -0
  27. sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +202 -0
  28. sparknlp/annotator/classifier_dl/bert_for_token_classification.py +177 -0
  29. sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +212 -0
  30. sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +168 -0
  31. sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +205 -0
  32. sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +173 -0
  33. sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
  34. sparknlp/annotator/classifier_dl/classifier_dl.py +320 -0
  35. sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +168 -0
  36. sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +198 -0
  37. sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +175 -0
  38. sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +193 -0
  39. sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +168 -0
  40. sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +201 -0
  41. sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +175 -0
  42. sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +211 -0
  43. sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
  44. sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +168 -0
  45. sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +201 -0
  46. sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +176 -0
  47. sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +148 -0
  48. sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +188 -0
  49. sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
  50. sparknlp/annotator/classifier_dl/multi_classifier_dl.py +395 -0
  51. sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
  52. sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +168 -0
  53. sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +201 -0
  54. sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +189 -0
  55. sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +225 -0
  56. sparknlp/annotator/classifier_dl/sentiment_dl.py +378 -0
  57. sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +170 -0
  58. sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
  59. sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +168 -0
  60. sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +201 -0
  61. sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +173 -0
  62. sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +225 -0
  63. sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +201 -0
  64. sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +176 -0
  65. sparknlp/annotator/cleaners/__init__.py +15 -0
  66. sparknlp/annotator/cleaners/cleaner.py +202 -0
  67. sparknlp/annotator/cleaners/extractor.py +191 -0
  68. sparknlp/annotator/coref/__init__.py +1 -0
  69. sparknlp/annotator/coref/spanbert_coref.py +221 -0
  70. sparknlp/annotator/cv/__init__.py +29 -0
  71. sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
  72. sparknlp/annotator/cv/clip_for_zero_shot_classification.py +193 -0
  73. sparknlp/annotator/cv/convnext_for_image_classification.py +269 -0
  74. sparknlp/annotator/cv/florence2_transformer.py +180 -0
  75. sparknlp/annotator/cv/gemma3_for_multimodal.py +346 -0
  76. sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
  77. sparknlp/annotator/cv/janus_for_multimodal.py +351 -0
  78. sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
  79. sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
  80. sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
  81. sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
  82. sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
  83. sparknlp/annotator/cv/smolvlm_transformer.py +426 -0
  84. sparknlp/annotator/cv/swin_for_image_classification.py +242 -0
  85. sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +240 -0
  86. sparknlp/annotator/cv/vit_for_image_classification.py +217 -0
  87. sparknlp/annotator/dataframe_optimizer.py +216 -0
  88. sparknlp/annotator/date2_chunk.py +88 -0
  89. sparknlp/annotator/dependency/__init__.py +17 -0
  90. sparknlp/annotator/dependency/dependency_parser.py +294 -0
  91. sparknlp/annotator/dependency/typed_dependency_parser.py +318 -0
  92. sparknlp/annotator/document_character_text_splitter.py +228 -0
  93. sparknlp/annotator/document_normalizer.py +235 -0
  94. sparknlp/annotator/document_token_splitter.py +175 -0
  95. sparknlp/annotator/document_token_splitter_test.py +85 -0
  96. sparknlp/annotator/embeddings/__init__.py +45 -0
  97. sparknlp/annotator/embeddings/albert_embeddings.py +230 -0
  98. sparknlp/annotator/embeddings/auto_gguf_embeddings.py +539 -0
  99. sparknlp/annotator/embeddings/bert_embeddings.py +208 -0
  100. sparknlp/annotator/embeddings/bert_sentence_embeddings.py +224 -0
  101. sparknlp/annotator/embeddings/bge_embeddings.py +199 -0
  102. sparknlp/annotator/embeddings/camembert_embeddings.py +210 -0
  103. sparknlp/annotator/embeddings/chunk_embeddings.py +149 -0
  104. sparknlp/annotator/embeddings/deberta_embeddings.py +208 -0
  105. sparknlp/annotator/embeddings/distil_bert_embeddings.py +221 -0
  106. sparknlp/annotator/embeddings/doc2vec.py +352 -0
  107. sparknlp/annotator/embeddings/e5_embeddings.py +195 -0
  108. sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
  109. sparknlp/annotator/embeddings/elmo_embeddings.py +251 -0
  110. sparknlp/annotator/embeddings/instructor_embeddings.py +204 -0
  111. sparknlp/annotator/embeddings/longformer_embeddings.py +211 -0
  112. sparknlp/annotator/embeddings/minilm_embeddings.py +189 -0
  113. sparknlp/annotator/embeddings/mpnet_embeddings.py +192 -0
  114. sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
  115. sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
  116. sparknlp/annotator/embeddings/roberta_embeddings.py +225 -0
  117. sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +191 -0
  118. sparknlp/annotator/embeddings/sentence_embeddings.py +134 -0
  119. sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
  120. sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
  121. sparknlp/annotator/embeddings/universal_sentence_encoder.py +211 -0
  122. sparknlp/annotator/embeddings/word2vec.py +353 -0
  123. sparknlp/annotator/embeddings/word_embeddings.py +385 -0
  124. sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +225 -0
  125. sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +194 -0
  126. sparknlp/annotator/embeddings/xlnet_embeddings.py +227 -0
  127. sparknlp/annotator/er/__init__.py +16 -0
  128. sparknlp/annotator/er/entity_ruler.py +267 -0
  129. sparknlp/annotator/graph_extraction.py +368 -0
  130. sparknlp/annotator/keyword_extraction/__init__.py +16 -0
  131. sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +270 -0
  132. sparknlp/annotator/ld_dl/__init__.py +16 -0
  133. sparknlp/annotator/ld_dl/language_detector_dl.py +199 -0
  134. sparknlp/annotator/lemmatizer.py +250 -0
  135. sparknlp/annotator/matcher/__init__.py +20 -0
  136. sparknlp/annotator/matcher/big_text_matcher.py +272 -0
  137. sparknlp/annotator/matcher/date_matcher.py +303 -0
  138. sparknlp/annotator/matcher/multi_date_matcher.py +109 -0
  139. sparknlp/annotator/matcher/regex_matcher.py +221 -0
  140. sparknlp/annotator/matcher/text_matcher.py +290 -0
  141. sparknlp/annotator/n_gram_generator.py +141 -0
  142. sparknlp/annotator/ner/__init__.py +21 -0
  143. sparknlp/annotator/ner/ner_approach.py +94 -0
  144. sparknlp/annotator/ner/ner_converter.py +148 -0
  145. sparknlp/annotator/ner/ner_crf.py +397 -0
  146. sparknlp/annotator/ner/ner_dl.py +591 -0
  147. sparknlp/annotator/ner/ner_dl_graph_checker.py +293 -0
  148. sparknlp/annotator/ner/ner_overwriter.py +166 -0
  149. sparknlp/annotator/ner/zero_shot_ner_model.py +173 -0
  150. sparknlp/annotator/normalizer.py +230 -0
  151. sparknlp/annotator/openai/__init__.py +16 -0
  152. sparknlp/annotator/openai/openai_completion.py +349 -0
  153. sparknlp/annotator/openai/openai_embeddings.py +106 -0
  154. sparknlp/annotator/param/__init__.py +17 -0
  155. sparknlp/annotator/param/classifier_encoder.py +98 -0
  156. sparknlp/annotator/param/evaluation_dl_params.py +130 -0
  157. sparknlp/annotator/pos/__init__.py +16 -0
  158. sparknlp/annotator/pos/perceptron.py +263 -0
  159. sparknlp/annotator/sentence/__init__.py +17 -0
  160. sparknlp/annotator/sentence/sentence_detector.py +290 -0
  161. sparknlp/annotator/sentence/sentence_detector_dl.py +467 -0
  162. sparknlp/annotator/sentiment/__init__.py +17 -0
  163. sparknlp/annotator/sentiment/sentiment_detector.py +208 -0
  164. sparknlp/annotator/sentiment/vivekn_sentiment.py +242 -0
  165. sparknlp/annotator/seq2seq/__init__.py +35 -0
  166. sparknlp/annotator/seq2seq/auto_gguf_model.py +304 -0
  167. sparknlp/annotator/seq2seq/auto_gguf_reranker.py +334 -0
  168. sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +336 -0
  169. sparknlp/annotator/seq2seq/bart_transformer.py +420 -0
  170. sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
  171. sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
  172. sparknlp/annotator/seq2seq/gpt2_transformer.py +363 -0
  173. sparknlp/annotator/seq2seq/llama2_transformer.py +343 -0
  174. sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
  175. sparknlp/annotator/seq2seq/m2m100_transformer.py +392 -0
  176. sparknlp/annotator/seq2seq/marian_transformer.py +374 -0
  177. sparknlp/annotator/seq2seq/mistral_transformer.py +348 -0
  178. sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
  179. sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
  180. sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
  181. sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
  182. sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
  183. sparknlp/annotator/seq2seq/qwen_transformer.py +340 -0
  184. sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
  185. sparknlp/annotator/seq2seq/t5_transformer.py +425 -0
  186. sparknlp/annotator/similarity/__init__.py +0 -0
  187. sparknlp/annotator/similarity/document_similarity_ranker.py +379 -0
  188. sparknlp/annotator/spell_check/__init__.py +18 -0
  189. sparknlp/annotator/spell_check/context_spell_checker.py +911 -0
  190. sparknlp/annotator/spell_check/norvig_sweeting.py +358 -0
  191. sparknlp/annotator/spell_check/symmetric_delete.py +299 -0
  192. sparknlp/annotator/stemmer.py +79 -0
  193. sparknlp/annotator/stop_words_cleaner.py +190 -0
  194. sparknlp/annotator/tf_ner_dl_graph_builder.py +179 -0
  195. sparknlp/annotator/token/__init__.py +19 -0
  196. sparknlp/annotator/token/chunk_tokenizer.py +118 -0
  197. sparknlp/annotator/token/recursive_tokenizer.py +205 -0
  198. sparknlp/annotator/token/regex_tokenizer.py +208 -0
  199. sparknlp/annotator/token/tokenizer.py +561 -0
  200. sparknlp/annotator/token2_chunk.py +76 -0
  201. sparknlp/annotator/ws/__init__.py +16 -0
  202. sparknlp/annotator/ws/word_segmenter.py +429 -0
  203. sparknlp/base/__init__.py +30 -0
  204. sparknlp/base/audio_assembler.py +95 -0
  205. sparknlp/base/doc2_chunk.py +169 -0
  206. sparknlp/base/document_assembler.py +164 -0
  207. sparknlp/base/embeddings_finisher.py +201 -0
  208. sparknlp/base/finisher.py +217 -0
  209. sparknlp/base/gguf_ranking_finisher.py +234 -0
  210. sparknlp/base/graph_finisher.py +125 -0
  211. sparknlp/base/has_recursive_fit.py +24 -0
  212. sparknlp/base/has_recursive_transform.py +22 -0
  213. sparknlp/base/image_assembler.py +172 -0
  214. sparknlp/base/light_pipeline.py +429 -0
  215. sparknlp/base/multi_document_assembler.py +164 -0
  216. sparknlp/base/prompt_assembler.py +207 -0
  217. sparknlp/base/recursive_pipeline.py +107 -0
  218. sparknlp/base/table_assembler.py +145 -0
  219. sparknlp/base/token_assembler.py +124 -0
  220. sparknlp/common/__init__.py +26 -0
  221. sparknlp/common/annotator_approach.py +41 -0
  222. sparknlp/common/annotator_model.py +47 -0
  223. sparknlp/common/annotator_properties.py +114 -0
  224. sparknlp/common/annotator_type.py +38 -0
  225. sparknlp/common/completion_post_processing.py +37 -0
  226. sparknlp/common/coverage_result.py +22 -0
  227. sparknlp/common/match_strategy.py +33 -0
  228. sparknlp/common/properties.py +1298 -0
  229. sparknlp/common/read_as.py +33 -0
  230. sparknlp/common/recursive_annotator_approach.py +35 -0
  231. sparknlp/common/storage.py +149 -0
  232. sparknlp/common/utils.py +39 -0
  233. sparknlp/functions.py +315 -5
  234. sparknlp/internal/__init__.py +1199 -0
  235. sparknlp/internal/annotator_java_ml.py +32 -0
  236. sparknlp/internal/annotator_transformer.py +37 -0
  237. sparknlp/internal/extended_java_wrapper.py +63 -0
  238. sparknlp/internal/params_getters_setters.py +71 -0
  239. sparknlp/internal/recursive.py +70 -0
  240. sparknlp/logging/__init__.py +15 -0
  241. sparknlp/logging/comet.py +467 -0
  242. sparknlp/partition/__init__.py +16 -0
  243. sparknlp/partition/partition.py +244 -0
  244. sparknlp/partition/partition_properties.py +902 -0
  245. sparknlp/partition/partition_transformer.py +200 -0
  246. sparknlp/pretrained/__init__.py +17 -0
  247. sparknlp/pretrained/pretrained_pipeline.py +158 -0
  248. sparknlp/pretrained/resource_downloader.py +216 -0
  249. sparknlp/pretrained/utils.py +35 -0
  250. sparknlp/reader/__init__.py +15 -0
  251. sparknlp/reader/enums.py +19 -0
  252. sparknlp/reader/pdf_to_text.py +190 -0
  253. sparknlp/reader/reader2doc.py +124 -0
  254. sparknlp/reader/reader2image.py +136 -0
  255. sparknlp/reader/reader2table.py +44 -0
  256. sparknlp/reader/reader_assembler.py +159 -0
  257. sparknlp/reader/sparknlp_reader.py +461 -0
  258. sparknlp/training/__init__.py +20 -0
  259. sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  260. sparknlp/training/_tf_graph_builders/graph_builders.py +299 -0
  261. sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  262. sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +41 -0
  263. sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +78 -0
  264. sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +521 -0
  265. sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +62 -0
  266. sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +28 -0
  267. sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +36 -0
  268. sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +385 -0
  269. sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +183 -0
  270. sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +235 -0
  271. sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +665 -0
  272. sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +245 -0
  273. sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +4006 -0
  274. sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  275. sparknlp/training/_tf_graph_builders_1x/graph_builders.py +277 -0
  276. sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  277. sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +34 -0
  278. sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +78 -0
  279. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +532 -0
  280. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +62 -0
  281. sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +28 -0
  282. sparknlp/training/conll.py +150 -0
  283. sparknlp/training/conllu.py +103 -0
  284. sparknlp/training/pos.py +103 -0
  285. sparknlp/training/pub_tator.py +76 -0
  286. sparknlp/training/spacy_to_annotation.py +57 -0
  287. sparknlp/training/tfgraphs.py +5 -0
  288. sparknlp/upload_to_hub.py +149 -0
  289. sparknlp/util.py +51 -5
  290. com/__init__.pyc +0 -0
  291. com/__pycache__/__init__.cpython-36.pyc +0 -0
  292. com/johnsnowlabs/__init__.pyc +0 -0
  293. com/johnsnowlabs/__pycache__/__init__.cpython-36.pyc +0 -0
  294. com/johnsnowlabs/nlp/__init__.pyc +0 -0
  295. com/johnsnowlabs/nlp/__pycache__/__init__.cpython-36.pyc +0 -0
  296. spark_nlp-2.6.3rc1.dist-info/METADATA +0 -36
  297. spark_nlp-2.6.3rc1.dist-info/RECORD +0 -48
  298. sparknlp/__init__.pyc +0 -0
  299. sparknlp/__pycache__/__init__.cpython-36.pyc +0 -0
  300. sparknlp/__pycache__/annotation.cpython-36.pyc +0 -0
  301. sparknlp/__pycache__/annotator.cpython-36.pyc +0 -0
  302. sparknlp/__pycache__/base.cpython-36.pyc +0 -0
  303. sparknlp/__pycache__/common.cpython-36.pyc +0 -0
  304. sparknlp/__pycache__/embeddings.cpython-36.pyc +0 -0
  305. sparknlp/__pycache__/functions.cpython-36.pyc +0 -0
  306. sparknlp/__pycache__/internal.cpython-36.pyc +0 -0
  307. sparknlp/__pycache__/pretrained.cpython-36.pyc +0 -0
  308. sparknlp/__pycache__/storage.cpython-36.pyc +0 -0
  309. sparknlp/__pycache__/training.cpython-36.pyc +0 -0
  310. sparknlp/__pycache__/util.cpython-36.pyc +0 -0
  311. sparknlp/annotation.pyc +0 -0
  312. sparknlp/annotator.py +0 -3006
  313. sparknlp/annotator.pyc +0 -0
  314. sparknlp/base.py +0 -347
  315. sparknlp/base.pyc +0 -0
  316. sparknlp/common.py +0 -193
  317. sparknlp/common.pyc +0 -0
  318. sparknlp/embeddings.py +0 -40
  319. sparknlp/embeddings.pyc +0 -0
  320. sparknlp/internal.py +0 -288
  321. sparknlp/internal.pyc +0 -0
  322. sparknlp/pretrained.py +0 -123
  323. sparknlp/pretrained.pyc +0 -0
  324. sparknlp/storage.py +0 -32
  325. sparknlp/storage.pyc +0 -0
  326. sparknlp/training.py +0 -62
  327. sparknlp/training.pyc +0 -0
  328. sparknlp/util.pyc +0 -0
  329. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/top_level.txt +0 -0
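Note on the layout change visible in the list above: the 2.6.x wheel shipped monolithic modules (sparknlp/annotator.py, sparknlp/base.py, sparknlp/common.py, ...) plus compiled .pyc bytecode, all of which 6.2.1 deletes in favor of the new subpackages (sparknlp/annotator/, sparknlp/base/, sparknlp/common/, ...). Below is a minimal import smoke test against the 6.2.1 layout, sketched from the file list; the package-level re-exports are an assumption based on the __init__.py files shown above, not something this diff spells out.

import sparknlp
from sparknlp.base import DocumentAssembler  # was sparknlp/base.py, now the sparknlp/base/ package
from sparknlp.annotator import Tokenizer  # was sparknlp/annotator.py, now the sparknlp/annotator/ package
from sparknlp.annotator.classifier_dl import AlbertForTokenClassification  # new in 6.x

spark = sparknlp.start()  # assumes the usual entry point is unchanged across the major versions
print(sparknlp.version())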
sparknlp/annotator/classifier_dl/albert_for_token_classification.py
@@ -0,0 +1,179 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains classes for AlbertForTokenClassification."""
+
+ from sparknlp.common import *
+
+
+ class AlbertForTokenClassification(AnnotatorModel,
+                                    HasCaseSensitiveProperties,
+                                    HasBatchedAnnotate,
+                                    HasEngine,
+                                    HasMaxSentenceLengthLimit):
+     """AlbertForTokenClassification can load ALBERT models with a token
+     classification head on top (a linear layer on top of the hidden-states
+     output), e.g. for Named-Entity-Recognition (NER) tasks.
+
+     Pretrained models can be loaded with :meth:`.pretrained` of the companion
+     object:
+
+     >>> token_classifier = AlbertForTokenClassification.pretrained() \\
+     ...     .setInputCols(["token", "document"]) \\
+     ...     .setOutputCol("label")
+
+     The default model is ``"albert_base_token_classifier_conll03"``, if no name
+     is provided.
+
+     For available pretrained models please see the `Models Hub
+     <https://sparknlp.org/models?task=Named+Entity+Recognition>`__.
+
+     To see which models are compatible and how to import them see
+     `Import Transformers into Spark NLP 🚀
+     <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``DOCUMENT, TOKEN``    ``NAMED_ENTITY``
+     ====================== ======================
+
+     Parameters
+     ----------
+     batchSize
+         Batch size. Large values allow faster processing but require more
+         memory, by default 8
+     caseSensitive
+         Whether to ignore case in tokens for embeddings matching, by default
+         False
+     configProtoBytes
+         ConfigProto from tensorflow, serialized into byte array.
+     maxSentenceLength
+         Max sentence length to process, by default 128
+
+     Examples
+     --------
+     >>> import sparknlp
+     >>> from sparknlp.base import *
+     >>> from sparknlp.annotator import *
+     >>> from pyspark.ml import Pipeline
+     >>> documentAssembler = DocumentAssembler() \\
+     ...     .setInputCol("text") \\
+     ...     .setOutputCol("document")
+     >>> tokenizer = Tokenizer() \\
+     ...     .setInputCols(["document"]) \\
+     ...     .setOutputCol("token")
+     >>> tokenClassifier = AlbertForTokenClassification.pretrained() \\
+     ...     .setInputCols(["token", "document"]) \\
+     ...     .setOutputCol("label") \\
+     ...     .setCaseSensitive(True)
+     >>> pipeline = Pipeline().setStages([
+     ...     documentAssembler,
+     ...     tokenizer,
+     ...     tokenClassifier
+     ... ])
+     >>> data = spark.createDataFrame([["John Lenon was born in London and lived in Paris. My name is Sarah and I live in London"]]).toDF("text")
+     >>> result = pipeline.fit(data).transform(data)
+     >>> result.select("label.result").show(truncate=False)
+     +------------------------------------------------------------------------------------+
+     |result                                                                              |
+     +------------------------------------------------------------------------------------+
+     |[B-PER, I-PER, O, O, O, B-LOC, O, O, O, B-LOC, O, O, O, O, B-PER, O, O, O, O, B-LOC]|
+     +------------------------------------------------------------------------------------+
+
+     See Also
+     --------
+     AlbertEmbeddings : for token-level embeddings
+     """
+
+     name = "AlbertForTokenClassification"
+
+     inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]
+
+     outputAnnotatorType = AnnotatorType.NAMED_ENTITY
+
+     configProtoBytes = Param(Params._dummy(),
+                              "configProtoBytes",
+                              "ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()",
+                              TypeConverters.toListInt)
+
+     def getClasses(self):
+         """
+         Returns labels used to train this model
+         """
+         return self._call_java("getClasses")
+
+     def setConfigProtoBytes(self, b):
+         """Sets configProto from tensorflow, serialized into byte array.
+
+         Parameters
+         ----------
+         b : List[int]
+             ConfigProto from tensorflow, serialized into byte array
+         """
+         return self._set(configProtoBytes=b)
+
+     @keyword_only
+     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.AlbertForTokenClassification",
+                  java_model=None):
+         super(AlbertForTokenClassification, self).__init__(
+             classname=classname,
+             java_model=java_model
+         )
+         self._setDefault(
+             batchSize=8,
+             maxSentenceLength=128,
+             caseSensitive=False
+         )
+
+     @staticmethod
+     def loadSavedModel(folder, spark_session):
+         """Loads a locally saved model.
+
+         Parameters
+         ----------
+         folder : str
+             Folder of the saved model
+         spark_session : pyspark.sql.SparkSession
+             The current SparkSession
+
+         Returns
+         -------
+         AlbertForTokenClassification
+             The restored model
+         """
+         from sparknlp.internal import _AlbertTokenClassifierLoader
+         jModel = _AlbertTokenClassifierLoader(folder, spark_session._jsparkSession)._java_obj
+         return AlbertForTokenClassification(java_model=jModel)
+
+     @staticmethod
+     def pretrained(name="albert_base_token_classifier_conll03", lang="en", remote_loc=None):
+         """Downloads and loads a pretrained model.
+
+         Parameters
+         ----------
+         name : str, optional
+             Name of the pretrained model, by default
+             "albert_base_token_classifier_conll03"
+         lang : str, optional
+             Language of the pretrained model, by default "en"
+         remote_loc : str, optional
+             Optional remote address of the resource, by default None. Will use
+             Spark NLP's repositories otherwise.
+
+         Returns
+         -------
+         AlbertForTokenClassification
+             The restored model
+         """
+         from sparknlp.pretrained import ResourceDownloader
+         return ResourceDownloader.downloadModel(AlbertForTokenClassification, name, lang, remote_loc)
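The loadSavedModel / pretrained pair above is the usual route for bringing an externally converted ALBERT checkpoint into Spark NLP (the export step is covered in the linked discussion). A hedged sketch of the round trip; the ./albert_ner_export path is hypothetical, and only methods shown in the diff plus standard Spark ML persistence (write/save/load) are used.

import sparknlp
from sparknlp.annotator import AlbertForTokenClassification

spark = sparknlp.start()

# Wrap a locally exported checkpoint (hypothetical path) as an annotator.
ner = AlbertForTokenClassification.loadSavedModel("./albert_ner_export", spark) \
    .setInputCols(["token", "document"]) \
    .setOutputCol("label")

# Persist it as a Spark NLP model so later runs can skip the converter step.
ner.write().overwrite().save("./albert_ner_spark_nlp")
restored = AlbertForTokenClassification.load("./albert_ner_spark_nlp")
print(restored.getClasses())  # the labels baked into the model, per getClasses() above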
sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py
@@ -0,0 +1,211 @@
+ # Copyright 2017-2024 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Contains classes for AlbertForZeroShotClassification."""
+
+ from sparknlp.common import *
+
+
+ class AlbertForZeroShotClassification(AnnotatorModel,
+                                       HasCaseSensitiveProperties,
+                                       HasBatchedAnnotate,
+                                       HasClassifierActivationProperties,
+                                       HasCandidateLabelsProperties,
+                                       HasEngine,
+                                       HasMaxSentenceLengthLimit):
+     """AlbertForZeroShotClassification using a `ModelForSequenceClassification` trained on NLI (natural language
+     inference) tasks. Equivalent of `AlbertForSequenceClassification` models, but these models don't require a hardcoded
+     number of potential classes; they can be chosen at runtime. This usually makes them slower but much more
+     flexible.
+
+     Note that the model will loop through all provided labels. So the more labels you have, the
+     longer this process will take.
+
+     Any combination of sequences and labels can be passed and each combination will be posed as a premise/hypothesis
+     pair and passed to the pretrained model.
+
+     Pretrained models can be loaded with :meth:`.pretrained` of the companion
+     object:
+
+     >>> sequenceClassifier = AlbertForZeroShotClassification.pretrained() \\
+     ...     .setInputCols(["token", "document"]) \\
+     ...     .setOutputCol("label")
+
+     The default model is ``"albert_zero_shot_classifier_onnx"``, if no name is
+     provided.
+
+     For available pretrained models please see the `Models Hub
+     <https://sparknlp.org/models?task=Text+Classification>`__.
+
+     To see which models are compatible and how to import them see
+     `Import Transformers into Spark NLP 🚀
+     <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``DOCUMENT, TOKEN``    ``CATEGORY``
+     ====================== ======================
+
+     Parameters
+     ----------
+     batchSize
+         Batch size. Large values allow faster processing but require more
+         memory, by default 8
+     caseSensitive
+         Whether to ignore case in tokens for embeddings matching, by default
+         True
+     configProtoBytes
+         ConfigProto from tensorflow, serialized into byte array.
+     maxSentenceLength
+         Max sentence length to process, by default 128
+     coalesceSentences
+         Instead of 1 class per sentence (if inputCols is `sentence`) output 1
+         class per document by averaging probabilities in all sentences, by
+         default False
+     activation
+         Whether to calculate logits via Softmax or Sigmoid, by default
+         `"softmax"`.
+
+     Examples
+     --------
+     >>> import sparknlp
+     >>> from sparknlp.base import *
+     >>> from sparknlp.annotator import *
+     >>> from pyspark.ml import Pipeline
+     >>> documentAssembler = DocumentAssembler() \\
+     ...     .setInputCol("text") \\
+     ...     .setOutputCol("document")
+     >>> tokenizer = Tokenizer() \\
+     ...     .setInputCols(["document"]) \\
+     ...     .setOutputCol("token")
+     >>> sequenceClassifier = AlbertForZeroShotClassification.pretrained() \\
+     ...     .setInputCols(["token", "document"]) \\
+     ...     .setOutputCol("label") \\
+     ...     .setCaseSensitive(True)
+     >>> pipeline = Pipeline().setStages([
+     ...     documentAssembler,
+     ...     tokenizer,
+     ...     sequenceClassifier
+     ... ])
+     >>> data = spark.createDataFrame([["I have a problem with my iphone that needs to be resolved asap!!"]]).toDF("text")
+     >>> result = pipeline.fit(data).transform(data)
+     >>> result.select("label.result").show(truncate=False)
+     +---------+
+     |result   |
+     +---------+
+     |[urgent] |
+     +---------+
+     """
+     name = "AlbertForZeroShotClassification"
+
+     inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]
+
+     outputAnnotatorType = AnnotatorType.CATEGORY
+
+     configProtoBytes = Param(Params._dummy(),
+                              "configProtoBytes",
+                              "ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()",
+                              TypeConverters.toListInt)
+
+     coalesceSentences = Param(Params._dummy(), "coalesceSentences",
+                               "Instead of 1 class per sentence (if inputCols is '''sentence''') output 1 class per document by averaging probabilities in all sentences.",
+                               TypeConverters.toBoolean)
+
+     def getClasses(self):
+         """
+         Returns labels used to train this model
+         """
+         return self._call_java("getClasses")
+
+     def setConfigProtoBytes(self, b):
+         """Sets configProto from tensorflow, serialized into byte array.
+
+         Parameters
+         ----------
+         b : List[int]
+             ConfigProto from tensorflow, serialized into byte array
+         """
+         return self._set(configProtoBytes=b)
+
+     def setCoalesceSentences(self, value):
+         """Instead of 1 class per sentence (if inputCols is '''sentence''') output 1 class per document by averaging
+         probabilities in all sentences. Due to the max sequence length limit in almost all transformer models such as ALBERT
+         (512 tokens), this parameter helps feed all the sentences into the model and average all the probabilities
+         for the entire document instead of probabilities per sentence. (Default: False)
+
+         Parameters
+         ----------
+         value : bool
+             If the output of all sentences will be averaged to one output
+         """
+         return self._set(coalesceSentences=value)
+
+     @keyword_only
+     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.AlbertForZeroShotClassification",
+                  java_model=None):
+         super(AlbertForZeroShotClassification, self).__init__(
+             classname=classname,
+             java_model=java_model
+         )
+         self._setDefault(
+             batchSize=8,
+             maxSentenceLength=128,
+             caseSensitive=True,
+             coalesceSentences=False,
+             activation="softmax"
+         )
+
+     @staticmethod
+     def loadSavedModel(folder, spark_session):
+         """Loads a locally saved model.
+
+         Parameters
+         ----------
+         folder : str
+             Folder of the saved model
+         spark_session : pyspark.sql.SparkSession
+             The current SparkSession
+
+         Returns
+         -------
+         AlbertForZeroShotClassification
+             The restored model
+         """
+         from sparknlp.internal import _AlbertForZeroShotClassificationLoader
+         jModel = _AlbertForZeroShotClassificationLoader(folder, spark_session._jsparkSession)._java_obj
+         return AlbertForZeroShotClassification(java_model=jModel)
+
+     @staticmethod
+     def pretrained(name="albert_zero_shot_classifier_onnx", lang="en", remote_loc=None):
+         """Downloads and loads a pretrained model.
+
+         Parameters
+         ----------
+         name : str, optional
+             Name of the pretrained model, by default
+             "albert_zero_shot_classifier_onnx"
+         lang : str, optional
+             Language of the pretrained model, by default "en"
+         remote_loc : str, optional
+             Optional remote address of the resource, by default None. Will use
+             Spark NLP's repositories otherwise.
+
+         Returns
+         -------
+         AlbertForZeroShotClassification
+             The restored model
+         """
+         from sparknlp.pretrained import ResourceDownloader
+         return ResourceDownloader.downloadModel(AlbertForZeroShotClassification, name, lang, remote_loc)
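One gap worth flagging in the docstring example above: a zero-shot classifier needs candidate labels at runtime, which the example never sets. The class mixes in HasCandidateLabelsProperties; in Spark NLP's other zero-shot annotators that mixin exposes a setCandidateLabels setter, which is assumed here.

from sparknlp.annotator import AlbertForZeroShotClassification

# Labels are chosen at runtime; each one becomes a premise/hypothesis pair,
# so inference cost grows linearly with the number of candidate labels.
zero_shot = AlbertForZeroShotClassification.pretrained() \
    .setInputCols(["token", "document"]) \
    .setOutputCol("label") \
    .setCandidateLabels(["urgent", "mobile", "technology", "travel", "sport"])  # assumed setter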
sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py
@@ -0,0 +1,225 @@
+ # Copyright 2017-2023 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains classes for BartForZeroShotClassification."""
+
+ from sparknlp.common import *
+
+
+ class BartForZeroShotClassification(AnnotatorModel,
+                                     HasCaseSensitiveProperties,
+                                     HasBatchedAnnotate,
+                                     HasClassifierActivationProperties,
+                                     HasCandidateLabelsProperties,
+                                     HasEngine):
+     """BartForZeroShotClassification using a `ModelForSequenceClassification` trained on NLI (natural language
+     inference) tasks. Equivalent of `BartForSequenceClassification` models, but these models don't require a hardcoded
+     number of potential classes; they can be chosen at runtime. This usually makes them slower but much more
+     flexible.
+
+     Note that the model will loop through all provided labels. So the more labels you have, the
+     longer this process will take.
+
+     Any combination of sequences and labels can be passed and each combination will be posed as a premise/hypothesis
+     pair and passed to the pretrained model.
+
+     Pretrained models can be loaded with :meth:`.pretrained` of the companion
+     object:
+
+     >>> sequenceClassifier = BartForZeroShotClassification.pretrained() \\
+     ...     .setInputCols(["token", "document"]) \\
+     ...     .setOutputCol("label")
+
+     The default model is ``"bart_large_zero_shot_classifier_mnli"``, if no name is
+     provided.
+
+     For available pretrained models please see the `Models Hub
+     <https://sparknlp.org/models?task=Text+Classification>`__.
+
+     To see which models are compatible and how to import them see
+     `Import Transformers into Spark NLP 🚀
+     <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``DOCUMENT, TOKEN``    ``CATEGORY``
+     ====================== ======================
+
+     Parameters
+     ----------
+     batchSize
+         Batch size. Large values allow faster processing but require more
+         memory, by default 8
+     caseSensitive
+         Whether to ignore case in tokens for embeddings matching, by default
+         True
+     configProtoBytes
+         ConfigProto from tensorflow, serialized into byte array.
+     maxSentenceLength
+         Max sentence length to process, by default 128
+     coalesceSentences
+         Instead of 1 class per sentence (if inputCols is `sentence`) output 1
+         class per document by averaging probabilities in all sentences, by
+         default False
+     activation
+         Whether to calculate logits via Softmax or Sigmoid, by default
+         `"softmax"`.
+
+     Examples
+     --------
+     >>> import sparknlp
+     >>> from sparknlp.base import *
+     >>> from sparknlp.annotator import *
+     >>> from pyspark.ml import Pipeline
+     >>> documentAssembler = DocumentAssembler() \\
+     ...     .setInputCol("text") \\
+     ...     .setOutputCol("document")
+     >>> tokenizer = Tokenizer() \\
+     ...     .setInputCols(["document"]) \\
+     ...     .setOutputCol("token")
+     >>> sequenceClassifier = BartForZeroShotClassification.pretrained() \\
+     ...     .setInputCols(["token", "document"]) \\
+     ...     .setOutputCol("label") \\
+     ...     .setCaseSensitive(True)
+     >>> pipeline = Pipeline().setStages([
+     ...     documentAssembler,
+     ...     tokenizer,
+     ...     sequenceClassifier
+     ... ])
+     >>> data = spark.createDataFrame([["I loved this movie when I was a child."], ["It was pretty boring."]]).toDF("text")
+     >>> result = pipeline.fit(data).transform(data)
+     >>> result.select("label.result").show(truncate=False)
+     +------+
+     |result|
+     +------+
+     |[pos] |
+     |[neg] |
+     +------+
+     """
+     name = "BartForZeroShotClassification"
+
+     inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]
+
+     outputAnnotatorType = AnnotatorType.CATEGORY
+
+     maxSentenceLength = Param(Params._dummy(),
+                               "maxSentenceLength",
+                               "Max sentence length to process",
+                               typeConverter=TypeConverters.toInt)
+
+     configProtoBytes = Param(Params._dummy(),
+                              "configProtoBytes",
+                              "ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()",
+                              TypeConverters.toListInt)
+
+     coalesceSentences = Param(Params._dummy(), "coalesceSentences",
+                               "Instead of 1 class per sentence (if inputCols is '''sentence''') output 1 class per document by averaging probabilities in all sentences.",
+                               TypeConverters.toBoolean)
+
+     def getClasses(self):
+         """
+         Returns labels used to train this model
+         """
+         return self._call_java("getClasses")
+
+     def setConfigProtoBytes(self, b):
+         """Sets configProto from tensorflow, serialized into byte array.
+
+         Parameters
+         ----------
+         b : List[int]
+             ConfigProto from tensorflow, serialized into byte array
+         """
+         return self._set(configProtoBytes=b)
+
+     def setMaxSentenceLength(self, value):
+         """Sets max sentence length to process, by default 128.
+
+         Parameters
+         ----------
+         value : int
+             Max sentence length to process
+         """
+         return self._set(maxSentenceLength=value)
+
+     def setCoalesceSentences(self, value):
+         """Instead of 1 class per sentence (if inputCols is '''sentence''') output 1 class per document by averaging
+         probabilities in all sentences. Due to the max sequence length limit in almost all transformer models such as Bart
+         (512 tokens), this parameter helps feed all the sentences into the model and average all the probabilities
+         for the entire document instead of probabilities per sentence. (Default: False)
+
+         Parameters
+         ----------
+         value : bool
+             If the output of all sentences will be averaged to one output
+         """
+         return self._set(coalesceSentences=value)
+
+     @keyword_only
+     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.BartForZeroShotClassification",
+                  java_model=None):
+         super(BartForZeroShotClassification, self).__init__(
+             classname=classname,
+             java_model=java_model
+         )
+         self._setDefault(
+             batchSize=8,
+             maxSentenceLength=128,
+             caseSensitive=True,
+             coalesceSentences=False,
+             activation="softmax"
+         )
+
+     @staticmethod
+     def loadSavedModel(folder, spark_session):
+         """Loads a locally saved model.
+
+         Parameters
+         ----------
+         folder : str
+             Folder of the saved model
+         spark_session : pyspark.sql.SparkSession
+             The current SparkSession
+
+         Returns
+         -------
+         BartForZeroShotClassification
+             The restored model
+         """
+         from sparknlp.internal import _BartForZeroShotClassification
+         jModel = _BartForZeroShotClassification(folder, spark_session._jsparkSession)._java_obj
+         return BartForZeroShotClassification(java_model=jModel)
+
+     @staticmethod
+     def pretrained(name="bart_large_zero_shot_classifier_mnli", lang="en", remote_loc=None):
+         """Downloads and loads a pretrained model.
+
+         Parameters
+         ----------
+         name : str, optional
+             Name of the pretrained model, by default
+             "bart_large_zero_shot_classifier_mnli"
+         lang : str, optional
+             Language of the pretrained model, by default "en"
+         remote_loc : str, optional
+             Optional remote address of the resource, by default None. Will use
+             Spark NLP's repositories otherwise.
+
+         Returns
+         -------
+         BartForZeroShotClassification
+             The restored model
+         """
+         from sparknlp.pretrained import ResourceDownloader
+         return ResourceDownloader.downloadModel(BartForZeroShotClassification, name, lang, remote_loc)
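The same candidate-labels caveat applies to the Bart example above. A hedged multi-label variant, combining the activation and coalesceSentences parameters documented in this file; the setActivation and setCandidateLabels setter names come from the HasClassifierActivationProperties and HasCandidateLabelsProperties mixins and are assumptions here.

from sparknlp.annotator import BartForZeroShotClassification

# Sigmoid scores each candidate label independently (multi-label), and
# coalesceSentences=True averages sentence-level probabilities into a
# single result per document, per setCoalesceSentences() above.
zero_shot = BartForZeroShotClassification.pretrained() \
    .setInputCols(["token", "document"]) \
    .setOutputCol("label") \
    .setActivation("sigmoid") \
    .setCoalesceSentences(True) \
    .setCandidateLabels(["positive", "negative", "neutral"])  # assumed setters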