spark-nlp 2.6.3rc1__py2.py3-none-any.whl → 6.2.1__py2.py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (329)
  1. com/johnsnowlabs/ml/__init__.py +0 -0
  2. com/johnsnowlabs/ml/ai/__init__.py +10 -0
  3. com/johnsnowlabs/nlp/__init__.py +4 -2
  4. spark_nlp-6.2.1.dist-info/METADATA +362 -0
  5. spark_nlp-6.2.1.dist-info/RECORD +292 -0
  6. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/WHEEL +1 -1
  7. sparknlp/__init__.py +281 -27
  8. sparknlp/annotation.py +137 -6
  9. sparknlp/annotation_audio.py +61 -0
  10. sparknlp/annotation_image.py +82 -0
  11. sparknlp/annotator/__init__.py +93 -0
  12. sparknlp/annotator/audio/__init__.py +16 -0
  13. sparknlp/annotator/audio/hubert_for_ctc.py +188 -0
  14. sparknlp/annotator/audio/wav2vec2_for_ctc.py +161 -0
  15. sparknlp/annotator/audio/whisper_for_ctc.py +251 -0
  16. sparknlp/annotator/chunk2_doc.py +85 -0
  17. sparknlp/annotator/chunker.py +137 -0
  18. sparknlp/annotator/classifier_dl/__init__.py +61 -0
  19. sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
  20. sparknlp/annotator/classifier_dl/albert_for_question_answering.py +172 -0
  21. sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +201 -0
  22. sparknlp/annotator/classifier_dl/albert_for_token_classification.py +179 -0
  23. sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
  24. sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
  25. sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +161 -0
  26. sparknlp/annotator/classifier_dl/bert_for_question_answering.py +168 -0
  27. sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +202 -0
  28. sparknlp/annotator/classifier_dl/bert_for_token_classification.py +177 -0
  29. sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +212 -0
  30. sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +168 -0
  31. sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +205 -0
  32. sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +173 -0
  33. sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
  34. sparknlp/annotator/classifier_dl/classifier_dl.py +320 -0
  35. sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +168 -0
  36. sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +198 -0
  37. sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +175 -0
  38. sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +193 -0
  39. sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +168 -0
  40. sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +201 -0
  41. sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +175 -0
  42. sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +211 -0
  43. sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
  44. sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +168 -0
  45. sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +201 -0
  46. sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +176 -0
  47. sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +148 -0
  48. sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +188 -0
  49. sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
  50. sparknlp/annotator/classifier_dl/multi_classifier_dl.py +395 -0
  51. sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
  52. sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +168 -0
  53. sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +201 -0
  54. sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +189 -0
  55. sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +225 -0
  56. sparknlp/annotator/classifier_dl/sentiment_dl.py +378 -0
  57. sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +170 -0
  58. sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
  59. sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +168 -0
  60. sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +201 -0
  61. sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +173 -0
  62. sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +225 -0
  63. sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +201 -0
  64. sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +176 -0
  65. sparknlp/annotator/cleaners/__init__.py +15 -0
  66. sparknlp/annotator/cleaners/cleaner.py +202 -0
  67. sparknlp/annotator/cleaners/extractor.py +191 -0
  68. sparknlp/annotator/coref/__init__.py +1 -0
  69. sparknlp/annotator/coref/spanbert_coref.py +221 -0
  70. sparknlp/annotator/cv/__init__.py +29 -0
  71. sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
  72. sparknlp/annotator/cv/clip_for_zero_shot_classification.py +193 -0
  73. sparknlp/annotator/cv/convnext_for_image_classification.py +269 -0
  74. sparknlp/annotator/cv/florence2_transformer.py +180 -0
  75. sparknlp/annotator/cv/gemma3_for_multimodal.py +346 -0
  76. sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
  77. sparknlp/annotator/cv/janus_for_multimodal.py +351 -0
  78. sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
  79. sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
  80. sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
  81. sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
  82. sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
  83. sparknlp/annotator/cv/smolvlm_transformer.py +426 -0
  84. sparknlp/annotator/cv/swin_for_image_classification.py +242 -0
  85. sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +240 -0
  86. sparknlp/annotator/cv/vit_for_image_classification.py +217 -0
  87. sparknlp/annotator/dataframe_optimizer.py +216 -0
  88. sparknlp/annotator/date2_chunk.py +88 -0
  89. sparknlp/annotator/dependency/__init__.py +17 -0
  90. sparknlp/annotator/dependency/dependency_parser.py +294 -0
  91. sparknlp/annotator/dependency/typed_dependency_parser.py +318 -0
  92. sparknlp/annotator/document_character_text_splitter.py +228 -0
  93. sparknlp/annotator/document_normalizer.py +235 -0
  94. sparknlp/annotator/document_token_splitter.py +175 -0
  95. sparknlp/annotator/document_token_splitter_test.py +85 -0
  96. sparknlp/annotator/embeddings/__init__.py +45 -0
  97. sparknlp/annotator/embeddings/albert_embeddings.py +230 -0
  98. sparknlp/annotator/embeddings/auto_gguf_embeddings.py +539 -0
  99. sparknlp/annotator/embeddings/bert_embeddings.py +208 -0
  100. sparknlp/annotator/embeddings/bert_sentence_embeddings.py +224 -0
  101. sparknlp/annotator/embeddings/bge_embeddings.py +199 -0
  102. sparknlp/annotator/embeddings/camembert_embeddings.py +210 -0
  103. sparknlp/annotator/embeddings/chunk_embeddings.py +149 -0
  104. sparknlp/annotator/embeddings/deberta_embeddings.py +208 -0
  105. sparknlp/annotator/embeddings/distil_bert_embeddings.py +221 -0
  106. sparknlp/annotator/embeddings/doc2vec.py +352 -0
  107. sparknlp/annotator/embeddings/e5_embeddings.py +195 -0
  108. sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
  109. sparknlp/annotator/embeddings/elmo_embeddings.py +251 -0
  110. sparknlp/annotator/embeddings/instructor_embeddings.py +204 -0
  111. sparknlp/annotator/embeddings/longformer_embeddings.py +211 -0
  112. sparknlp/annotator/embeddings/minilm_embeddings.py +189 -0
  113. sparknlp/annotator/embeddings/mpnet_embeddings.py +192 -0
  114. sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
  115. sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
  116. sparknlp/annotator/embeddings/roberta_embeddings.py +225 -0
  117. sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +191 -0
  118. sparknlp/annotator/embeddings/sentence_embeddings.py +134 -0
  119. sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
  120. sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
  121. sparknlp/annotator/embeddings/universal_sentence_encoder.py +211 -0
  122. sparknlp/annotator/embeddings/word2vec.py +353 -0
  123. sparknlp/annotator/embeddings/word_embeddings.py +385 -0
  124. sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +225 -0
  125. sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +194 -0
  126. sparknlp/annotator/embeddings/xlnet_embeddings.py +227 -0
  127. sparknlp/annotator/er/__init__.py +16 -0
  128. sparknlp/annotator/er/entity_ruler.py +267 -0
  129. sparknlp/annotator/graph_extraction.py +368 -0
  130. sparknlp/annotator/keyword_extraction/__init__.py +16 -0
  131. sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +270 -0
  132. sparknlp/annotator/ld_dl/__init__.py +16 -0
  133. sparknlp/annotator/ld_dl/language_detector_dl.py +199 -0
  134. sparknlp/annotator/lemmatizer.py +250 -0
  135. sparknlp/annotator/matcher/__init__.py +20 -0
  136. sparknlp/annotator/matcher/big_text_matcher.py +272 -0
  137. sparknlp/annotator/matcher/date_matcher.py +303 -0
  138. sparknlp/annotator/matcher/multi_date_matcher.py +109 -0
  139. sparknlp/annotator/matcher/regex_matcher.py +221 -0
  140. sparknlp/annotator/matcher/text_matcher.py +290 -0
  141. sparknlp/annotator/n_gram_generator.py +141 -0
  142. sparknlp/annotator/ner/__init__.py +21 -0
  143. sparknlp/annotator/ner/ner_approach.py +94 -0
  144. sparknlp/annotator/ner/ner_converter.py +148 -0
  145. sparknlp/annotator/ner/ner_crf.py +397 -0
  146. sparknlp/annotator/ner/ner_dl.py +591 -0
  147. sparknlp/annotator/ner/ner_dl_graph_checker.py +293 -0
  148. sparknlp/annotator/ner/ner_overwriter.py +166 -0
  149. sparknlp/annotator/ner/zero_shot_ner_model.py +173 -0
  150. sparknlp/annotator/normalizer.py +230 -0
  151. sparknlp/annotator/openai/__init__.py +16 -0
  152. sparknlp/annotator/openai/openai_completion.py +349 -0
  153. sparknlp/annotator/openai/openai_embeddings.py +106 -0
  154. sparknlp/annotator/param/__init__.py +17 -0
  155. sparknlp/annotator/param/classifier_encoder.py +98 -0
  156. sparknlp/annotator/param/evaluation_dl_params.py +130 -0
  157. sparknlp/annotator/pos/__init__.py +16 -0
  158. sparknlp/annotator/pos/perceptron.py +263 -0
  159. sparknlp/annotator/sentence/__init__.py +17 -0
  160. sparknlp/annotator/sentence/sentence_detector.py +290 -0
  161. sparknlp/annotator/sentence/sentence_detector_dl.py +467 -0
  162. sparknlp/annotator/sentiment/__init__.py +17 -0
  163. sparknlp/annotator/sentiment/sentiment_detector.py +208 -0
  164. sparknlp/annotator/sentiment/vivekn_sentiment.py +242 -0
  165. sparknlp/annotator/seq2seq/__init__.py +35 -0
  166. sparknlp/annotator/seq2seq/auto_gguf_model.py +304 -0
  167. sparknlp/annotator/seq2seq/auto_gguf_reranker.py +334 -0
  168. sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +336 -0
  169. sparknlp/annotator/seq2seq/bart_transformer.py +420 -0
  170. sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
  171. sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
  172. sparknlp/annotator/seq2seq/gpt2_transformer.py +363 -0
  173. sparknlp/annotator/seq2seq/llama2_transformer.py +343 -0
  174. sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
  175. sparknlp/annotator/seq2seq/m2m100_transformer.py +392 -0
  176. sparknlp/annotator/seq2seq/marian_transformer.py +374 -0
  177. sparknlp/annotator/seq2seq/mistral_transformer.py +348 -0
  178. sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
  179. sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
  180. sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
  181. sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
  182. sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
  183. sparknlp/annotator/seq2seq/qwen_transformer.py +340 -0
  184. sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
  185. sparknlp/annotator/seq2seq/t5_transformer.py +425 -0
  186. sparknlp/annotator/similarity/__init__.py +0 -0
  187. sparknlp/annotator/similarity/document_similarity_ranker.py +379 -0
  188. sparknlp/annotator/spell_check/__init__.py +18 -0
  189. sparknlp/annotator/spell_check/context_spell_checker.py +911 -0
  190. sparknlp/annotator/spell_check/norvig_sweeting.py +358 -0
  191. sparknlp/annotator/spell_check/symmetric_delete.py +299 -0
  192. sparknlp/annotator/stemmer.py +79 -0
  193. sparknlp/annotator/stop_words_cleaner.py +190 -0
  194. sparknlp/annotator/tf_ner_dl_graph_builder.py +179 -0
  195. sparknlp/annotator/token/__init__.py +19 -0
  196. sparknlp/annotator/token/chunk_tokenizer.py +118 -0
  197. sparknlp/annotator/token/recursive_tokenizer.py +205 -0
  198. sparknlp/annotator/token/regex_tokenizer.py +208 -0
  199. sparknlp/annotator/token/tokenizer.py +561 -0
  200. sparknlp/annotator/token2_chunk.py +76 -0
  201. sparknlp/annotator/ws/__init__.py +16 -0
  202. sparknlp/annotator/ws/word_segmenter.py +429 -0
  203. sparknlp/base/__init__.py +30 -0
  204. sparknlp/base/audio_assembler.py +95 -0
  205. sparknlp/base/doc2_chunk.py +169 -0
  206. sparknlp/base/document_assembler.py +164 -0
  207. sparknlp/base/embeddings_finisher.py +201 -0
  208. sparknlp/base/finisher.py +217 -0
  209. sparknlp/base/gguf_ranking_finisher.py +234 -0
  210. sparknlp/base/graph_finisher.py +125 -0
  211. sparknlp/base/has_recursive_fit.py +24 -0
  212. sparknlp/base/has_recursive_transform.py +22 -0
  213. sparknlp/base/image_assembler.py +172 -0
  214. sparknlp/base/light_pipeline.py +429 -0
  215. sparknlp/base/multi_document_assembler.py +164 -0
  216. sparknlp/base/prompt_assembler.py +207 -0
  217. sparknlp/base/recursive_pipeline.py +107 -0
  218. sparknlp/base/table_assembler.py +145 -0
  219. sparknlp/base/token_assembler.py +124 -0
  220. sparknlp/common/__init__.py +26 -0
  221. sparknlp/common/annotator_approach.py +41 -0
  222. sparknlp/common/annotator_model.py +47 -0
  223. sparknlp/common/annotator_properties.py +114 -0
  224. sparknlp/common/annotator_type.py +38 -0
  225. sparknlp/common/completion_post_processing.py +37 -0
  226. sparknlp/common/coverage_result.py +22 -0
  227. sparknlp/common/match_strategy.py +33 -0
  228. sparknlp/common/properties.py +1298 -0
  229. sparknlp/common/read_as.py +33 -0
  230. sparknlp/common/recursive_annotator_approach.py +35 -0
  231. sparknlp/common/storage.py +149 -0
  232. sparknlp/common/utils.py +39 -0
  233. sparknlp/functions.py +315 -5
  234. sparknlp/internal/__init__.py +1199 -0
  235. sparknlp/internal/annotator_java_ml.py +32 -0
  236. sparknlp/internal/annotator_transformer.py +37 -0
  237. sparknlp/internal/extended_java_wrapper.py +63 -0
  238. sparknlp/internal/params_getters_setters.py +71 -0
  239. sparknlp/internal/recursive.py +70 -0
  240. sparknlp/logging/__init__.py +15 -0
  241. sparknlp/logging/comet.py +467 -0
  242. sparknlp/partition/__init__.py +16 -0
  243. sparknlp/partition/partition.py +244 -0
  244. sparknlp/partition/partition_properties.py +902 -0
  245. sparknlp/partition/partition_transformer.py +200 -0
  246. sparknlp/pretrained/__init__.py +17 -0
  247. sparknlp/pretrained/pretrained_pipeline.py +158 -0
  248. sparknlp/pretrained/resource_downloader.py +216 -0
  249. sparknlp/pretrained/utils.py +35 -0
  250. sparknlp/reader/__init__.py +15 -0
  251. sparknlp/reader/enums.py +19 -0
  252. sparknlp/reader/pdf_to_text.py +190 -0
  253. sparknlp/reader/reader2doc.py +124 -0
  254. sparknlp/reader/reader2image.py +136 -0
  255. sparknlp/reader/reader2table.py +44 -0
  256. sparknlp/reader/reader_assembler.py +159 -0
  257. sparknlp/reader/sparknlp_reader.py +461 -0
  258. sparknlp/training/__init__.py +20 -0
  259. sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  260. sparknlp/training/_tf_graph_builders/graph_builders.py +299 -0
  261. sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  262. sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +41 -0
  263. sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +78 -0
  264. sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +521 -0
  265. sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +62 -0
  266. sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +28 -0
  267. sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +36 -0
  268. sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +385 -0
  269. sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +183 -0
  270. sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +235 -0
  271. sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +665 -0
  272. sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +245 -0
  273. sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +4006 -0
  274. sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  275. sparknlp/training/_tf_graph_builders_1x/graph_builders.py +277 -0
  276. sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  277. sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +34 -0
  278. sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +78 -0
  279. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +532 -0
  280. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +62 -0
  281. sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +28 -0
  282. sparknlp/training/conll.py +150 -0
  283. sparknlp/training/conllu.py +103 -0
  284. sparknlp/training/pos.py +103 -0
  285. sparknlp/training/pub_tator.py +76 -0
  286. sparknlp/training/spacy_to_annotation.py +57 -0
  287. sparknlp/training/tfgraphs.py +5 -0
  288. sparknlp/upload_to_hub.py +149 -0
  289. sparknlp/util.py +51 -5
  290. com/__init__.pyc +0 -0
  291. com/__pycache__/__init__.cpython-36.pyc +0 -0
  292. com/johnsnowlabs/__init__.pyc +0 -0
  293. com/johnsnowlabs/__pycache__/__init__.cpython-36.pyc +0 -0
  294. com/johnsnowlabs/nlp/__init__.pyc +0 -0
  295. com/johnsnowlabs/nlp/__pycache__/__init__.cpython-36.pyc +0 -0
  296. spark_nlp-2.6.3rc1.dist-info/METADATA +0 -36
  297. spark_nlp-2.6.3rc1.dist-info/RECORD +0 -48
  298. sparknlp/__init__.pyc +0 -0
  299. sparknlp/__pycache__/__init__.cpython-36.pyc +0 -0
  300. sparknlp/__pycache__/annotation.cpython-36.pyc +0 -0
  301. sparknlp/__pycache__/annotator.cpython-36.pyc +0 -0
  302. sparknlp/__pycache__/base.cpython-36.pyc +0 -0
  303. sparknlp/__pycache__/common.cpython-36.pyc +0 -0
  304. sparknlp/__pycache__/embeddings.cpython-36.pyc +0 -0
  305. sparknlp/__pycache__/functions.cpython-36.pyc +0 -0
  306. sparknlp/__pycache__/internal.cpython-36.pyc +0 -0
  307. sparknlp/__pycache__/pretrained.cpython-36.pyc +0 -0
  308. sparknlp/__pycache__/storage.cpython-36.pyc +0 -0
  309. sparknlp/__pycache__/training.cpython-36.pyc +0 -0
  310. sparknlp/__pycache__/util.cpython-36.pyc +0 -0
  311. sparknlp/annotation.pyc +0 -0
  312. sparknlp/annotator.py +0 -3006
  313. sparknlp/annotator.pyc +0 -0
  314. sparknlp/base.py +0 -347
  315. sparknlp/base.pyc +0 -0
  316. sparknlp/common.py +0 -193
  317. sparknlp/common.pyc +0 -0
  318. sparknlp/embeddings.py +0 -40
  319. sparknlp/embeddings.pyc +0 -0
  320. sparknlp/internal.py +0 -288
  321. sparknlp/internal.pyc +0 -0
  322. sparknlp/pretrained.py +0 -123
  323. sparknlp/pretrained.pyc +0 -0
  324. sparknlp/storage.py +0 -32
  325. sparknlp/storage.pyc +0 -0
  326. sparknlp/training.py +0 -62
  327. sparknlp/training.pyc +0 -0
  328. sparknlp/util.pyc +0 -0
  329. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/top_level.txt +0 -0
sparknlp/annotator/classifier_dl/__init__.py
@@ -0,0 +1,61 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Module of annotators for text classification."""
+ from sparknlp.annotator.classifier_dl.albert_for_sequence_classification import *
+ from sparknlp.annotator.classifier_dl.albert_for_token_classification import *
+ from sparknlp.annotator.classifier_dl.albert_for_question_answering import *
+ from sparknlp.annotator.classifier_dl.bert_for_sequence_classification import *
+ from sparknlp.annotator.classifier_dl.bert_for_token_classification import *
+ from sparknlp.annotator.classifier_dl.bert_for_question_answering import *
+ from sparknlp.annotator.classifier_dl.classifier_dl import *
+ from sparknlp.annotator.classifier_dl.deberta_for_sequence_classification import *
+ from sparknlp.annotator.classifier_dl.deberta_for_token_classification import *
+ from sparknlp.annotator.classifier_dl.deberta_for_question_answering import *
+ from sparknlp.annotator.classifier_dl.distil_bert_for_sequence_classification import *
+ from sparknlp.annotator.classifier_dl.distil_bert_for_token_classification import *
+ from sparknlp.annotator.classifier_dl.distil_bert_for_question_answering import *
+ from sparknlp.annotator.classifier_dl.longformer_for_sequence_classification import *
+ from sparknlp.annotator.classifier_dl.longformer_for_token_classification import *
+ from sparknlp.annotator.classifier_dl.longformer_for_question_answering import *
+ from sparknlp.annotator.classifier_dl.multi_classifier_dl import *
+ from sparknlp.annotator.classifier_dl.roberta_for_sequence_classification import *
+ from sparknlp.annotator.classifier_dl.roberta_for_token_classification import *
+ from sparknlp.annotator.classifier_dl.roberta_for_question_answering import *
+ from sparknlp.annotator.classifier_dl.sentiment_dl import *
+ from sparknlp.annotator.classifier_dl.xlm_roberta_for_sequence_classification import *
+ from sparknlp.annotator.classifier_dl.xlm_roberta_for_token_classification import *
+ from sparknlp.annotator.classifier_dl.xlm_roberta_for_question_answering import *
+ from sparknlp.annotator.classifier_dl.xlnet_for_sequence_classification import *
+ from sparknlp.annotator.classifier_dl.xlnet_for_token_classification import *
+ from sparknlp.annotator.classifier_dl.camembert_for_token_classification import *
+ from sparknlp.annotator.classifier_dl.tapas_for_question_answering import *
+ from sparknlp.annotator.classifier_dl.camembert_for_sequence_classification import *
+ from sparknlp.annotator.classifier_dl.camembert_for_question_answering import *
+ from sparknlp.annotator.classifier_dl.bert_for_zero_shot_classification import *
+ from sparknlp.annotator.classifier_dl.distil_bert_for_zero_shot_classification import *
+ from sparknlp.annotator.classifier_dl.roberta_for_zero_shot_classification import *
+ from sparknlp.annotator.classifier_dl.xlm_roberta_for_zero_shot_classification import *
+ from sparknlp.annotator.classifier_dl.bart_for_zero_shot_classification import *
+ from sparknlp.annotator.classifier_dl.deberta_for_zero_shot_classification import *
+ from sparknlp.annotator.classifier_dl.mpnet_for_sequence_classification import *
+ from sparknlp.annotator.classifier_dl.mpnet_for_question_answering import *
+ from sparknlp.annotator.classifier_dl.mpnet_for_token_classification import *
+ from sparknlp.annotator.classifier_dl.albert_for_zero_shot_classification import *
+ from sparknlp.annotator.classifier_dl.camembert_for_zero_shot_classification import *
+ from sparknlp.annotator.classifier_dl.bert_for_multiple_choice import *
+ from sparknlp.annotator.classifier_dl.xlm_roberta_for_multiple_choice import *
+ from sparknlp.annotator.classifier_dl.roberta_for_multiple_choice import *
+ from sparknlp.annotator.classifier_dl.distilbert_for_multiple_choice import *
+ from sparknlp.annotator.classifier_dl.albert_for_multiple_choice import *
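The file above re-exports every classifier submodule through wildcard imports, so user code can pull any of these classes from a single namespace instead of spelling out the submodule path. A minimal sketch of what this layout enables (both imports resolve to the same class; assumes spark-nlp is installed):

# The direct submodule import and the aggregated import are equivalent,
# because classifier_dl/__init__.py re-exports the class.
from sparknlp.annotator.classifier_dl.albert_for_sequence_classification import AlbertForSequenceClassification
from sparknlp.annotator import AlbertForSequenceClassification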
sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py
@@ -0,0 +1,161 @@
+ # Copyright 2017-2024 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from sparknlp.common import *
+
+ class AlbertForMultipleChoice(AnnotatorModel,
+                               HasCaseSensitiveProperties,
+                               HasBatchedAnnotate,
+                               HasEngine,
+                               HasMaxSentenceLengthLimit):
+     """AlbertForMultipleChoice can load ALBERT models with a multiple-choice classification head on top
+     (a linear layer on top of the pooled output and a softmax), e.g. for RocStories/SWAG tasks.
+
+     Pretrained models can be loaded with :meth:`.pretrained` of the companion
+     object:
+
+     >>> spanClassifier = AlbertForMultipleChoice.pretrained() \\
+     ...     .setInputCols(["document_question", "document_context"]) \\
+     ...     .setOutputCol("answer")
+
+     The default model is ``"albert_base_uncased_multiple_choice"``, if no name is
+     provided.
+
+     For available pretrained models please see the `Models Hub
+     <https://sparknlp.org/models?task=Multiple+Choice>`__.
+
+     To see which models are compatible and how to import them see
+     `Import Transformers into Spark NLP 🚀
+     <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``DOCUMENT, DOCUMENT`` ``CHUNK``
+     ====================== ======================
+
+     Parameters
+     ----------
+     batchSize
+         Batch size. Larger values allow faster processing but require more
+         memory, by default 4
+     caseSensitive
+         Whether to ignore case in tokens for embeddings matching, by default
+         False
+     maxSentenceLength
+         Max sentence length to process, by default 512
+
+     Examples
+     --------
+     >>> import sparknlp
+     >>> from sparknlp.base import *
+     >>> from sparknlp.annotator import *
+     >>> from pyspark.ml import Pipeline
+     >>> documentAssembler = MultiDocumentAssembler() \\
+     ...     .setInputCols(["question", "context"]) \\
+     ...     .setOutputCols(["document_question", "document_context"])
+     >>> questionAnswering = AlbertForMultipleChoice.pretrained() \\
+     ...     .setInputCols(["document_question", "document_context"]) \\
+     ...     .setOutputCol("answer") \\
+     ...     .setCaseSensitive(False)
+     >>> pipeline = Pipeline().setStages([
+     ...     documentAssembler,
+     ...     questionAnswering
+     ... ])
+     >>> data = spark.createDataFrame([["The Eiffel Tower is located in which country?", "Germany, France, Italy"]]).toDF("question", "context")
+     >>> result = pipeline.fit(data).transform(data)
+     >>> result.select("answer.result").show(truncate=False)
+     +--------------------+
+     |result              |
+     +--------------------+
+     |[France]            |
+     +--------------------+
+     """
+     name = "AlbertForMultipleChoice"
+
+     inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.DOCUMENT]
+
+     outputAnnotatorType = AnnotatorType.CHUNK
+
+     choicesDelimiter = Param(Params._dummy(),
+                              "choicesDelimiter",
+                              "Delimiter character used to split the choices",
+                              TypeConverters.toString)
+
+     def setChoicesDelimiter(self, value):
+         """Sets the delimiter character used to split the choices.
+
+         Parameters
+         ----------
+         value : string
+             Delimiter character used to split the choices
+         """
+         return self._set(choicesDelimiter=value)
+
+     @keyword_only
+     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.AlbertForMultipleChoice",
+                  java_model=None):
+         super(AlbertForMultipleChoice, self).__init__(
+             classname=classname,
+             java_model=java_model
+         )
+         self._setDefault(
+             batchSize=4,
+             maxSentenceLength=512,
+             caseSensitive=False,
+             choicesDelimiter=","
+         )
+
+     @staticmethod
+     def loadSavedModel(folder, spark_session):
+         """Loads a locally saved model.
+
+         Parameters
+         ----------
+         folder : str
+             Folder of the saved model
+         spark_session : pyspark.sql.SparkSession
+             The current SparkSession
+
+         Returns
+         -------
+         AlbertForMultipleChoice
+             The restored model
+         """
+         from sparknlp.internal import _AlbertMultipleChoiceLoader
+         jModel = _AlbertMultipleChoiceLoader(folder, spark_session._jsparkSession)._java_obj
+         return AlbertForMultipleChoice(java_model=jModel)
+
+     @staticmethod
+     def pretrained(name="albert_base_uncased_multiple_choice", lang="en", remote_loc=None):
+         """Downloads and loads a pretrained model.
+
+         Parameters
+         ----------
+         name : str, optional
+             Name of the pretrained model, by default
+             "albert_base_uncased_multiple_choice"
+         lang : str, optional
+             Language of the pretrained model, by default "en"
+         remote_loc : str, optional
+             Optional remote address of the resource, by default None. Will use
+             Spark NLP's repositories otherwise.
+
+         Returns
+         -------
+         AlbertForMultipleChoice
+             The restored model
+         """
+         from sparknlp.pretrained import ResourceDownloader
+         return ResourceDownloader.downloadModel(AlbertForMultipleChoice, name, lang, remote_loc)
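The docstring example above joins the answer choices with the default comma delimiter; the choicesDelimiter parameter (set through the corrected setChoicesDelimiter setter above) controls how the context string is split. A minimal sketch, assuming an active SparkSession named spark and the default pretrained model:

from pyspark.ml import Pipeline
from sparknlp.base import MultiDocumentAssembler
from sparknlp.annotator import AlbertForMultipleChoice

documentAssembler = MultiDocumentAssembler() \
    .setInputCols(["question", "context"]) \
    .setOutputCols(["document_question", "document_context"])

# Split the choices on "|" instead of the default ",".
multipleChoice = AlbertForMultipleChoice.pretrained() \
    .setInputCols(["document_question", "document_context"]) \
    .setOutputCol("answer") \
    .setChoicesDelimiter("|")

pipeline = Pipeline().setStages([documentAssembler, multipleChoice])
data = spark.createDataFrame(
    [["Which planet is known as the Red Planet?", "Venus|Mars|Jupiter"]]
).toDF("question", "context")
pipeline.fit(data).transform(data).select("answer.result").show(truncate=False)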
sparknlp/annotator/classifier_dl/albert_for_question_answering.py
@@ -0,0 +1,172 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from sparknlp.common import *
+
+
+ class AlbertForQuestionAnswering(AnnotatorModel,
+                                 HasCaseSensitiveProperties,
+                                 HasBatchedAnnotate,
+                                 HasEngine,
+                                 HasMaxSentenceLengthLimit):
+     """AlbertForQuestionAnswering can load ALBERT models with a span classification head on top for extractive
+     question-answering tasks like SQuAD (a linear layer on top of the hidden-states output to compute span start
+     logits and span end logits).
+
+     Pretrained models can be loaded with :meth:`.pretrained` of the companion
+     object:
+
+     >>> spanClassifier = AlbertForQuestionAnswering.pretrained() \\
+     ...     .setInputCols(["document_question", "document_context"]) \\
+     ...     .setOutputCol("answer")
+
+     The default model is ``"albert_base_qa_squad2"``, if no name is
+     provided.
+
+     For available pretrained models please see the `Models Hub
+     <https://sparknlp.org/models?task=Question+Answering>`__.
+
+     To see which models are compatible and how to import them see
+     `Import Transformers into Spark NLP 🚀
+     <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``DOCUMENT, DOCUMENT`` ``CHUNK``
+     ====================== ======================
+
+     Parameters
+     ----------
+     batchSize
+         Batch size. Larger values allow faster processing but require more
+         memory, by default 8
+     caseSensitive
+         Whether to ignore case in tokens for embeddings matching, by default
+         False
+     configProtoBytes
+         ConfigProto from tensorflow, serialized into byte array.
+     maxSentenceLength
+         Max sentence length to process, by default 128
+
+     Examples
+     --------
+     >>> import sparknlp
+     >>> from sparknlp.base import *
+     >>> from sparknlp.annotator import *
+     >>> from pyspark.ml import Pipeline
+     >>> documentAssembler = MultiDocumentAssembler() \\
+     ...     .setInputCols(["question", "context"]) \\
+     ...     .setOutputCols(["document_question", "document_context"])
+     >>> spanClassifier = AlbertForQuestionAnswering.pretrained() \\
+     ...     .setInputCols(["document_question", "document_context"]) \\
+     ...     .setOutputCol("answer") \\
+     ...     .setCaseSensitive(False)
+     >>> pipeline = Pipeline().setStages([
+     ...     documentAssembler,
+     ...     spanClassifier
+     ... ])
+     >>> data = spark.createDataFrame([["What's my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context")
+     >>> result = pipeline.fit(data).transform(data)
+     >>> result.select("answer.result").show(truncate=False)
+     +--------------------+
+     |result              |
+     +--------------------+
+     |[Clara]             |
+     +--------------------+
+     """
+     name = "AlbertForQuestionAnswering"
+
+     inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.DOCUMENT]
+
+     outputAnnotatorType = AnnotatorType.CHUNK
+
+     configProtoBytes = Param(Params._dummy(),
+                              "configProtoBytes",
+                              "ConfigProto from tensorflow, serialized into byte array. Get with "
+                              "config_proto.SerializeToString()",
+                              TypeConverters.toListInt)
+
+     coalesceSentences = Param(Params._dummy(), "coalesceSentences",
+                               "Instead of 1 class per sentence (if inputCols is '''sentence''') output 1 class per "
+                               "document by averaging probabilities in all sentences.",
+                               TypeConverters.toBoolean)
+
+     def setConfigProtoBytes(self, b):
+         """Sets configProto from tensorflow, serialized into byte array.
+
+         Parameters
+         ----------
+         b : List[int]
+             ConfigProto from tensorflow, serialized into byte array
+         """
+         return self._set(configProtoBytes=b)
+
+
+
+     @keyword_only
+     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.AlbertForQuestionAnswering",
+                  java_model=None):
+         super(AlbertForQuestionAnswering, self).__init__(
+             classname=classname,
+             java_model=java_model
+         )
+         self._setDefault(
+             batchSize=8,
+             maxSentenceLength=128,
+             caseSensitive=False
+         )
+
+     @staticmethod
+     def loadSavedModel(folder, spark_session):
+         """Loads a locally saved model.
+
+         Parameters
+         ----------
+         folder : str
+             Folder of the saved model
+         spark_session : pyspark.sql.SparkSession
+             The current SparkSession
+
+         Returns
+         -------
+         AlbertForQuestionAnswering
+             The restored model
+         """
+         from sparknlp.internal import _AlbertQuestionAnsweringLoader
+         jModel = _AlbertQuestionAnsweringLoader(folder, spark_session._jsparkSession)._java_obj
+         return AlbertForQuestionAnswering(java_model=jModel)
+
+     @staticmethod
+     def pretrained(name="albert_base_qa_squad2", lang="en", remote_loc=None):
+         """Downloads and loads a pretrained model.
+
+         Parameters
+         ----------
+         name : str, optional
+             Name of the pretrained model, by default
+             "albert_base_qa_squad2"
+         lang : str, optional
+             Language of the pretrained model, by default "en"
+         remote_loc : str, optional
+             Optional remote address of the resource, by default None. Will use
+             Spark NLP's repositories otherwise.
+
+         Returns
+         -------
+         AlbertForQuestionAnswering
+             The restored model
+         """
+         from sparknlp.pretrained import ResourceDownloader
+         return ResourceDownloader.downloadModel(AlbertForQuestionAnswering, name, lang, remote_loc)
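For one-off inference, the fitted pipeline can also be wrapped in a LightPipeline to skip the DataFrame round trip. A sketch under the assumption that LightPipeline.fullAnnotate accepts the question and context as two separate targets, as it does for the multi-document QA annotators; pipeline and data refer to the docstring example above:

from sparknlp.base import LightPipeline

model = pipeline.fit(data)
light = LightPipeline(model)

# The first target feeds document_question, the second feeds document_context.
results = light.fullAnnotate("What's my name?", "My name is Clara and I live in Berkeley.")
print(results[0]["answer"][0].result)  # expected: Clara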
sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py
@@ -0,0 +1,201 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains classes concerning AlbertForSequenceClassification."""
+
+ from sparknlp.common import *
+
+
+ class AlbertForSequenceClassification(AnnotatorModel,
+                                       HasCaseSensitiveProperties,
+                                       HasBatchedAnnotate,
+                                       HasClassifierActivationProperties,
+                                       HasEngine,
+                                       HasMaxSentenceLengthLimit):
+     """AlbertForSequenceClassification can load ALBERT models with a sequence classification/regression head on
+     top (a linear layer on top of the pooled output), e.g. for multi-class document classification tasks.
+
+     Pretrained models can be loaded with :meth:`.pretrained` of the companion
+     object:
+
+     >>> sequenceClassifier = AlbertForSequenceClassification.pretrained() \\
+     ...     .setInputCols(["token", "document"]) \\
+     ...     .setOutputCol("label")
+
+     The default model is ``"albert_base_sequence_classifier_imdb"``, if no name is
+     provided.
+
+     For available pretrained models please see the `Models Hub
+     <https://sparknlp.org/models?task=Text+Classification>`__.
+
+     To see which models are compatible and how to import them see
+     `Import Transformers into Spark NLP 🚀
+     <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``DOCUMENT, TOKEN``    ``CATEGORY``
+     ====================== ======================
+
+     Parameters
+     ----------
+     batchSize
+         Batch size. Larger values allow faster processing but require more
+         memory, by default 8
+     caseSensitive
+         Whether to ignore case in tokens for embeddings matching, by default
+         False
+     configProtoBytes
+         ConfigProto from tensorflow, serialized into byte array.
+     maxSentenceLength
+         Max sentence length to process, by default 128
+     coalesceSentences
+         Instead of 1 class per sentence (if inputCols is `sentence`) output
+         1 class per document by averaging probabilities in all sentences, by
+         default False.
+     activation
+         Whether to calculate logits via Softmax or Sigmoid, by default
+         `"softmax"`.
+
+     Examples
+     --------
+     >>> import sparknlp
+     >>> from sparknlp.base import *
+     >>> from sparknlp.annotator import *
+     >>> from pyspark.ml import Pipeline
+     >>> documentAssembler = DocumentAssembler() \\
+     ...     .setInputCol("text") \\
+     ...     .setOutputCol("document")
+     >>> tokenizer = Tokenizer() \\
+     ...     .setInputCols(["document"]) \\
+     ...     .setOutputCol("token")
+     >>> sequenceClassifier = AlbertForSequenceClassification.pretrained() \\
+     ...     .setInputCols(["token", "document"]) \\
+     ...     .setOutputCol("label") \\
+     ...     .setCaseSensitive(True)
+     >>> pipeline = Pipeline().setStages([
+     ...     documentAssembler,
+     ...     tokenizer,
+     ...     sequenceClassifier
+     ... ])
+     >>> data = spark.createDataFrame([["I loved this movie when I was a child."], ["It was pretty boring."]]).toDF("text")
+     >>> result = pipeline.fit(data).transform(data)
+     >>> result.select("label.result").show(truncate=False)
+     +------+
+     |result|
+     +------+
+     |[pos] |
+     |[neg] |
+     +------+
+     """
+     name = "AlbertForSequenceClassification"
+
+     inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]
+
+     outputAnnotatorType = AnnotatorType.CATEGORY
+
+     configProtoBytes = Param(Params._dummy(),
+                              "configProtoBytes",
+                              "ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()",
+                              TypeConverters.toListInt)
+
+     coalesceSentences = Param(Params._dummy(), "coalesceSentences",
+                               "Instead of 1 class per sentence (if inputCols is '''sentence''') output 1 class per document by averaging probabilities in all sentences.",
+                               TypeConverters.toBoolean)
+
+     def getClasses(self):
+         """
+         Returns labels used to train this model
+         """
+         return self._call_java("getClasses")
+
+     def setConfigProtoBytes(self, b):
+         """Sets configProto from tensorflow, serialized into byte array.
+
+         Parameters
+         ----------
+         b : List[int]
+             ConfigProto from tensorflow, serialized into byte array
+         """
+         return self._set(configProtoBytes=b)
+
+     def setCoalesceSentences(self, value):
+         """Instead of 1 class per sentence (if inputCols is '''sentence''') output 1 class per document by averaging probabilities in all sentences.
+         Due to the max sequence length limit in almost all transformer models such as BERT (512 tokens), this parameter helps feed all the sentences
+         into the model and average all the probabilities for the entire document instead of one probability per sentence. (Default: False)
+
+         Parameters
+         ----------
+         value : bool
+             Whether the outputs of all sentences will be averaged into one output
+         """
+         return self._set(coalesceSentences=value)
+
+     @keyword_only
+     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.AlbertForSequenceClassification",
+                  java_model=None):
+         super(AlbertForSequenceClassification, self).__init__(
+             classname=classname,
+             java_model=java_model
+         )
+         self._setDefault(
+             batchSize=8,
+             maxSentenceLength=128,
+             caseSensitive=False,
+             coalesceSentences=False,
+             activation="softmax"
+         )
+
+     @staticmethod
+     def loadSavedModel(folder, spark_session):
+         """Loads a locally saved model.
+
+         Parameters
+         ----------
+         folder : str
+             Folder of the saved model
+         spark_session : pyspark.sql.SparkSession
+             The current SparkSession
+
+         Returns
+         -------
+         AlbertForSequenceClassification
+             The restored model
+         """
+         from sparknlp.internal import _AlbertSequenceClassifierLoader
+         jModel = _AlbertSequenceClassifierLoader(folder, spark_session._jsparkSession)._java_obj
+         return AlbertForSequenceClassification(java_model=jModel)
+
+     @staticmethod
+     def pretrained(name="albert_base_sequence_classifier_imdb", lang="en", remote_loc=None):
+         """Downloads and loads a pretrained model.
+
+         Parameters
+         ----------
+         name : str, optional
+             Name of the pretrained model, by default
+             "albert_base_sequence_classifier_imdb"
+         lang : str, optional
+             Language of the pretrained model, by default "en"
+         remote_loc : str, optional
+             Optional remote address of the resource, by default None. Will use
+             Spark NLP's repositories otherwise.
+
+         Returns
+         -------
+         AlbertForSequenceClassification
+             The restored model
+         """
+         from sparknlp.pretrained import ResourceDownloader
+         return ResourceDownloader.downloadModel(AlbertForSequenceClassification, name, lang, remote_loc)
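The coalesceSentences parameter only comes into play when the classifier receives sentence-level input. A minimal sketch, assuming an active SparkSession and the default pretrained model, of a sentence-split pipeline that averages the per-sentence probabilities back into one label per document:

from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import SentenceDetector, Tokenizer, AlbertForSequenceClassification

documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

sentenceDetector = SentenceDetector() \
    .setInputCols(["document"]) \
    .setOutputCol("sentence")

tokenizer = Tokenizer() \
    .setInputCols(["sentence"]) \
    .setOutputCol("token")

# coalesceSentences=True averages the per-sentence probabilities so that
# each document gets a single label instead of one label per sentence.
sequenceClassifier = AlbertForSequenceClassification.pretrained() \
    .setInputCols(["token", "sentence"]) \
    .setOutputCol("label") \
    .setCoalesceSentences(True)

pipeline = Pipeline().setStages([
    documentAssembler, sentenceDetector, tokenizer, sequenceClassifier
])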