spark-nlp 5.5.2__tar.gz → 6.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spark-nlp might be problematic; see the diff below for details.

Files changed (277)
  1. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/PKG-INFO +20 -11
  2. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/README.md +19 -10
  3. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/setup.py +1 -1
  4. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/spark_nlp.egg-info/PKG-INFO +20 -11
  5. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/spark_nlp.egg-info/SOURCES.txt +16 -1
  6. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/__init__.py +2 -2
  7. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/__init__.py +4 -0
  8. spark-nlp-6.0.0/sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
  9. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +2 -2
  10. spark-nlp-6.0.0/sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
  11. spark-nlp-6.0.0/sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
  12. spark-nlp-6.0.0/sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
  13. spark-nlp-6.0.0/sparknlp/annotator/cleaners/__init__.py +15 -0
  14. spark-nlp-6.0.0/sparknlp/annotator/cleaners/cleaner.py +202 -0
  15. spark-nlp-6.0.0/sparknlp/annotator/cleaners/extractor.py +191 -0
  16. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/cv/__init__.py +6 -1
  17. spark-nlp-6.0.0/sparknlp/annotator/cv/janus_for_multimodal.py +356 -0
  18. spark-nlp-6.0.0/sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
  19. spark-nlp-6.0.0/sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
  20. spark-nlp-6.0.0/sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
  21. spark-nlp-6.0.0/sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
  22. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/auto_gguf_embeddings.py +10 -6
  23. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/bge_embeddings.py +7 -3
  24. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/__init__.py +3 -0
  25. spark-nlp-6.0.0/sparknlp/annotator/seq2seq/auto_gguf_model.py +299 -0
  26. spark-nlp-6.0.0/sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +333 -0
  27. spark-nlp-6.0.0/sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
  28. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/llama3_transformer.py +4 -4
  29. spark-nlp-6.0.0/sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
  30. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/image_assembler.py +58 -0
  31. spark-nlp-5.5.2/sparknlp/annotator/seq2seq/auto_gguf_model.py → spark-nlp-6.0.0/sparknlp/common/properties.py +755 -280
  32. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/internal/__init__.py +100 -2
  33. spark-nlp-6.0.0/sparknlp/reader/pdf_to_text.py +65 -0
  34. spark-nlp-6.0.0/sparknlp/reader/sparknlp_reader.py +321 -0
  35. spark-nlp-5.5.2/spark_nlp.egg-info/.uuid +0 -1
  36. spark-nlp-5.5.2/sparknlp/common/properties.py +0 -733
  37. spark-nlp-5.5.2/sparknlp/reader/sparknlp_reader.py +0 -121
  38. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/com/__init__.py +0 -0
  39. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/com/johnsnowlabs/__init__.py +0 -0
  40. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/com/johnsnowlabs/ml/__init__.py +0 -0
  41. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
  42. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/com/johnsnowlabs/nlp/__init__.py +0 -0
  43. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/setup.cfg +0 -0
  44. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/spark_nlp.egg-info/dependency_links.txt +0 -0
  45. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/spark_nlp.egg-info/top_level.txt +0 -0
  46. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotation.py +0 -0
  47. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotation_audio.py +0 -0
  48. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotation_image.py +0 -0
  49. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/__init__.py +0 -0
  50. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/audio/__init__.py +0 -0
  51. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
  52. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
  53. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
  54. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/chunk2_doc.py +0 -0
  55. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/chunker.py +0 -0
  56. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
  57. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
  58. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
  59. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +0 -0
  60. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
  61. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
  62. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
  63. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
  64. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
  65. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
  66. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
  67. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
  68. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +0 -0
  69. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
  70. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
  71. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
  72. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
  73. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
  74. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
  75. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
  76. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
  77. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
  78. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
  79. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
  80. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
  81. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
  82. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
  83. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
  84. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
  85. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
  86. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
  87. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
  88. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
  89. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
  90. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
  91. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
  92. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
  93. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
  94. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
  95. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
  96. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
  97. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/coref/__init__.py +0 -0
  98. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
  99. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/cv/blip_for_question_answering.py +0 -0
  100. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
  101. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
  102. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
  103. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
  104. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
  105. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/date2_chunk.py +0 -0
  106. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/dependency/__init__.py +0 -0
  107. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
  108. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
  109. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/document_character_text_splitter.py +0 -0
  110. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/document_normalizer.py +0 -0
  111. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/document_token_splitter.py +0 -0
  112. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/document_token_splitter_test.py +0 -0
  113. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/__init__.py +0 -0
  114. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
  115. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
  116. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
  117. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
  118. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
  119. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
  120. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
  121. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
  122. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
  123. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
  124. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
  125. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
  126. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
  127. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/mxbai_embeddings.py +0 -0
  128. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/nomic_embeddings.py +0 -0
  129. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
  130. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
  131. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
  132. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/snowflake_embeddings.py +0 -0
  133. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
  134. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
  135. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/word2vec.py +0 -0
  136. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
  137. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
  138. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
  139. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
  140. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/er/__init__.py +0 -0
  141. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/er/entity_ruler.py +0 -0
  142. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/graph_extraction.py +0 -0
  143. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
  144. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
  145. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/ld_dl/__init__.py +0 -0
  146. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
  147. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/lemmatizer.py +0 -0
  148. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/matcher/__init__.py +0 -0
  149. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
  150. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/matcher/date_matcher.py +0 -0
  151. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
  152. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
  153. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/matcher/text_matcher.py +0 -0
  154. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/n_gram_generator.py +0 -0
  155. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/ner/__init__.py +0 -0
  156. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/ner/ner_approach.py +0 -0
  157. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/ner/ner_converter.py +0 -0
  158. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/ner/ner_crf.py +0 -0
  159. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/ner/ner_dl.py +0 -0
  160. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
  161. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
  162. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/normalizer.py +0 -0
  163. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/openai/__init__.py +0 -0
  164. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/openai/openai_completion.py +0 -0
  165. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
  166. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/param/__init__.py +0 -0
  167. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/param/classifier_encoder.py +0 -0
  168. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
  169. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/pos/__init__.py +0 -0
  170. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/pos/perceptron.py +0 -0
  171. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/sentence/__init__.py +0 -0
  172. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
  173. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
  174. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/sentiment/__init__.py +0 -0
  175. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
  176. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
  177. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
  178. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/cpm_transformer.py +0 -0
  179. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
  180. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
  181. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
  182. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
  183. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
  184. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/nllb_transformer.py +0 -0
  185. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
  186. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/phi3_transformer.py +0 -0
  187. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/qwen_transformer.py +0 -0
  188. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/starcoder_transformer.py +0 -0
  189. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
  190. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/similarity/__init__.py +0 -0
  191. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
  192. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/spell_check/__init__.py +0 -0
  193. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
  194. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
  195. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
  196. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/stemmer.py +0 -0
  197. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/stop_words_cleaner.py +0 -0
  198. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
  199. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/token/__init__.py +0 -0
  200. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
  201. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
  202. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
  203. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/token/tokenizer.py +0 -0
  204. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/token2_chunk.py +0 -0
  205. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/ws/__init__.py +0 -0
  206. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/annotator/ws/word_segmenter.py +0 -0
  207. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/__init__.py +0 -0
  208. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/audio_assembler.py +0 -0
  209. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/doc2_chunk.py +0 -0
  210. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/document_assembler.py +0 -0
  211. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/embeddings_finisher.py +0 -0
  212. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/finisher.py +0 -0
  213. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/graph_finisher.py +0 -0
  214. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/has_recursive_fit.py +0 -0
  215. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/has_recursive_transform.py +0 -0
  216. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/light_pipeline.py +0 -0
  217. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/multi_document_assembler.py +0 -0
  218. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/prompt_assembler.py +0 -0
  219. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/recursive_pipeline.py +0 -0
  220. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/table_assembler.py +0 -0
  221. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/base/token_assembler.py +0 -0
  222. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/common/__init__.py +0 -0
  223. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/common/annotator_approach.py +0 -0
  224. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/common/annotator_model.py +0 -0
  225. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/common/annotator_properties.py +0 -0
  226. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/common/annotator_type.py +0 -0
  227. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/common/coverage_result.py +0 -0
  228. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/common/match_strategy.py +0 -0
  229. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/common/read_as.py +0 -0
  230. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/common/recursive_annotator_approach.py +0 -0
  231. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/common/storage.py +0 -0
  232. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/common/utils.py +0 -0
  233. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/functions.py +0 -0
  234. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/internal/annotator_java_ml.py +0 -0
  235. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/internal/annotator_transformer.py +0 -0
  236. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/internal/extended_java_wrapper.py +0 -0
  237. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/internal/params_getters_setters.py +0 -0
  238. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/internal/recursive.py +0 -0
  239. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/logging/__init__.py +0 -0
  240. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/logging/comet.py +0 -0
  241. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/pretrained/__init__.py +0 -0
  242. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
  243. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/pretrained/resource_downloader.py +0 -0
  244. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/pretrained/utils.py +0 -0
  245. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/reader/__init__.py +0 -0
  246. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/__init__.py +0 -0
  247. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  248. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
  249. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  250. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
  251. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
  252. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
  253. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
  254. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
  255. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
  256. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
  257. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
  258. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
  259. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
  260. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
  261. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
  262. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  263. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
  264. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  265. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
  266. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
  267. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
  268. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
  269. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
  270. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/conll.py +0 -0
  271. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/conllu.py +0 -0
  272. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/pos.py +0 -0
  273. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/pub_tator.py +0 -0
  274. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/spacy_to_annotation.py +0 -0
  275. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/training/tfgraphs.py +0 -0
  276. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/upload_to_hub.py +0 -0
  277. {spark-nlp-5.5.2 → spark-nlp-6.0.0}/sparknlp/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spark-nlp
3
- Version: 5.5.2
3
+ Version: 6.0.0
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -87,7 +87,7 @@ documentation and examples
87
87
 
88
88
  ## Quick Start
89
89
 
90
- This is a quick example of how to use Spark NLP pre-trained pipeline in Python and PySpark:
90
+ This is a quick example of how to use a Spark NLP pre-trained pipeline in Python and PySpark:
91
91
 
92
92
  ```sh
93
93
  $ java -version
@@ -95,7 +95,7 @@ $ java -version
95
95
  $ conda create -n sparknlp python=3.7 -y
96
96
  $ conda activate sparknlp
97
97
  # spark-nlp by default is based on pyspark 3.x
98
- $ pip install spark-nlp==5.5.2 pyspark==3.3.1
98
+ $ pip install spark-nlp==6.0.0 pyspark==3.3.1
99
99
  ```
100
100
 
101
101
  In Python console or Jupyter `Python3` kernel:
@@ -161,10 +161,11 @@ For a quick example of using pipelines and models take a look at our official [d
161
161
 
162
162
  ### Apache Spark Support
163
163
 
164
- Spark NLP *5.5.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
164
+ Spark NLP *6.0.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
165
165
 
166
166
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
167
167
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
168
+ | 6.0.x | YES | YES | YES | YES | YES | YES | NO | NO |
168
169
  | 5.5.x | YES | YES | YES | YES | YES | YES | NO | NO |
169
170
  | 5.4.x | YES | YES | YES | YES | YES | YES | NO | NO |
170
171
  | 5.3.x | YES | YES | YES | YES | YES | YES | NO | NO |
@@ -178,6 +179,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
178
179
 
179
180
  | Spark NLP | Python 3.6 | Python 3.7 | Python 3.8 | Python 3.9 | Python 3.10| Scala 2.11 | Scala 2.12 |
180
181
  |-----------|------------|------------|------------|------------|------------|------------|------------|
182
+ | 6.0.x | NO | YES | YES | YES | YES | NO | YES |
181
183
  | 5.5.x | NO | YES | YES | YES | YES | NO | YES |
182
184
  | 5.4.x | NO | YES | YES | YES | YES | NO | YES |
183
185
  | 5.3.x | NO | YES | YES | YES | YES | NO | YES |
@@ -189,7 +191,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
189
191
 
190
192
  ### Databricks Support
191
193
 
192
- Spark NLP 5.5.2 has been tested and is compatible with the following runtimes:
194
+ Spark NLP 6.0.0 has been tested and is compatible with the following runtimes:
193
195
 
194
196
  | **CPU** | **GPU** |
195
197
  |--------------------|--------------------|
@@ -206,7 +208,7 @@ We are compatible with older runtimes. For a full list check databricks support
206
208
 
207
209
  ### EMR Support
208
210
 
209
- Spark NLP 5.5.2 has been tested and is compatible with the following EMR releases:
211
+ Spark NLP 6.0.0 has been tested and is compatible with the following EMR releases:
210
212
 
211
213
  | **EMR Release** |
212
214
  |--------------------|
@@ -216,6 +218,13 @@ Spark NLP 5.5.2 has been tested and is compatible with the following EMR release
216
218
  | emr-7.0.0 |
217
219
  | emr-7.1.0 |
218
220
  | emr-7.2.0 |
221
+ | emr-7.3.0 |
222
+ | emr-7.4.0 |
223
+ | emr-7.5.0 |
224
+ | emr-7.6.0 |
225
+ | emr-7.7.0 |
226
+ | emr-7.8.0 |
227
+
219
228
 
220
229
  We are compatible with older EMR releases. For a full list check EMR support in our official [documentation](https://sparknlp.org/docs/en/install#emr-support)
221
230
 
@@ -237,7 +246,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
237
246
  from our official documentation.
238
247
 
239
248
  If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
240
- projects [Spark NLP SBT S5.5.2r](https://github.com/maziyarpanahi/spark-nlp-starter)
249
+ projects [Spark NLP SBT S6.0.0r](https://github.com/maziyarpanahi/spark-nlp-starter)
241
250
 
242
251
  ### Python
243
252
 
@@ -246,7 +255,7 @@ Check all available installations for Python in our official [documentation](htt
246
255
 
247
256
  ### Compiled JARs
248
257
 
249
- To compile the jars from source follow [these instructions](https://sparknlp.org/docs/en/compiled#jars) from our official documenation
258
+ To compile the jars from source follow [these instructions](https://sparknlp.org/docs/en/compiled#jars) from our official documentation
250
259
 
251
260
  ## Platform-Specific Instructions
252
261
 
@@ -266,7 +275,7 @@ For detailed instructions on how to use Spark NLP on supported platforms, please
266
275
 
267
276
  Spark NLP library and all the pre-trained models/pipelines can be used entirely offline with no access to the Internet.
268
277
  Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation
269
- to use Spark NLP offline
278
+ to use Spark NLP offline.
270
279
 
271
280
  ## Advanced Settings
272
281
 
@@ -282,7 +291,7 @@ In Spark NLP we can define S3 locations to:
282
291
 
283
292
  Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation.
284
293
 
285
- ## Document5.5.2
294
+ ## Documentation
286
295
 
287
296
  ### Examples
288
297
 
@@ -315,7 +324,7 @@ the Spark NLP library:
315
324
  keywords = {Spark, Natural language processing, Deep learning, Tensorflow, Cluster},
316
325
  abstract = {Spark NLP is a Natural Language Processing (NLP) library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines that can scale easily in a distributed environment. Spark NLP comes with 1100+ pretrained pipelines and models in more than 192+ languages. It supports nearly all the NLP tasks and modules that can be used seamlessly in a cluster. Downloaded more than 2.7 million times and experiencing 9x growth since January 2020, Spark NLP is used by 54% of healthcare organizations as the world’s most widely used NLP library in the enterprise.}
317
326
  }
318
- }5.5.2
327
+ }
319
328
  ```
320
329
 
321
330
  ## Community support
@@ -55,7 +55,7 @@ documentation and examples
55
55
 
56
56
  ## Quick Start
57
57
 
58
- This is a quick example of how to use Spark NLP pre-trained pipeline in Python and PySpark:
58
+ This is a quick example of how to use a Spark NLP pre-trained pipeline in Python and PySpark:
59
59
 
60
60
  ```sh
61
61
  $ java -version
@@ -63,7 +63,7 @@ $ java -version
63
63
  $ conda create -n sparknlp python=3.7 -y
64
64
  $ conda activate sparknlp
65
65
  # spark-nlp by default is based on pyspark 3.x
66
- $ pip install spark-nlp==5.5.2 pyspark==3.3.1
66
+ $ pip install spark-nlp==6.0.0 pyspark==3.3.1
67
67
  ```
68
68
 
69
69
  In Python console or Jupyter `Python3` kernel:
@@ -129,10 +129,11 @@ For a quick example of using pipelines and models take a look at our official [d
129
129
 
130
130
  ### Apache Spark Support
131
131
 
132
- Spark NLP *5.5.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
132
+ Spark NLP *6.0.0* has been built on top of Apache Spark 3.4 while fully supporting Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
133
133
 
134
134
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
135
135
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
136
+ | 6.0.x | YES | YES | YES | YES | YES | YES | NO | NO |
136
137
  | 5.5.x | YES | YES | YES | YES | YES | YES | NO | NO |
137
138
  | 5.4.x | YES | YES | YES | YES | YES | YES | NO | NO |
138
139
  | 5.3.x | YES | YES | YES | YES | YES | YES | NO | NO |
@@ -146,6 +147,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
146
147
 
147
148
  | Spark NLP | Python 3.6 | Python 3.7 | Python 3.8 | Python 3.9 | Python 3.10| Scala 2.11 | Scala 2.12 |
148
149
  |-----------|------------|------------|------------|------------|------------|------------|------------|
150
+ | 6.0.x | NO | YES | YES | YES | YES | NO | YES |
149
151
  | 5.5.x | NO | YES | YES | YES | YES | NO | YES |
150
152
  | 5.4.x | NO | YES | YES | YES | YES | NO | YES |
151
153
  | 5.3.x | NO | YES | YES | YES | YES | NO | YES |
@@ -157,7 +159,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
157
159
 
158
160
  ### Databricks Support
159
161
 
160
- Spark NLP 5.5.2 has been tested and is compatible with the following runtimes:
162
+ Spark NLP 6.0.0 has been tested and is compatible with the following runtimes:
161
163
 
162
164
  | **CPU** | **GPU** |
163
165
  |--------------------|--------------------|
@@ -174,7 +176,7 @@ We are compatible with older runtimes. For a full list check databricks support
174
176
 
175
177
  ### EMR Support
176
178
 
177
- Spark NLP 5.5.2 has been tested and is compatible with the following EMR releases:
179
+ Spark NLP 6.0.0 has been tested and is compatible with the following EMR releases:
178
180
 
179
181
  | **EMR Release** |
180
182
  |--------------------|
@@ -184,6 +186,13 @@ Spark NLP 5.5.2 has been tested and is compatible with the following EMR release
184
186
  | emr-7.0.0 |
185
187
  | emr-7.1.0 |
186
188
  | emr-7.2.0 |
189
+ | emr-7.3.0 |
190
+ | emr-7.4.0 |
191
+ | emr-7.5.0 |
192
+ | emr-7.6.0 |
193
+ | emr-7.7.0 |
194
+ | emr-7.8.0 |
195
+
187
196
 
188
197
  We are compatible with older EMR releases. For a full list check EMR support in our official [documentation](https://sparknlp.org/docs/en/install#emr-support)
189
198
 
@@ -205,7 +214,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
205
214
  from our official documentation.
206
215
 
207
216
  If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
208
- projects [Spark NLP SBT S5.5.2r](https://github.com/maziyarpanahi/spark-nlp-starter)
217
+ projects [Spark NLP SBT Starter](https://github.com/maziyarpanahi/spark-nlp-starter)
209
218
 
210
219
  ### Python
211
220
 
@@ -214,7 +223,7 @@ Check all available installations for Python in our official [documentation](htt
214
223
 
215
224
  ### Compiled JARs
216
225
 
217
- To compile the jars from source follow [these instructions](https://sparknlp.org/docs/en/compiled#jars) from our official documenation
226
+ To compile the jars from source follow [these instructions](https://sparknlp.org/docs/en/compiled#jars) from our official documentation
218
227
 
219
228
  ## Platform-Specific Instructions
220
229
 
@@ -234,7 +243,7 @@ For detailed instructions on how to use Spark NLP on supported platforms, please
234
243
 
235
244
  Spark NLP library and all the pre-trained models/pipelines can be used entirely offline with no access to the Internet.
236
245
  Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation
237
- to use Spark NLP offline
246
+ to use Spark NLP offline.
238
247
 
239
248
  ## Advanced Settings
240
249
 
@@ -250,7 +259,7 @@ In Spark NLP we can define S3 locations to:
250
259
 
251
260
  Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation.
252
261
 
253
- ## Document5.5.2
262
+ ## Documentation
254
263
 
255
264
  ### Examples
256
265
 
@@ -283,7 +292,7 @@ the Spark NLP library:
283
292
  keywords = {Spark, Natural language processing, Deep learning, Tensorflow, Cluster},
284
293
  abstract = {Spark NLP is a Natural Language Processing (NLP) library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines that can scale easily in a distributed environment. Spark NLP comes with 1100+ pretrained pipelines and models in more than 192+ languages. It supports nearly all the NLP tasks and modules that can be used seamlessly in a cluster. Downloaded more than 2.7 million times and experiencing 9x growth since January 2020, Spark NLP is used by 54% of healthcare organizations as the world’s most widely used NLP library in the enterprise.}
285
294
  }
286
- }5.5.2
295
+ }
287
296
  ```
288
297
 
289
298
  ## Community support
@@ -41,7 +41,7 @@ setup(
41
41
  # project code, see
42
42
  # https://packaging.python.org/en/latest/single_source_version.html
43
43
 
44
- version='5.5.2', # Required
44
+ version='6.0.0', # Required
45
45
 
46
46
  # This is a one-line description or tagline of what your project does. This
47
47
  # corresponds to the 'Summary' metadata field:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spark-nlp
3
- Version: 5.5.2
3
+ Version: 6.0.0
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -87,7 +87,7 @@ documentation and examples
87
87
 
88
88
  ## Quick Start
89
89
 
90
- This is a quick example of how to use Spark NLP pre-trained pipeline in Python and PySpark:
90
+ This is a quick example of how to use a Spark NLP pre-trained pipeline in Python and PySpark:
91
91
 
92
92
  ```sh
93
93
  $ java -version
@@ -95,7 +95,7 @@ $ java -version
95
95
  $ conda create -n sparknlp python=3.7 -y
96
96
  $ conda activate sparknlp
97
97
  # spark-nlp by default is based on pyspark 3.x
98
- $ pip install spark-nlp==5.5.2 pyspark==3.3.1
98
+ $ pip install spark-nlp==6.0.0 pyspark==3.3.1
99
99
  ```
100
100
 
101
101
  In Python console or Jupyter `Python3` kernel:
@@ -161,10 +161,11 @@ For a quick example of using pipelines and models take a look at our official [d
161
161
 
162
162
  ### Apache Spark Support
163
163
 
164
- Spark NLP *5.5.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
164
+ Spark NLP *6.0.0* has been built on top of Apache Spark 3.4 while fully supporting Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
165
165
 
166
166
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
167
167
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
168
+ | 6.0.x | YES | YES | YES | YES | YES | YES | NO | NO |
168
169
  | 5.5.x | YES | YES | YES | YES | YES | YES | NO | NO |
169
170
  | 5.4.x | YES | YES | YES | YES | YES | YES | NO | NO |
170
171
  | 5.3.x | YES | YES | YES | YES | YES | YES | NO | NO |
@@ -178,6 +179,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
178
179
 
179
180
  | Spark NLP | Python 3.6 | Python 3.7 | Python 3.8 | Python 3.9 | Python 3.10| Scala 2.11 | Scala 2.12 |
180
181
  |-----------|------------|------------|------------|------------|------------|------------|------------|
182
+ | 6.0.x | NO | YES | YES | YES | YES | NO | YES |
181
183
  | 5.5.x | NO | YES | YES | YES | YES | NO | YES |
182
184
  | 5.4.x | NO | YES | YES | YES | YES | NO | YES |
183
185
  | 5.3.x | NO | YES | YES | YES | YES | NO | YES |
@@ -189,7 +191,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
189
191
 
190
192
  ### Databricks Support
191
193
 
192
- Spark NLP 5.5.2 has been tested and is compatible with the following runtimes:
194
+ Spark NLP 6.0.0 has been tested and is compatible with the following runtimes:
193
195
 
194
196
  | **CPU** | **GPU** |
195
197
  |--------------------|--------------------|
@@ -206,7 +208,7 @@ We are compatible with older runtimes. For a full list check databricks support
206
208
 
207
209
  ### EMR Support
208
210
 
209
- Spark NLP 5.5.2 has been tested and is compatible with the following EMR releases:
211
+ Spark NLP 6.0.0 has been tested and is compatible with the following EMR releases:
210
212
 
211
213
  | **EMR Release** |
212
214
  |--------------------|
@@ -216,6 +218,13 @@ Spark NLP 5.5.2 has been tested and is compatible with the following EMR release
216
218
  | emr-7.0.0 |
217
219
  | emr-7.1.0 |
218
220
  | emr-7.2.0 |
221
+ | emr-7.3.0 |
222
+ | emr-7.4.0 |
223
+ | emr-7.5.0 |
224
+ | emr-7.6.0 |
225
+ | emr-7.7.0 |
226
+ | emr-7.8.0 |
227
+
219
228
 
220
229
  We are compatible with older EMR releases. For a full list check EMR support in our official [documentation](https://sparknlp.org/docs/en/install#emr-support)
221
230
 
@@ -237,7 +246,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
237
246
  from our official documentation.
238
247
 
239
248
  If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
240
- projects [Spark NLP SBT S5.5.2r](https://github.com/maziyarpanahi/spark-nlp-starter)
249
+ projects [Spark NLP SBT Starter](https://github.com/maziyarpanahi/spark-nlp-starter)
241
250
 
242
251
  ### Python
243
252
 
@@ -246,7 +255,7 @@ Check all available installations for Python in our official [documentation](htt
246
255
 
247
256
  ### Compiled JARs
248
257
 
249
- To compile the jars from source follow [these instructions](https://sparknlp.org/docs/en/compiled#jars) from our official documenation
258
+ To compile the jars from source follow [these instructions](https://sparknlp.org/docs/en/compiled#jars) from our official documentation
250
259
 
251
260
  ## Platform-Specific Instructions
252
261
 
@@ -266,7 +275,7 @@ For detailed instructions on how to use Spark NLP on supported platforms, please
266
275
 
267
276
  Spark NLP library and all the pre-trained models/pipelines can be used entirely offline with no access to the Internet.
268
277
  Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation
269
- to use Spark NLP offline
278
+ to use Spark NLP offline.
270
279
 
271
280
  ## Advanced Settings
272
281
 
@@ -282,7 +291,7 @@ In Spark NLP we can define S3 locations to:
282
291
 
283
292
  Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation.
284
293
 
285
- ## Document5.5.2
294
+ ## Documentation
286
295
 
287
296
  ### Examples
288
297
 
@@ -315,7 +324,7 @@ the Spark NLP library:
315
324
  keywords = {Spark, Natural language processing, Deep learning, Tensorflow, Cluster},
316
325
  abstract = {Spark NLP is a Natural Language Processing (NLP) library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines that can scale easily in a distributed environment. Spark NLP comes with 1100+ pretrained pipelines and models in more than 192+ languages. It supports nearly all the NLP tasks and modules that can be used seamlessly in a cluster. Downloaded more than 2.7 million times and experiencing 9x growth since January 2020, Spark NLP is used by 54% of healthcare organizations as the world’s most widely used NLP library in the enterprise.}
317
326
  }
318
- }5.5.2
327
+ }
319
328
  ```
320
329
 
321
330
  ## Community support
@@ -6,7 +6,6 @@ com/johnsnowlabs/__init__.py
6
6
  com/johnsnowlabs/ml/__init__.py
7
7
  com/johnsnowlabs/ml/ai/__init__.py
8
8
  com/johnsnowlabs/nlp/__init__.py
9
- spark_nlp.egg-info/.uuid
10
9
  spark_nlp.egg-info/PKG-INFO
11
10
  spark_nlp.egg-info/SOURCES.txt
12
11
  spark_nlp.egg-info/dependency_links.txt
@@ -39,6 +38,7 @@ sparknlp/annotator/audio/hubert_for_ctc.py
39
38
  sparknlp/annotator/audio/wav2vec2_for_ctc.py
40
39
  sparknlp/annotator/audio/whisper_for_ctc.py
41
40
  sparknlp/annotator/classifier_dl/__init__.py
41
+ sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py
42
42
  sparknlp/annotator/classifier_dl/albert_for_question_answering.py
43
43
  sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py
44
44
  sparknlp/annotator/classifier_dl/albert_for_token_classification.py
@@ -62,6 +62,7 @@ sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py
62
62
  sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py
63
63
  sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py
64
64
  sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py
65
+ sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py
65
66
  sparknlp/annotator/classifier_dl/longformer_for_question_answering.py
66
67
  sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py
67
68
  sparknlp/annotator/classifier_dl/longformer_for_token_classification.py
@@ -69,24 +70,34 @@ sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py
69
70
  sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py
70
71
  sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py
71
72
  sparknlp/annotator/classifier_dl/multi_classifier_dl.py
73
+ sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py
72
74
  sparknlp/annotator/classifier_dl/roberta_for_question_answering.py
73
75
  sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py
74
76
  sparknlp/annotator/classifier_dl/roberta_for_token_classification.py
75
77
  sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py
76
78
  sparknlp/annotator/classifier_dl/sentiment_dl.py
77
79
  sparknlp/annotator/classifier_dl/tapas_for_question_answering.py
80
+ sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py
78
81
  sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py
79
82
  sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py
80
83
  sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py
81
84
  sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py
82
85
  sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py
83
86
  sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py
87
+ sparknlp/annotator/cleaners/__init__.py
88
+ sparknlp/annotator/cleaners/cleaner.py
89
+ sparknlp/annotator/cleaners/extractor.py
84
90
  sparknlp/annotator/coref/__init__.py
85
91
  sparknlp/annotator/coref/spanbert_coref.py
86
92
  sparknlp/annotator/cv/__init__.py
87
93
  sparknlp/annotator/cv/blip_for_question_answering.py
88
94
  sparknlp/annotator/cv/clip_for_zero_shot_classification.py
89
95
  sparknlp/annotator/cv/convnext_for_image_classification.py
96
+ sparknlp/annotator/cv/janus_for_multimodal.py
97
+ sparknlp/annotator/cv/llava_for_multimodal.py
98
+ sparknlp/annotator/cv/mllama_for_multimodal.py
99
+ sparknlp/annotator/cv/phi3_vision_for_multimodal.py
100
+ sparknlp/annotator/cv/qwen2vl_transformer.py
90
101
  sparknlp/annotator/cv/swin_for_image_classification.py
91
102
  sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py
92
103
  sparknlp/annotator/cv/vit_for_image_classification.py
@@ -157,7 +168,9 @@ sparknlp/annotator/sentiment/sentiment_detector.py
157
168
  sparknlp/annotator/sentiment/vivekn_sentiment.py
158
169
  sparknlp/annotator/seq2seq/__init__.py
159
170
  sparknlp/annotator/seq2seq/auto_gguf_model.py
171
+ sparknlp/annotator/seq2seq/auto_gguf_vision_model.py
160
172
  sparknlp/annotator/seq2seq/bart_transformer.py
173
+ sparknlp/annotator/seq2seq/cohere_transformer.py
161
174
  sparknlp/annotator/seq2seq/cpm_transformer.py
162
175
  sparknlp/annotator/seq2seq/gpt2_transformer.py
163
176
  sparknlp/annotator/seq2seq/llama2_transformer.py
@@ -166,6 +179,7 @@ sparknlp/annotator/seq2seq/m2m100_transformer.py
166
179
  sparknlp/annotator/seq2seq/marian_transformer.py
167
180
  sparknlp/annotator/seq2seq/mistral_transformer.py
168
181
  sparknlp/annotator/seq2seq/nllb_transformer.py
182
+ sparknlp/annotator/seq2seq/olmo_transformer.py
169
183
  sparknlp/annotator/seq2seq/phi2_transformer.py
170
184
  sparknlp/annotator/seq2seq/phi3_transformer.py
171
185
  sparknlp/annotator/seq2seq/qwen_transformer.py
@@ -225,6 +239,7 @@ sparknlp/pretrained/pretrained_pipeline.py
225
239
  sparknlp/pretrained/resource_downloader.py
226
240
  sparknlp/pretrained/utils.py
227
241
  sparknlp/reader/__init__.py
242
+ sparknlp/reader/pdf_to_text.py
228
243
  sparknlp/reader/sparknlp_reader.py
229
244
  sparknlp/training/__init__.py
230
245
  sparknlp/training/conll.py
@@ -132,7 +132,7 @@ def start(gpu=False,
132
132
  The initiated Spark session.
133
133
 
134
134
  """
135
- current_version = "5.5.2"
135
+ current_version = "6.0.0"
136
136
 
137
137
  if params is None:
138
138
  params = {}
@@ -316,4 +316,4 @@ def version():
316
316
  str
317
317
  The current Spark NLP version.
318
318
  """
319
- return '5.5.2'
319
+ return '6.0.0'
@@ -55,3 +55,7 @@ from sparknlp.annotator.classifier_dl.mpnet_for_token_classification import *
55
55
  from sparknlp.annotator.classifier_dl.albert_for_zero_shot_classification import *
56
56
  from sparknlp.annotator.classifier_dl.camembert_for_zero_shot_classification import *
57
57
  from sparknlp.annotator.classifier_dl.bert_for_multiple_choice import *
58
+ from sparknlp.annotator.classifier_dl.xlm_roberta_for_multiple_choice import *
59
+ from sparknlp.annotator.classifier_dl.roberta_for_multiple_choice import *
60
+ from sparknlp.annotator.classifier_dl.distilbert_for_multiple_choice import *
61
+ from sparknlp.annotator.classifier_dl.albert_for_multiple_choice import *
@@ -0,0 +1,161 @@
1
+ # Copyright 2017-2024 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from sparknlp.common import *
16
+
17
+ class AlbertForMultipleChoice(AnnotatorModel,
18
+ HasCaseSensitiveProperties,
19
+ HasBatchedAnnotate,
20
+ HasEngine,
21
+ HasMaxSentenceLengthLimit):
22
+ """AlbertForMultipleChoice can load ALBERT Models with a multiple choice classification head on top
23
+ (a linear layer on top of the pooled output and a softmax) e.g. for RocStories/SWAG tasks.
24
+
25
+ Pretrained models can be loaded with :meth:`.pretrained` of the companion
26
+ object:
27
+
28
+ >>> spanClassifier = AlbertForMultipleChoice.pretrained() \\
29
+ ... .setInputCols(["document_question", "document_context"]) \\
30
+ ... .setOutputCol("answer")
31
+
32
+ The default model is ``"albert_base_uncased_multiple_choice"``, if no name is
33
+ provided.
34
+
35
+ For available pretrained models please see the `Models Hub
36
+ <https://sparknlp.org/models?task=Multiple+Choice>`__.
37
+
38
+ To see which models are compatible and how to import them see
39
+ `Import Transformers into Spark NLP 🚀
40
+ <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
41
+
42
+ ====================== ======================
43
+ Input Annotation types Output Annotation type
44
+ ====================== ======================
45
+ ``DOCUMENT, DOCUMENT`` ``CHUNK``
46
+ ====================== ======================
47
+
48
+ Parameters
49
+ ----------
50
+ batchSize
51
+ Batch size. Large values allows faster processing but requires more
52
+ memory, by default 8
53
+ caseSensitive
54
+ Whether to ignore case in tokens for embeddings matching, by default
55
+ False
56
+ maxSentenceLength
57
+ Max sentence length to process, by default 512
58
+
59
+ Examples
60
+ --------
61
+ >>> import sparknlp
62
+ >>> from sparknlp.base import *
63
+ >>> from sparknlp.annotator import *
64
+ >>> from pyspark.ml import Pipeline
65
+ >>> documentAssembler = MultiDocumentAssembler() \\
66
+ ... .setInputCols(["question", "context"]) \\
67
+ ... .setOutputCols(["document_question", "document_context"])
68
+ >>> questionAnswering = AlbertForMultipleChoice.pretrained() \\
69
+ ... .setInputCols(["document_question", "document_context"]) \\
70
+ ... .setOutputCol("answer") \\
71
+ ... .setCaseSensitive(False)
72
+ >>> pipeline = Pipeline().setStages([
73
+ ... documentAssembler,
74
+ ... questionAnswering
75
+ ... ])
76
+     >>> data = spark.createDataFrame([["The Eiffel Tower is located in which country?", "Germany, France, Italy"]]).toDF("question", "context")
77
+ >>> result = pipeline.fit(data).transform(data)
78
+ >>> result.select("answer.result").show(truncate=False)
79
+ +--------------------+
80
+ |result |
81
+ +--------------------+
82
+ |[France] |
83
+ +--------------------+
84
+ """
85
+ name = "AlbertForMultipleChoice"
86
+
87
+ inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.DOCUMENT]
88
+
89
+ outputAnnotatorType = AnnotatorType.CHUNK
90
+
91
+ choicesDelimiter = Param(Params._dummy(),
92
+ "choicesDelimiter",
93
+                              "Delimiter character used to split the choices",
94
+ TypeConverters.toString)
95
+
96
+ def setChoicesDelimiter(self, value):
97
+         """Sets delimiter character used to split the choices
98
+
99
+ Parameters
100
+ ----------
101
+ value : string
102
+             Delimiter character used to split the choices
103
+ """
104
+         return self._set(choicesDelimiter=value)
105
+
106
+ @keyword_only
107
+ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.AlbertForMultipleChoice",
108
+ java_model=None):
109
+ super(AlbertForMultipleChoice, self).__init__(
110
+ classname=classname,
111
+ java_model=java_model
112
+ )
113
+ self._setDefault(
114
+ batchSize=4,
115
+ maxSentenceLength=512,
116
+ caseSensitive=False,
117
+ choicesDelimiter = ","
118
+ )
119
+
120
+ @staticmethod
121
+ def loadSavedModel(folder, spark_session):
122
+ """Loads a locally saved model.
123
+
124
+ Parameters
125
+ ----------
126
+ folder : str
127
+ Folder of the saved model
128
+ spark_session : pyspark.sql.SparkSession
129
+ The current SparkSession
130
+
131
+ Returns
132
+ -------
133
+             AlbertForMultipleChoice
134
+ The restored model
135
+ """
136
+ from sparknlp.internal import _AlbertMultipleChoiceLoader
137
+ jModel = _AlbertMultipleChoiceLoader(folder, spark_session._jsparkSession)._java_obj
138
+ return AlbertForMultipleChoice(java_model=jModel)
139
+
140
+ @staticmethod
141
+ def pretrained(name="albert_base_uncased_multiple_choice", lang="en", remote_loc=None):
142
+ """Downloads and loads a pretrained model.
143
+
144
+ Parameters
145
+ ----------
146
+ name : str, optional
147
+ Name of the pretrained model, by default
148
+             "albert_base_uncased_multiple_choice"
149
+ lang : str, optional
150
+ Language of the pretrained model, by default "en"
151
+ remote_loc : str, optional
152
+ Optional remote address of the resource, by default None. Will use
153
+ Spark NLPs repositories otherwise.
154
+
155
+ Returns
156
+ -------
157
+             AlbertForMultipleChoice
158
+ The restored model
159
+ """
160
+ from sparknlp.pretrained import ResourceDownloader
161
+ return ResourceDownloader.downloadModel(AlbertForMultipleChoice, name, lang, remote_loc)
@@ -130,7 +130,7 @@ class BertForMultipleChoice(AnnotatorModel,
130
130
 
131
131
  Returns
132
132
  -------
133
- BertForQuestionAnswering
133
+ BertForMultipleChoice
134
134
  The restored model
135
135
  """
136
136
  from sparknlp.internal import _BertMultipleChoiceLoader
@@ -154,7 +154,7 @@ class BertForMultipleChoice(AnnotatorModel,
154
154
 
155
155
  Returns
156
156
  -------
157
- BertForQuestionAnswering
157
+ BertForMultipleChoice
158
158
  The restored model
159
159
  """
160
160
  from sparknlp.pretrained import ResourceDownloader