spark-nlp 5.0.2__tar.gz → 5.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spark-nlp has been flagged as potentially problematic; see the registry's advisory page for more details.

Files changed (223)
  1. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/PKG-INFO +52 -45
  2. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/README.md +51 -44
  3. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/setup.py +1 -1
  4. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/spark_nlp.egg-info/PKG-INFO +52 -45
  5. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/spark_nlp.egg-info/SOURCES.txt +7 -1
  6. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/__init__.py +2 -2
  7. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/__init__.py +1 -0
  8. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/audio/__init__.py +1 -0
  9. spark-nlp-5.1.1/sparknlp/annotator/audio/whisper_for_ctc.py +250 -0
  10. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/__init__.py +3 -2
  11. spark-nlp-5.1.1/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
  12. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/__init__.py +1 -0
  13. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/doc2vec.py +6 -0
  14. spark-nlp-5.1.1/sparknlp/annotator/embeddings/mpnet_embeddings.py +190 -0
  15. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/word2vec.py +6 -0
  16. spark-nlp-5.1.1/sparknlp/annotator/openai/__init__.py +16 -0
  17. spark-nlp-5.1.1/sparknlp/annotator/openai/openai_completion.py +352 -0
  18. spark-nlp-5.1.1/sparknlp/annotator/openai/openai_embeddings.py +132 -0
  19. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/properties.py +173 -0
  20. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/__init__.py +19 -1
  21. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/com/__init__.py +0 -0
  22. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/com/johnsnowlabs/__init__.py +0 -0
  23. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/com/johnsnowlabs/nlp/__init__.py +0 -0
  24. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/setup.cfg +0 -0
  25. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/spark_nlp.egg-info/dependency_links.txt +0 -0
  26. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/spark_nlp.egg-info/top_level.txt +0 -0
  27. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotation.py +0 -0
  28. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotation_audio.py +0 -0
  29. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotation_image.py +0 -0
  30. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
  31. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
  32. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/chunk2_doc.py +0 -0
  33. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/chunker.py +0 -0
  34. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
  35. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
  36. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
  37. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
  38. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
  39. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
  40. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
  41. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
  42. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
  43. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
  44. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
  45. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
  46. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
  47. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
  48. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
  49. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
  50. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
  51. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
  52. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
  53. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
  54. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
  55. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
  56. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
  57. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
  58. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
  59. /spark-nlp-5.0.2/sparknlp/annotator/classifier_dl/roberta_bert_for_zero_shot_classification.py → /spark-nlp-5.1.1/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
  60. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
  61. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
  62. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
  63. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
  64. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
  65. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
  66. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
  67. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
  68. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/coref/__init__.py +0 -0
  69. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
  70. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/cv/__init__.py +0 -0
  71. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
  72. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
  73. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
  74. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/date2_chunk.py +0 -0
  75. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/dependency/__init__.py +0 -0
  76. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
  77. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
  78. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/document_normalizer.py +0 -0
  79. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
  80. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
  81. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
  82. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
  83. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
  84. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
  85. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
  86. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
  87. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
  88. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
  89. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
  90. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
  91. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
  92. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
  93. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
  94. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
  95. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
  96. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
  97. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
  98. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/er/__init__.py +0 -0
  99. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/er/entity_ruler.py +0 -0
  100. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/graph_extraction.py +0 -0
  101. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
  102. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
  103. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ld_dl/__init__.py +0 -0
  104. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
  105. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/lemmatizer.py +0 -0
  106. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/__init__.py +0 -0
  107. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
  108. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/date_matcher.py +0 -0
  109. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
  110. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
  111. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/text_matcher.py +0 -0
  112. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/n_gram_generator.py +0 -0
  113. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/__init__.py +0 -0
  114. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_approach.py +0 -0
  115. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_converter.py +0 -0
  116. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_crf.py +0 -0
  117. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_dl.py +0 -0
  118. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
  119. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
  120. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/normalizer.py +0 -0
  121. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/param/__init__.py +0 -0
  122. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/param/classifier_encoder.py +0 -0
  123. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
  124. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/pos/__init__.py +0 -0
  125. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/pos/perceptron.py +0 -0
  126. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentence/__init__.py +0 -0
  127. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
  128. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
  129. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentiment/__init__.py +0 -0
  130. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
  131. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
  132. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/__init__.py +0 -0
  133. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
  134. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
  135. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
  136. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
  137. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/similarity/__init__.py +0 -0
  138. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
  139. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/__init__.py +0 -0
  140. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
  141. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
  142. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
  143. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/stemmer.py +0 -0
  144. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/stop_words_cleaner.py +0 -0
  145. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
  146. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/token/__init__.py +0 -0
  147. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
  148. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
  149. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
  150. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/token/tokenizer.py +0 -0
  151. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ws/__init__.py +0 -0
  152. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ws/word_segmenter.py +0 -0
  153. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/__init__.py +0 -0
  154. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/audio_assembler.py +0 -0
  155. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/doc2_chunk.py +0 -0
  156. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/document_assembler.py +0 -0
  157. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/embeddings_finisher.py +0 -0
  158. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/finisher.py +0 -0
  159. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/graph_finisher.py +0 -0
  160. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/has_recursive_fit.py +0 -0
  161. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/has_recursive_transform.py +0 -0
  162. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/image_assembler.py +0 -0
  163. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/light_pipeline.py +0 -0
  164. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/multi_document_assembler.py +0 -0
  165. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/recursive_pipeline.py +0 -0
  166. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/table_assembler.py +0 -0
  167. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/token2_chunk.py +0 -0
  168. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/token_assembler.py +0 -0
  169. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/__init__.py +0 -0
  170. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/annotator_approach.py +0 -0
  171. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/annotator_model.py +0 -0
  172. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/annotator_properties.py +0 -0
  173. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/annotator_type.py +0 -0
  174. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/coverage_result.py +0 -0
  175. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/match_strategy.py +0 -0
  176. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/read_as.py +0 -0
  177. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/recursive_annotator_approach.py +0 -0
  178. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/storage.py +0 -0
  179. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/utils.py +0 -0
  180. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/functions.py +0 -0
  181. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/annotator_java_ml.py +0 -0
  182. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/annotator_transformer.py +0 -0
  183. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/extended_java_wrapper.py +0 -0
  184. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/params_getters_setters.py +0 -0
  185. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/recursive.py +0 -0
  186. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/logging/__init__.py +0 -0
  187. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/logging/comet.py +0 -0
  188. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/pretrained/__init__.py +0 -0
  189. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
  190. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/pretrained/resource_downloader.py +0 -0
  191. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/pretrained/utils.py +0 -0
  192. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/__init__.py +0 -0
  193. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  194. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
  195. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  196. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
  197. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
  198. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
  199. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
  200. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
  201. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
  202. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
  203. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
  204. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
  205. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
  206. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
  207. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
  208. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  209. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
  210. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  211. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
  212. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
  213. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
  214. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
  215. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
  216. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/conll.py +0 -0
  217. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/conllu.py +0 -0
  218. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/pos.py +0 -0
  219. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/pub_tator.py +0 -0
  220. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/spacy_to_annotation.py +0 -0
  221. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/tfgraphs.py +0 -0
  222. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/upload_to_hub.py +0 -0
  223. {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spark-nlp
3
- Version: 5.0.2
3
+ Version: 5.1.1
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -144,6 +144,8 @@ documentation and examples
144
144
  - XLM-RoBerta Sentence Embeddings (HuggingFace models)
145
145
  - Instructor Embeddings (HuggingFace models)
146
146
  - E5 Embeddings (HuggingFace models)
147
+ - MPNet Embeddings (HuggingFace models)
148
+ - OpenAI Embeddings
147
149
  - Sentence Embeddings
148
150
  - Chunk Embeddings
149
151
  - Unsupervised keywords extraction
@@ -181,6 +183,7 @@ documentation and examples
181
183
  - ConvNext Image Classification (Facebook ConvNext)
182
184
  - Automatic Speech Recognition (Wav2Vec2)
183
185
  - Automatic Speech Recognition (HuBERT)
186
+ - Automatic Speech Recognition (OpenAI Whisper)
184
187
  - Named entity recognition (Deep learning)
185
188
  - Easy ONNX and TensorFlow integrations
186
189
  - GPU Support
@@ -199,7 +202,7 @@ To use Spark NLP you need the following requirements:
199
202
 
200
203
  **GPU (optional):**
201
204
 
202
- Spark NLP 5.0.2 is built with ONNX 1.15.1 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
205
+ Spark NLP 5.1.1 is built with ONNX 1.15.1 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
203
206
 
204
207
  - NVIDIA® GPU drivers version 450.80.02 or higher
205
208
  - CUDA® Toolkit 11.2
@@ -215,7 +218,7 @@ $ java -version
215
218
  $ conda create -n sparknlp python=3.7 -y
216
219
  $ conda activate sparknlp
217
220
  # spark-nlp by default is based on pyspark 3.x
218
- $ pip install spark-nlp==5.0.2 pyspark==3.3.1
221
+ $ pip install spark-nlp==5.1.1 pyspark==3.3.1
219
222
  ```
220
223
 
221
224
  In Python console or Jupyter `Python3` kernel:
@@ -260,7 +263,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
260
263
 
261
264
  ## Apache Spark Support
262
265
 
263
- Spark NLP *5.0.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x
266
+ Spark NLP *5.1.1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x
264
267
 
265
268
  | Spark NLP | Apache Spark 2.3.x | Apache Spark 2.4.x | Apache Spark 3.0.x | Apache Spark 3.1.x | Apache Spark 3.2.x | Apache Spark 3.3.x | Apache Spark 3.4.x |
266
269
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -299,7 +302,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
299
302
 
300
303
  ## Databricks Support
301
304
 
302
- Spark NLP 5.0.2 has been tested and is compatible with the following runtimes:
305
+ Spark NLP 5.1.1 has been tested and is compatible with the following runtimes:
303
306
 
304
307
  **CPU:**
305
308
 
@@ -335,6 +338,8 @@ Spark NLP 5.0.2 has been tested and is compatible with the following runtimes:
335
338
  - 13.1 ML
336
339
  - 13.2
337
340
  - 13.2 ML
341
+ - 13.3
342
+ - 13.3 ML
338
343
 
339
344
  **GPU:**
340
345
 
@@ -354,10 +359,11 @@ Spark NLP 5.0.2 has been tested and is compatible with the following runtimes:
354
359
  - 13.0 ML & GPU
355
360
  - 13.1 ML & GPU
356
361
  - 13.2 ML & GPU
362
+ - 13.3 ML & GPU
357
363
 
358
364
  ## EMR Support
359
365
 
360
- Spark NLP 5.0.2 has been tested and is compatible with the following EMR releases:
366
+ Spark NLP 5.1.1 has been tested and is compatible with the following EMR releases:
361
367
 
362
368
  - emr-6.2.0
363
369
  - emr-6.3.0
@@ -370,6 +376,7 @@ Spark NLP 5.0.2 has been tested and is compatible with the following EMR release
370
376
  - emr-6.9.0
371
377
  - emr-6.10.0
372
378
  - emr-6.11.0
379
+ - emr-6.12.0
373
380
 
374
381
  Full list of [Amazon EMR 6.x releases](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-release-6x.html)
375
382
 
@@ -401,11 +408,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
401
408
  ```sh
402
409
  # CPU
403
410
 
404
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
411
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
405
412
 
406
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
413
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
407
414
 
408
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
415
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
409
416
  ```
410
417
 
411
418
  The `spark-nlp` has been published to
@@ -414,11 +421,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
414
421
  ```sh
415
422
  # GPU
416
423
 
417
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.0.2
424
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
418
425
 
419
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.0.2
426
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
420
427
 
421
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.0.2
428
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
422
429
 
423
430
  ```
424
431
 
@@ -428,11 +435,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
428
435
  ```sh
429
436
  # AArch64
430
437
 
431
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.0.2
438
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
432
439
 
433
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.0.2
440
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
434
441
 
435
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.0.2
442
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
436
443
 
437
444
  ```
438
445
 
@@ -442,11 +449,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
442
449
  ```sh
443
450
  # M1/M2 (Apple Silicon)
444
451
 
445
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.0.2
452
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
446
453
 
447
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.0.2
454
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
448
455
 
449
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.0.2
456
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
450
457
 
451
458
  ```
452
459
 
@@ -460,7 +467,7 @@ set in your SparkSession:
460
467
  spark-shell \
461
468
  --driver-memory 16g \
462
469
  --conf spark.kryoserializer.buffer.max=2000M \
463
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
470
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
464
471
  ```
465
472
 
466
473
  ## Scala
@@ -478,7 +485,7 @@ coordinates:
478
485
  <dependency>
479
486
  <groupId>com.johnsnowlabs.nlp</groupId>
480
487
  <artifactId>spark-nlp_2.12</artifactId>
481
- <version>5.0.2</version>
488
+ <version>5.1.1</version>
482
489
  </dependency>
483
490
  ```
484
491
 
@@ -489,7 +496,7 @@ coordinates:
489
496
  <dependency>
490
497
  <groupId>com.johnsnowlabs.nlp</groupId>
491
498
  <artifactId>spark-nlp-gpu_2.12</artifactId>
492
- <version>5.0.2</version>
499
+ <version>5.1.1</version>
493
500
  </dependency>
494
501
  ```
495
502
 
@@ -500,7 +507,7 @@ coordinates:
500
507
  <dependency>
501
508
  <groupId>com.johnsnowlabs.nlp</groupId>
502
509
  <artifactId>spark-nlp-aarch64_2.12</artifactId>
503
- <version>5.0.2</version>
510
+ <version>5.1.1</version>
504
511
  </dependency>
505
512
  ```
506
513
 
@@ -511,7 +518,7 @@ coordinates:
511
518
  <dependency>
512
519
  <groupId>com.johnsnowlabs.nlp</groupId>
513
520
  <artifactId>spark-nlp-silicon_2.12</artifactId>
514
- <version>5.0.2</version>
521
+ <version>5.1.1</version>
515
522
  </dependency>
516
523
  ```
517
524
 
@@ -521,28 +528,28 @@ coordinates:
521
528
 
522
529
  ```sbtshell
523
530
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
524
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.0.2"
531
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.1.1"
525
532
  ```
526
533
 
527
534
  **spark-nlp-gpu:**
528
535
 
529
536
  ```sbtshell
530
537
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
531
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.0.2"
538
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.1.1"
532
539
  ```
533
540
 
534
541
  **spark-nlp-aarch64:**
535
542
 
536
543
  ```sbtshell
537
544
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
538
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.0.2"
545
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.1.1"
539
546
  ```
540
547
 
541
548
  **spark-nlp-silicon:**
542
549
 
543
550
  ```sbtshell
544
551
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
545
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.0.2"
552
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.1"
546
553
  ```
547
554
 
548
555
  Maven
@@ -564,7 +571,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
564
571
  Pip:
565
572
 
566
573
  ```bash
567
- pip install spark-nlp==5.0.2
574
+ pip install spark-nlp==5.1.1
568
575
  ```
569
576
 
570
577
  Conda:
@@ -593,7 +600,7 @@ spark = SparkSession.builder
593
600
  .config("spark.driver.memory", "16G")
594
601
  .config("spark.driver.maxResultSize", "0")
595
602
  .config("spark.kryoserializer.buffer.max", "2000M")
596
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2")
603
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1")
597
604
  .getOrCreate()
598
605
  ```
599
606
 
@@ -664,7 +671,7 @@ Use either one of the following options
664
671
  - Add the following Maven Coordinates to the interpreter's library list
665
672
 
666
673
  ```bash
667
- com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
674
+ com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
668
675
  ```
669
676
 
670
677
  - Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
@@ -675,7 +682,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
675
682
  In addition to the previous step, install the Python module through pip
676
683
 
677
684
  ```bash
678
- pip install spark-nlp==5.0.2
685
+ pip install spark-nlp==5.1.1
679
686
  ```
680
687
 
681
688
  Or you can install `spark-nlp` from inside Zeppelin by using Conda:
@@ -703,7 +710,7 @@ launch the Jupyter from the same Python environment:
703
710
  $ conda create -n sparknlp python=3.8 -y
704
711
  $ conda activate sparknlp
705
712
  # spark-nlp by default is based on pyspark 3.x
706
- $ pip install spark-nlp==5.0.2 pyspark==3.3.1 jupyter
713
+ $ pip install spark-nlp==5.1.1 pyspark==3.3.1 jupyter
707
714
  $ jupyter notebook
708
715
  ```
709
716
 
@@ -720,7 +727,7 @@ export PYSPARK_PYTHON=python3
720
727
  export PYSPARK_DRIVER_PYTHON=jupyter
721
728
  export PYSPARK_DRIVER_PYTHON_OPTS=notebook
722
729
 
723
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
730
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
724
731
  ```
725
732
 
726
733
  Alternatively, you can use the `--jars` option for pyspark together with `pip install spark-nlp`
@@ -747,7 +754,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
747
754
  # -s is for spark-nlp
748
755
  # -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
749
756
  # by default they are set to the latest
750
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.0.2
757
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.1
751
758
  ```
752
759
 
753
760
  [Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
@@ -770,7 +777,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
770
777
  # -s is for spark-nlp
771
778
  # -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
772
779
  # by default they are set to the latest
773
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.0.2
780
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.1
774
781
  ```
775
782
 
776
783
  [Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
@@ -789,9 +796,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
789
796
 
790
797
  3. In `Libraries` tab inside your cluster you need to follow these steps:
791
798
 
792
- 3.1. Install New -> PyPI -> `spark-nlp==5.0.2` -> Install
799
+ 3.1. Install New -> PyPI -> `spark-nlp==5.1.1` -> Install
793
800
 
794
- 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2` -> Install
801
+ 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1` -> Install
795
802
 
796
803
  4. Now you can attach your notebook to the cluster and use Spark NLP!
797
804
 
@@ -842,7 +849,7 @@ A sample of your software configuration in JSON on S3 (must be publicly accessible):
842
849
  "spark.kryoserializer.buffer.max": "2000M",
843
850
  "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
844
851
  "spark.driver.maxResultSize": "0",
845
- "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2"
852
+ "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1"
846
853
  }
847
854
  }]
848
855
  ```
@@ -851,7 +858,7 @@ A sample AWS CLI command to launch an EMR cluster:
851
858
 
852
859
  ```.sh
853
860
  aws emr create-cluster \
854
- --name "Spark NLP 5.0.2" \
861
+ --name "Spark NLP 5.1.1" \
855
862
  --release-label emr-6.2.0 \
856
863
  --applications Name=Hadoop Name=Spark Name=Hive \
857
864
  --instance-type m4.4xlarge \
@@ -915,7 +922,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
915
922
  --enable-component-gateway \
916
923
  --metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
917
924
  --initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
918
- --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
925
+ --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
919
926
  ```
920
927
 
921
928
  2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
@@ -954,7 +961,7 @@ spark = SparkSession.builder
954
961
  .config("spark.kryoserializer.buffer.max", "2000m")
955
962
  .config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
956
963
  .config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
957
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2")
964
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1")
958
965
  .getOrCreate()
959
966
  ```
960
967
 
@@ -968,7 +975,7 @@ spark-shell \
968
975
  --conf spark.kryoserializer.buffer.max=2000M \
969
976
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
970
977
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
971
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
978
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
972
979
  ```
973
980
 
974
981
  **pyspark:**
@@ -981,7 +988,7 @@ pyspark \
981
988
  --conf spark.kryoserializer.buffer.max=2000M \
982
989
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
983
990
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
984
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
991
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
985
992
  ```
986
993
 
987
994
  **Databricks:**
@@ -1253,7 +1260,7 @@ spark = SparkSession.builder
1253
1260
  .config("spark.driver.memory", "16G")
1254
1261
  .config("spark.driver.maxResultSize", "0")
1255
1262
  .config("spark.kryoserializer.buffer.max", "2000M")
1256
- .config("spark.jars", "/tmp/spark-nlp-assembly-5.0.2.jar")
1263
+ .config("spark.jars", "/tmp/spark-nlp-assembly-5.1.1.jar")
1257
1264
  .getOrCreate()
1258
1265
  ```
1259
1266
 
@@ -1262,7 +1269,7 @@ spark = SparkSession.builder
1262
1269
  version (3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x)
1263
1270
  - If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
1264
1271
  to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
1265
- i.e., `hdfs:///tmp/spark-nlp-assembly-5.0.2.jar`)
1272
+ i.e., `hdfs:///tmp/spark-nlp-assembly-5.1.1.jar`)
1266
1273
 
1267
1274
  Example of using pretrained Models and Pipelines in offline:
1268
1275