spark-nlp 5.4.2__tar.gz → 5.5.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spark-nlp might be problematic; see the release notes for more details.

Files changed (253)
  1. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/PKG-INFO +45 -45
  2. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/README.md +44 -44
  3. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/setup.py +1 -1
  4. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/spark_nlp.egg-info/PKG-INFO +45 -45
  5. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/spark_nlp.egg-info/SOURCES.txt +12 -0
  6. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/__init__.py +2 -2
  7. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/__init__.py +3 -1
  8. spark-nlp-5.5.0rc1/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
  9. spark-nlp-5.5.0rc1/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
  10. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +2 -15
  11. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/__init__.py +3 -0
  12. spark-nlp-5.5.0rc1/sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
  13. spark-nlp-5.5.0rc1/sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
  14. spark-nlp-5.5.0rc1/sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
  15. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/__init__.py +7 -0
  16. spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/auto_gguf_model.py +804 -0
  17. spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
  18. spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
  19. spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
  20. spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
  21. spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/qwen_transformer.py +339 -0
  22. spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
  23. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/__init__.py +89 -0
  24. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/com/__init__.py +0 -0
  25. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/com/johnsnowlabs/__init__.py +0 -0
  26. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/com/johnsnowlabs/ml/__init__.py +0 -0
  27. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
  28. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/com/johnsnowlabs/nlp/__init__.py +0 -0
  29. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/setup.cfg +0 -0
  30. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/spark_nlp.egg-info/.uuid +0 -0
  31. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/spark_nlp.egg-info/dependency_links.txt +0 -0
  32. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/spark_nlp.egg-info/top_level.txt +0 -0
  33. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotation.py +0 -0
  34. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotation_audio.py +0 -0
  35. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotation_image.py +0 -0
  36. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/__init__.py +0 -0
  37. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/audio/__init__.py +0 -0
  38. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
  39. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
  40. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
  41. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/chunk2_doc.py +0 -0
  42. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/chunker.py +0 -0
  43. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
  44. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
  45. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
  46. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
  47. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
  48. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
  49. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
  50. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
  51. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
  52. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
  53. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
  54. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
  55. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
  56. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
  57. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
  58. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
  59. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
  60. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
  61. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
  62. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
  63. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
  64. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
  65. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
  66. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
  67. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
  68. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
  69. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
  70. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
  71. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
  72. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
  73. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
  74. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
  75. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
  76. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
  77. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
  78. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
  79. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
  80. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
  81. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/coref/__init__.py +0 -0
  82. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
  83. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/__init__.py +0 -0
  84. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
  85. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
  86. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
  87. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
  88. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
  89. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/date2_chunk.py +0 -0
  90. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/dependency/__init__.py +0 -0
  91. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
  92. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
  93. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/document_character_text_splitter.py +0 -0
  94. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/document_normalizer.py +0 -0
  95. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/document_token_splitter.py +0 -0
  96. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/document_token_splitter_test.py +0 -0
  97. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
  98. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
  99. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
  100. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -0
  101. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
  102. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
  103. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
  104. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
  105. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
  106. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
  107. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
  108. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
  109. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
  110. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
  111. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
  112. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
  113. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
  114. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
  115. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
  116. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/word2vec.py +0 -0
  117. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
  118. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
  119. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
  120. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
  121. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/er/__init__.py +0 -0
  122. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/er/entity_ruler.py +0 -0
  123. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/graph_extraction.py +0 -0
  124. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
  125. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
  126. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ld_dl/__init__.py +0 -0
  127. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
  128. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/lemmatizer.py +0 -0
  129. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/__init__.py +0 -0
  130. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
  131. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/date_matcher.py +0 -0
  132. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
  133. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
  134. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/text_matcher.py +0 -0
  135. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/n_gram_generator.py +0 -0
  136. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/__init__.py +0 -0
  137. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/ner_approach.py +0 -0
  138. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/ner_converter.py +0 -0
  139. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/ner_crf.py +0 -0
  140. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/ner_dl.py +0 -0
  141. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
  142. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
  143. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/normalizer.py +0 -0
  144. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/openai/__init__.py +0 -0
  145. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/openai/openai_completion.py +0 -0
  146. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
  147. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/param/__init__.py +0 -0
  148. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/param/classifier_encoder.py +0 -0
  149. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
  150. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/pos/__init__.py +0 -0
  151. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/pos/perceptron.py +0 -0
  152. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentence/__init__.py +0 -0
  153. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
  154. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
  155. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentiment/__init__.py +0 -0
  156. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
  157. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
  158. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
  159. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
  160. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
  161. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
  162. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
  163. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
  164. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
  165. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
  166. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/similarity/__init__.py +0 -0
  167. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
  168. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/spell_check/__init__.py +0 -0
  169. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
  170. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
  171. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
  172. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/stemmer.py +0 -0
  173. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/stop_words_cleaner.py +0 -0
  174. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
  175. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token/__init__.py +0 -0
  176. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
  177. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
  178. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
  179. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token/tokenizer.py +0 -0
  180. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token2_chunk.py +0 -0
  181. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ws/__init__.py +0 -0
  182. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ws/word_segmenter.py +0 -0
  183. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/__init__.py +0 -0
  184. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/audio_assembler.py +0 -0
  185. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/doc2_chunk.py +0 -0
  186. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/document_assembler.py +0 -0
  187. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/embeddings_finisher.py +0 -0
  188. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/finisher.py +0 -0
  189. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/graph_finisher.py +0 -0
  190. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/has_recursive_fit.py +0 -0
  191. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/has_recursive_transform.py +0 -0
  192. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/image_assembler.py +0 -0
  193. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/light_pipeline.py +0 -0
  194. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/multi_document_assembler.py +0 -0
  195. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/recursive_pipeline.py +0 -0
  196. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/table_assembler.py +0 -0
  197. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/token_assembler.py +0 -0
  198. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/__init__.py +0 -0
  199. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/annotator_approach.py +0 -0
  200. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/annotator_model.py +0 -0
  201. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/annotator_properties.py +0 -0
  202. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/annotator_type.py +0 -0
  203. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/coverage_result.py +0 -0
  204. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/match_strategy.py +0 -0
  205. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/properties.py +0 -0
  206. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/read_as.py +0 -0
  207. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/recursive_annotator_approach.py +0 -0
  208. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/storage.py +0 -0
  209. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/utils.py +0 -0
  210. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/functions.py +0 -0
  211. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/annotator_java_ml.py +0 -0
  212. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/annotator_transformer.py +0 -0
  213. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/extended_java_wrapper.py +0 -0
  214. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/params_getters_setters.py +0 -0
  215. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/recursive.py +0 -0
  216. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/logging/__init__.py +0 -0
  217. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/logging/comet.py +0 -0
  218. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/pretrained/__init__.py +0 -0
  219. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
  220. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/pretrained/resource_downloader.py +0 -0
  221. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/pretrained/utils.py +0 -0
  222. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/__init__.py +0 -0
  223. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  224. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
  225. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  226. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
  227. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
  228. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
  229. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
  230. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
  231. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
  232. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
  233. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
  234. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
  235. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
  236. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
  237. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
  238. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  239. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
  240. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  241. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
  242. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
  243. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
  244. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
  245. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
  246. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/conll.py +0 -0
  247. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/conllu.py +0 -0
  248. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/pos.py +0 -0
  249. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/pub_tator.py +0 -0
  250. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/spacy_to_annotation.py +0 -0
  251. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/tfgraphs.py +0 -0
  252. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/upload_to_hub.py +0 -0
  253. {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spark-nlp
3
- Version: 5.4.2
3
+ Version: 5.5.0rc1
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -198,7 +198,7 @@ To use Spark NLP you need the following requirements:
198
198
 
199
199
  **GPU (optional):**
200
200
 
201
- Spark NLP 5.4.2 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
201
+ Spark NLP 5.5.0-rc1 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
202
202
 
203
203
  - NVIDIA® GPU drivers version 450.80.02 or higher
204
204
  - CUDA® Toolkit 11.2
@@ -214,7 +214,7 @@ $ java -version
214
214
  $ conda create -n sparknlp python=3.7 -y
215
215
  $ conda activate sparknlp
216
216
  # spark-nlp by default is based on pyspark 3.x
217
- $ pip install spark-nlp==5.4.2 pyspark==3.3.1
217
+ $ pip install spark-nlp==5.5.0-rc1 pyspark==3.3.1
218
218
  ```
219
219
 
220
220
  In Python console or Jupyter `Python3` kernel:
@@ -259,7 +259,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
259
259
 
260
260
  ## Apache Spark Support
261
261
 
262
- Spark NLP *5.4.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
262
+ Spark NLP *5.5.0-rc1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
263
263
 
264
264
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
265
265
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -292,7 +292,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
292
292
 
293
293
  ## Databricks Support
294
294
 
295
- Spark NLP 5.4.2 has been tested and is compatible with the following runtimes:
295
+ Spark NLP 5.5.0-rc1 has been tested and is compatible with the following runtimes:
296
296
 
297
297
  **CPU:**
298
298
 
@@ -365,7 +365,7 @@ Spark NLP 5.4.2 has been tested and is compatible with the following runtimes:
365
365
 
366
366
  ## EMR Support
367
367
 
368
- Spark NLP 5.4.2 has been tested and is compatible with the following EMR releases:
368
+ Spark NLP 5.5.0-rc1 has been tested and is compatible with the following EMR releases:
369
369
 
370
370
  - emr-6.2.0
371
371
  - emr-6.3.0
@@ -415,11 +415,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
415
415
  ```sh
416
416
  # CPU
417
417
 
418
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
418
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
419
419
 
420
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
420
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
421
421
 
422
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
422
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
423
423
  ```
424
424
 
425
425
  The `spark-nlp` has been published to
@@ -428,11 +428,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
428
428
  ```sh
429
429
  # GPU
430
430
 
431
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.2
431
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
432
432
 
433
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.2
433
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
434
434
 
435
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.2
435
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
436
436
 
437
437
  ```
438
438
 
@@ -442,11 +442,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
442
442
  ```sh
443
443
  # AArch64
444
444
 
445
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.2
445
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
446
446
 
447
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.2
447
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
448
448
 
449
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.2
449
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
450
450
 
451
451
  ```
452
452
 
@@ -456,11 +456,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
456
456
  ```sh
457
457
  # M1/M2 (Apple Silicon)
458
458
 
459
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.2
459
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
460
460
 
461
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.2
461
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
462
462
 
463
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.2
463
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
464
464
 
465
465
  ```
466
466
 
@@ -474,7 +474,7 @@ set in your SparkSession:
474
474
  spark-shell \
475
475
  --driver-memory 16g \
476
476
  --conf spark.kryoserializer.buffer.max=2000M \
477
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
477
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
478
478
  ```
479
479
 
480
480
  ## Scala
@@ -492,7 +492,7 @@ coordinates:
492
492
  <dependency>
493
493
  <groupId>com.johnsnowlabs.nlp</groupId>
494
494
  <artifactId>spark-nlp_2.12</artifactId>
495
- <version>5.4.2</version>
495
+ <version>5.5.0-rc1</version>
496
496
  </dependency>
497
497
  ```
498
498
 
@@ -503,7 +503,7 @@ coordinates:
503
503
  <dependency>
504
504
  <groupId>com.johnsnowlabs.nlp</groupId>
505
505
  <artifactId>spark-nlp-gpu_2.12</artifactId>
506
- <version>5.4.2</version>
506
+ <version>5.5.0-rc1</version>
507
507
  </dependency>
508
508
  ```
509
509
 
@@ -514,7 +514,7 @@ coordinates:
514
514
  <dependency>
515
515
  <groupId>com.johnsnowlabs.nlp</groupId>
516
516
  <artifactId>spark-nlp-aarch64_2.12</artifactId>
517
- <version>5.4.2</version>
517
+ <version>5.5.0-rc1</version>
518
518
  </dependency>
519
519
  ```
520
520
 
@@ -525,7 +525,7 @@ coordinates:
525
525
  <dependency>
526
526
  <groupId>com.johnsnowlabs.nlp</groupId>
527
527
  <artifactId>spark-nlp-silicon_2.12</artifactId>
528
- <version>5.4.2</version>
528
+ <version>5.5.0-rc1</version>
529
529
  </dependency>
530
530
  ```
531
531
 
@@ -535,28 +535,28 @@ coordinates:
535
535
 
536
536
  ```sbtshell
537
537
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
538
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.2"
538
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.5.0-rc1"
539
539
  ```
540
540
 
541
541
  **spark-nlp-gpu:**
542
542
 
543
543
  ```sbtshell
544
544
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
545
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.4.2"
545
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.5.0-rc1"
546
546
  ```
547
547
 
548
548
  **spark-nlp-aarch64:**
549
549
 
550
550
  ```sbtshell
551
551
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
552
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.4.2"
552
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.5.0-rc1"
553
553
  ```
554
554
 
555
555
  **spark-nlp-silicon:**
556
556
 
557
557
  ```sbtshell
558
558
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
559
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.4.2"
559
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.5.0-rc1"
560
560
  ```
561
561
 
562
562
  Maven
@@ -578,7 +578,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
578
578
  Pip:
579
579
 
580
580
  ```bash
581
- pip install spark-nlp==5.4.2
581
+ pip install spark-nlp==5.5.0-rc1
582
582
  ```
583
583
 
584
584
  Conda:
@@ -607,7 +607,7 @@ spark = SparkSession.builder
607
607
  .config("spark.driver.memory", "16G")
608
608
  .config("spark.driver.maxResultSize", "0")
609
609
  .config("spark.kryoserializer.buffer.max", "2000M")
610
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2")
610
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1")
611
611
  .getOrCreate()
612
612
  ```
613
613
 
@@ -678,7 +678,7 @@ Use either one of the following options
678
678
  - Add the following Maven Coordinates to the interpreter's library list
679
679
 
680
680
  ```bash
681
- com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
681
+ com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
682
682
  ```
683
683
 
684
684
  - Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
@@ -689,7 +689,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
689
689
  Apart from the previous step, install the python module through pip
690
690
 
691
691
  ```bash
692
- pip install spark-nlp==5.4.2
692
+ pip install spark-nlp==5.5.0-rc1
693
693
  ```
694
694
 
695
695
  Or you can install `spark-nlp` from inside Zeppelin by using Conda:
@@ -717,7 +717,7 @@ launch the Jupyter from the same Python environment:
717
717
  $ conda create -n sparknlp python=3.8 -y
718
718
  $ conda activate sparknlp
719
719
  # spark-nlp by default is based on pyspark 3.x
720
- $ pip install spark-nlp==5.4.2 pyspark==3.3.1 jupyter
720
+ $ pip install spark-nlp==5.5.0-rc1 pyspark==3.3.1 jupyter
721
721
  $ jupyter notebook
722
722
  ```
723
723
 
@@ -734,7 +734,7 @@ export PYSPARK_PYTHON=python3
734
734
  export PYSPARK_DRIVER_PYTHON=jupyter
735
735
  export PYSPARK_DRIVER_PYTHON_OPTS=notebook
736
736
 
737
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
737
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
738
738
  ```
739
739
 
740
740
  Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
@@ -761,7 +761,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
761
761
  # -s is for spark-nlp
762
762
  # -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
763
763
  # by default they are set to the latest
764
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.2
764
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.5.0-rc1
765
765
  ```
766
766
 
767
767
  [Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
@@ -784,7 +784,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
784
784
  # -s is for spark-nlp
785
785
  # -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
786
786
  # by default they are set to the latest
787
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.2
787
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.5.0-rc1
788
788
  ```
789
789
 
790
790
  [Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
@@ -803,9 +803,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
803
803
 
804
804
  3. In `Libraries` tab inside your cluster you need to follow these steps:
805
805
 
806
- 3.1. Install New -> PyPI -> `spark-nlp==5.4.2` -> Install
806
+ 3.1. Install New -> PyPI -> `spark-nlp==5.5.0-rc1` -> Install
807
807
 
808
- 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2` -> Install
808
+ 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1` -> Install
809
809
 
810
810
  4. Now you can attach your notebook to the cluster and use Spark NLP!
811
811
 
@@ -856,7 +856,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
856
856
  "spark.kryoserializer.buffer.max": "2000M",
857
857
  "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
858
858
  "spark.driver.maxResultSize": "0",
859
- "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2"
859
+ "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1"
860
860
  }
861
861
  }]
862
862
  ```
@@ -865,7 +865,7 @@ A sample of AWS CLI to launch EMR cluster:
865
865
 
866
866
  ```.sh
867
867
  aws emr create-cluster \
868
- --name "Spark NLP 5.4.2" \
868
+ --name "Spark NLP 5.5.0-rc1" \
869
869
  --release-label emr-6.2.0 \
870
870
  --applications Name=Hadoop Name=Spark Name=Hive \
871
871
  --instance-type m4.4xlarge \
@@ -929,7 +929,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
929
929
  --enable-component-gateway \
930
930
  --metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
931
931
  --initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
932
- --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
932
+ --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
933
933
  ```
934
934
 
935
935
  2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
@@ -972,7 +972,7 @@ spark = SparkSession.builder
972
972
  .config("spark.kryoserializer.buffer.max", "2000m")
973
973
  .config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
974
974
  .config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
975
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2")
975
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1")
976
976
  .getOrCreate()
977
977
  ```
978
978
 
@@ -986,7 +986,7 @@ spark-shell \
986
986
  --conf spark.kryoserializer.buffer.max=2000M \
987
987
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
988
988
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
989
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
989
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
990
990
  ```
991
991
 
992
992
  **pyspark:**
@@ -999,7 +999,7 @@ pyspark \
999
999
  --conf spark.kryoserializer.buffer.max=2000M \
1000
1000
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
1001
1001
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
1002
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
1002
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
1003
1003
  ```
1004
1004
 
1005
1005
  **Databricks:**
@@ -1271,7 +1271,7 @@ spark = SparkSession.builder
1271
1271
  .config("spark.driver.memory", "16G")
1272
1272
  .config("spark.driver.maxResultSize", "0")
1273
1273
  .config("spark.kryoserializer.buffer.max", "2000M")
1274
- .config("spark.jars", "/tmp/spark-nlp-assembly-5.4.2.jar")
1274
+ .config("spark.jars", "/tmp/spark-nlp-assembly-5.5.0-rc1.jar")
1275
1275
  .getOrCreate()
1276
1276
  ```
1277
1277
 
@@ -1280,7 +1280,7 @@ spark = SparkSession.builder
1280
1280
  version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
1281
1281
  - If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
1282
1282
  to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
1283
- i.e., `hdfs:///tmp/spark-nlp-assembly-5.4.2.jar`)
1283
+ i.e., `hdfs:///tmp/spark-nlp-assembly-5.5.0-rc1.jar`)
1284
1284
 
1285
1285
  Example of using pretrained Models and Pipelines in offline:
1286
1286
 
@@ -166,7 +166,7 @@ To use Spark NLP you need the following requirements:
166
166
 
167
167
  **GPU (optional):**
168
168
 
169
- Spark NLP 5.4.2 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
169
+ Spark NLP 5.5.0-rc1 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
170
170
 
171
171
  - NVIDIA® GPU drivers version 450.80.02 or higher
172
172
  - CUDA® Toolkit 11.2
@@ -182,7 +182,7 @@ $ java -version
182
182
  $ conda create -n sparknlp python=3.7 -y
183
183
  $ conda activate sparknlp
184
184
  # spark-nlp by default is based on pyspark 3.x
185
- $ pip install spark-nlp==5.4.2 pyspark==3.3.1
185
+ $ pip install spark-nlp==5.5.0-rc1 pyspark==3.3.1
186
186
  ```
187
187
 
188
188
  In Python console or Jupyter `Python3` kernel:
@@ -227,7 +227,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
227
227
 
228
228
  ## Apache Spark Support
229
229
 
230
- Spark NLP *5.4.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
230
+ Spark NLP *5.5.0-rc1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
231
231
 
232
232
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
233
233
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -260,7 +260,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
260
260
 
261
261
  ## Databricks Support
262
262
 
263
- Spark NLP 5.4.2 has been tested and is compatible with the following runtimes:
263
+ Spark NLP 5.5.0-rc1 has been tested and is compatible with the following runtimes:
264
264
 
265
265
  **CPU:**
266
266
 
@@ -333,7 +333,7 @@ Spark NLP 5.4.2 has been tested and is compatible with the following runtimes:
333
333
 
334
334
  ## EMR Support
335
335
 
336
- Spark NLP 5.4.2 has been tested and is compatible with the following EMR releases:
336
+ Spark NLP 5.5.0-rc1 has been tested and is compatible with the following EMR releases:
337
337
 
338
338
  - emr-6.2.0
339
339
  - emr-6.3.0
@@ -383,11 +383,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
383
383
  ```sh
384
384
  # CPU
385
385
 
386
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
386
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
387
387
 
388
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
388
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
389
389
 
390
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
390
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
391
391
  ```
392
392
 
393
393
  The `spark-nlp` has been published to
@@ -396,11 +396,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
396
396
  ```sh
397
397
  # GPU
398
398
 
399
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.2
399
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
400
400
 
401
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.2
401
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
402
402
 
403
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.2
403
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
404
404
 
405
405
  ```
406
406
 
@@ -410,11 +410,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
410
410
  ```sh
411
411
  # AArch64
412
412
 
413
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.2
413
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
414
414
 
415
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.2
415
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
416
416
 
417
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.2
417
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
418
418
 
419
419
  ```
420
420
 
@@ -424,11 +424,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
424
424
  ```sh
425
425
  # M1/M2 (Apple Silicon)
426
426
 
427
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.2
427
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
428
428
 
429
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.2
429
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
430
430
 
431
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.2
431
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
432
432
 
433
433
  ```
434
434
 
@@ -442,7 +442,7 @@ set in your SparkSession:
442
442
  spark-shell \
443
443
  --driver-memory 16g \
444
444
  --conf spark.kryoserializer.buffer.max=2000M \
445
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
445
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
446
446
  ```
447
447
 
448
448
  ## Scala
@@ -460,7 +460,7 @@ coordinates:
460
460
  <dependency>
461
461
  <groupId>com.johnsnowlabs.nlp</groupId>
462
462
  <artifactId>spark-nlp_2.12</artifactId>
463
- <version>5.4.2</version>
463
+ <version>5.5.0-rc1</version>
464
464
  </dependency>
465
465
  ```
466
466
 
@@ -471,7 +471,7 @@ coordinates:
471
471
  <dependency>
472
472
  <groupId>com.johnsnowlabs.nlp</groupId>
473
473
  <artifactId>spark-nlp-gpu_2.12</artifactId>
474
- <version>5.4.2</version>
474
+ <version>5.5.0-rc1</version>
475
475
  </dependency>
476
476
  ```
477
477
 
@@ -482,7 +482,7 @@ coordinates:
482
482
  <dependency>
483
483
  <groupId>com.johnsnowlabs.nlp</groupId>
484
484
  <artifactId>spark-nlp-aarch64_2.12</artifactId>
485
- <version>5.4.2</version>
485
+ <version>5.5.0-rc1</version>
486
486
  </dependency>
487
487
  ```
488
488
 
@@ -493,7 +493,7 @@ coordinates:
493
493
  <dependency>
494
494
  <groupId>com.johnsnowlabs.nlp</groupId>
495
495
  <artifactId>spark-nlp-silicon_2.12</artifactId>
496
- <version>5.4.2</version>
496
+ <version>5.5.0-rc1</version>
497
497
  </dependency>
498
498
  ```
499
499
 
@@ -503,28 +503,28 @@ coordinates:
503
503
 
504
504
  ```sbtshell
505
505
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
506
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.2"
506
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.5.0-rc1"
507
507
  ```
508
508
 
509
509
  **spark-nlp-gpu:**
510
510
 
511
511
  ```sbtshell
512
512
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
513
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.4.2"
513
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.5.0-rc1"
514
514
  ```
515
515
 
516
516
  **spark-nlp-aarch64:**
517
517
 
518
518
  ```sbtshell
519
519
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
520
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.4.2"
520
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.5.0-rc1"
521
521
  ```
522
522
 
523
523
  **spark-nlp-silicon:**
524
524
 
525
525
  ```sbtshell
526
526
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
527
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.4.2"
527
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.5.0-rc1"
528
528
  ```
529
529
 
530
530
  Maven
@@ -546,7 +546,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
546
546
  Pip:
547
547
 
548
548
  ```bash
549
- pip install spark-nlp==5.4.2
549
+ pip install spark-nlp==5.5.0-rc1
550
550
  ```
551
551
 
552
552
  Conda:
@@ -575,7 +575,7 @@ spark = SparkSession.builder
575
575
  .config("spark.driver.memory", "16G")
576
576
  .config("spark.driver.maxResultSize", "0")
577
577
  .config("spark.kryoserializer.buffer.max", "2000M")
578
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2")
578
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1")
579
579
  .getOrCreate()
580
580
  ```
581
581
 
@@ -646,7 +646,7 @@ Use either one of the following options
646
646
  - Add the following Maven Coordinates to the interpreter's library list
647
647
 
648
648
  ```bash
649
- com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
649
+ com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
650
650
  ```
651
651
 
652
652
  - Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
@@ -657,7 +657,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
657
657
  Apart from the previous step, install the python module through pip
658
658
 
659
659
  ```bash
660
- pip install spark-nlp==5.4.2
660
+ pip install spark-nlp==5.5.0-rc1
661
661
  ```
662
662
 
663
663
  Or you can install `spark-nlp` from inside Zeppelin by using Conda:
@@ -685,7 +685,7 @@ launch the Jupyter from the same Python environment:
685
685
  $ conda create -n sparknlp python=3.8 -y
686
686
  $ conda activate sparknlp
687
687
  # spark-nlp by default is based on pyspark 3.x
688
- $ pip install spark-nlp==5.4.2 pyspark==3.3.1 jupyter
688
+ $ pip install spark-nlp==5.5.0-rc1 pyspark==3.3.1 jupyter
689
689
  $ jupyter notebook
690
690
  ```
691
691
 
@@ -702,7 +702,7 @@ export PYSPARK_PYTHON=python3
702
702
  export PYSPARK_DRIVER_PYTHON=jupyter
703
703
  export PYSPARK_DRIVER_PYTHON_OPTS=notebook
704
704
 
705
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
705
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
706
706
  ```
707
707
 
708
708
  Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
@@ -729,7 +729,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
729
729
  # -s is for spark-nlp
730
730
  # -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
731
731
  # by default they are set to the latest
732
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.2
732
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.5.0-rc1
733
733
  ```
734
734
 
735
735
  [Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
@@ -752,7 +752,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
752
752
  # -s is for spark-nlp
753
753
  # -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
754
754
  # by default they are set to the latest
755
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.2
755
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.5.0-rc1
756
756
  ```
757
757
 
758
758
  [Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
@@ -771,9 +771,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
771
771
 
772
772
  3. In `Libraries` tab inside your cluster you need to follow these steps:
773
773
 
774
- 3.1. Install New -> PyPI -> `spark-nlp==5.4.2` -> Install
774
+ 3.1. Install New -> PyPI -> `spark-nlp==5.5.0-rc1` -> Install
775
775
 
776
- 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2` -> Install
776
+ 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1` -> Install
777
777
 
778
778
  4. Now you can attach your notebook to the cluster and use Spark NLP!
779
779
 
@@ -824,7 +824,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
824
824
  "spark.kryoserializer.buffer.max": "2000M",
825
825
  "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
826
826
  "spark.driver.maxResultSize": "0",
827
- "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2"
827
+ "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1"
828
828
  }
829
829
  }]
830
830
  ```
@@ -833,7 +833,7 @@ A sample of AWS CLI to launch EMR cluster:
833
833
 
834
834
  ```.sh
835
835
  aws emr create-cluster \
836
- --name "Spark NLP 5.4.2" \
836
+ --name "Spark NLP 5.5.0-rc1" \
837
837
  --release-label emr-6.2.0 \
838
838
  --applications Name=Hadoop Name=Spark Name=Hive \
839
839
  --instance-type m4.4xlarge \
@@ -897,7 +897,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
897
897
  --enable-component-gateway \
898
898
  --metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
899
899
  --initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
900
- --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
900
+ --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
901
901
  ```
902
902
 
903
903
  2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
@@ -940,7 +940,7 @@ spark = SparkSession.builder
940
940
  .config("spark.kryoserializer.buffer.max", "2000m")
941
941
  .config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
942
942
  .config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
943
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2")
943
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1")
944
944
  .getOrCreate()
945
945
  ```
946
946
 
@@ -954,7 +954,7 @@ spark-shell \
954
954
  --conf spark.kryoserializer.buffer.max=2000M \
955
955
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
956
956
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
957
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
957
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
958
958
  ```
959
959
 
960
960
  **pyspark:**
@@ -967,7 +967,7 @@ pyspark \
967
967
  --conf spark.kryoserializer.buffer.max=2000M \
968
968
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
969
969
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
970
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
970
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
971
971
  ```
972
972
 
973
973
  **Databricks:**
@@ -1239,7 +1239,7 @@ spark = SparkSession.builder
1239
1239
  .config("spark.driver.memory", "16G")
1240
1240
  .config("spark.driver.maxResultSize", "0")
1241
1241
  .config("spark.kryoserializer.buffer.max", "2000M")
1242
- .config("spark.jars", "/tmp/spark-nlp-assembly-5.4.2.jar")
1242
+ .config("spark.jars", "/tmp/spark-nlp-assembly-5.5.0-rc1.jar")
1243
1243
  .getOrCreate()
1244
1244
  ```
1245
1245
 
@@ -1248,7 +1248,7 @@ spark = SparkSession.builder
1248
1248
  version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
1249
1249
  - If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
1250
1250
  to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
1251
- i.e., `hdfs:///tmp/spark-nlp-assembly-5.4.2.jar`)
1251
+ i.e., `hdfs:///tmp/spark-nlp-assembly-5.5.0-rc1.jar`)
1252
1252
 
1253
1253
  Example of using pretrained Models and Pipelines in offline:
1254
1254