spark-nlp 5.4.0__tar.gz → 5.4.0rc1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spark-nlp might be problematic. Click here for more details.

Files changed (241)
  1. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/PKG-INFO +60 -50
  2. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/README.md +59 -49
  3. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/setup.py +1 -1
  4. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/PKG-INFO +60 -50
  5. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/SOURCES.txt +0 -5
  6. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/__init__.py +2 -3
  7. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/__init__.py +0 -1
  8. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +3 -3
  9. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -2
  10. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -2
  11. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -2
  12. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/openai/openai_embeddings.py +69 -43
  13. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/__init__.py +0 -2
  14. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/m2m100_transformer.py +2 -2
  15. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/__init__.py +142 -428
  16. spark-nlp-5.4.0/com/johnsnowlabs/ml/ai/__init__.py +0 -10
  17. spark-nlp-5.4.0/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -173
  18. spark-nlp-5.4.0/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -349
  19. spark-nlp-5.4.0/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -326
  20. spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  21. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/com/__init__.py +0 -0
  22. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/com/johnsnowlabs/__init__.py +0 -0
  23. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/com/johnsnowlabs/nlp/__init__.py +0 -0
  24. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/setup.cfg +0 -0
  25. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/.uuid +0 -0
  26. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/dependency_links.txt +0 -0
  27. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/top_level.txt +0 -0
  28. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotation.py +0 -0
  29. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotation_audio.py +0 -0
  30. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotation_image.py +0 -0
  31. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/__init__.py +0 -0
  32. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/__init__.py +0 -0
  33. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
  34. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
  35. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
  36. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/chunk2_doc.py +0 -0
  37. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/chunker.py +0 -0
  38. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
  39. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
  40. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
  41. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
  42. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
  43. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
  44. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
  45. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
  46. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
  47. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
  48. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
  49. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
  50. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
  51. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
  52. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
  53. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
  54. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
  55. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
  56. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
  57. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
  58. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
  59. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
  60. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
  61. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
  62. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
  63. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
  64. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
  65. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
  66. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
  67. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
  68. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
  69. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
  70. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
  71. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
  72. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
  73. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
  74. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
  75. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/coref/__init__.py +0 -0
  76. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
  77. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/__init__.py +0 -0
  78. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
  79. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
  80. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
  81. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
  82. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
  83. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/date2_chunk.py +0 -0
  84. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/dependency/__init__.py +0 -0
  85. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
  86. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
  87. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_character_text_splitter.py +0 -0
  88. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_normalizer.py +0 -0
  89. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_token_splitter.py +0 -0
  90. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_token_splitter_test.py +0 -0
  91. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/__init__.py +0 -0
  92. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
  93. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
  94. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
  95. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
  96. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
  97. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
  98. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
  99. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
  100. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
  101. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
  102. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
  103. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
  104. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
  105. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
  106. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
  107. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
  108. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/word2vec.py +0 -0
  109. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
  110. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
  111. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
  112. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
  113. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/er/__init__.py +0 -0
  114. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/er/entity_ruler.py +0 -0
  115. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/graph_extraction.py +0 -0
  116. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
  117. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
  118. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ld_dl/__init__.py +0 -0
  119. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
  120. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/lemmatizer.py +0 -0
  121. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/__init__.py +0 -0
  122. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
  123. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/date_matcher.py +0 -0
  124. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
  125. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
  126. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/text_matcher.py +0 -0
  127. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/n_gram_generator.py +0 -0
  128. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/__init__.py +0 -0
  129. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_approach.py +0 -0
  130. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_converter.py +0 -0
  131. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_crf.py +0 -0
  132. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_dl.py +0 -0
  133. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
  134. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
  135. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/normalizer.py +0 -0
  136. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/openai/__init__.py +0 -0
  137. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/openai/openai_completion.py +0 -0
  138. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/param/__init__.py +0 -0
  139. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/param/classifier_encoder.py +0 -0
  140. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
  141. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/pos/__init__.py +0 -0
  142. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/pos/perceptron.py +0 -0
  143. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentence/__init__.py +0 -0
  144. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
  145. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
  146. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentiment/__init__.py +0 -0
  147. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
  148. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
  149. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
  150. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
  151. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
  152. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
  153. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
  154. {spark-nlp-5.4.0/com/johnsnowlabs/ml → spark-nlp-5.4.0rc1/sparknlp/annotator/similarity}/__init__.py +0 -0
  155. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
  156. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/__init__.py +0 -0
  157. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
  158. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
  159. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
  160. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/stemmer.py +0 -0
  161. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/stop_words_cleaner.py +0 -0
  162. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
  163. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/__init__.py +0 -0
  164. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
  165. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
  166. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
  167. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/tokenizer.py +0 -0
  168. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token2_chunk.py +0 -0
  169. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ws/__init__.py +0 -0
  170. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ws/word_segmenter.py +0 -0
  171. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/__init__.py +0 -0
  172. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/audio_assembler.py +0 -0
  173. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/doc2_chunk.py +0 -0
  174. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/document_assembler.py +0 -0
  175. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/embeddings_finisher.py +0 -0
  176. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/finisher.py +0 -0
  177. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/graph_finisher.py +0 -0
  178. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/has_recursive_fit.py +0 -0
  179. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/has_recursive_transform.py +0 -0
  180. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/image_assembler.py +0 -0
  181. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/light_pipeline.py +0 -0
  182. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/multi_document_assembler.py +0 -0
  183. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/recursive_pipeline.py +0 -0
  184. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/table_assembler.py +0 -0
  185. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/token_assembler.py +0 -0
  186. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/__init__.py +0 -0
  187. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_approach.py +0 -0
  188. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_model.py +0 -0
  189. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_properties.py +0 -0
  190. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_type.py +0 -0
  191. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/coverage_result.py +0 -0
  192. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/match_strategy.py +0 -0
  193. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/properties.py +0 -0
  194. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/read_as.py +0 -0
  195. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/recursive_annotator_approach.py +0 -0
  196. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/storage.py +0 -0
  197. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/utils.py +0 -0
  198. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/functions.py +0 -0
  199. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/annotator_java_ml.py +0 -0
  200. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/annotator_transformer.py +0 -0
  201. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/extended_java_wrapper.py +0 -0
  202. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/params_getters_setters.py +0 -0
  203. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/recursive.py +0 -0
  204. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/logging/__init__.py +0 -0
  205. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/logging/comet.py +0 -0
  206. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/__init__.py +0 -0
  207. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
  208. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/resource_downloader.py +0 -0
  209. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/utils.py +0 -0
  210. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/__init__.py +0 -0
  211. {spark-nlp-5.4.0/sparknlp/annotator/similarity → spark-nlp-5.4.0rc1/sparknlp/training/_tf_graph_builders}/__init__.py +0 -0
  212. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
  213. {spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders → spark-nlp-5.4.0rc1/sparknlp/training/_tf_graph_builders/ner_dl}/__init__.py +0 -0
  214. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
  215. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
  216. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
  217. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
  218. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
  219. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
  220. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
  221. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
  222. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
  223. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
  224. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
  225. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
  226. {spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders/ner_dl → spark-nlp-5.4.0rc1/sparknlp/training/_tf_graph_builders_1x}/__init__.py +0 -0
  227. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
  228. {spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders_1x → spark-nlp-5.4.0rc1/sparknlp/training/_tf_graph_builders_1x/ner_dl}/__init__.py +0 -0
  229. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
  230. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
  231. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
  232. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
  233. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
  234. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/conll.py +0 -0
  235. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/conllu.py +0 -0
  236. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/pos.py +0 -0
  237. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/pub_tator.py +0 -0
  238. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/spacy_to_annotation.py +0 -0
  239. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/tfgraphs.py +0 -0
  240. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/upload_to_hub.py +0 -0
  241. {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spark-nlp
3
- Version: 5.4.0
3
+ Version: 5.4.0rc1
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -146,7 +146,6 @@ documentation and examples
146
146
  - INSTRUCTOR Embeddings (HuggingFace models)
147
147
  - E5 Embeddings (HuggingFace models)
148
148
  - MPNet Embeddings (HuggingFace models)
149
- - UAE Embeddings (HuggingFace models)
150
149
  - OpenAI Embeddings
151
150
  - Sentence & Chunk Embeddings
152
151
  - Unsupervised keywords extraction
@@ -171,7 +170,7 @@ documentation and examples
171
170
  - Text-To-Text Transfer Transformer (Google T5)
172
171
  - Generative Pre-trained Transformer 2 (OpenAI GPT2)
173
172
  - Seq2Seq for NLG, Translation, and Comprehension (Facebook BART)
174
- - Chat and Conversational LLMs (Facebook Llama-2)
173
+ - Chat and Conversational LLMs (Facebook Llama-22)
175
174
  - Vision Transformer (Google ViT)
176
175
  - Swin Image Classification (Microsoft Swin Transformer)
177
176
  - ConvNext Image Classification (Facebook ConvNext)
@@ -181,10 +180,10 @@ documentation and examples
181
180
  - Automatic Speech Recognition (HuBERT)
182
181
  - Automatic Speech Recognition (OpenAI Whisper)
183
182
  - Named entity recognition (Deep learning)
184
- - Easy ONNX, OpenVINO, and TensorFlow integrations
183
+ - Easy ONNX and TensorFlow integrations
185
184
  - GPU Support
186
185
  - Full integration with Spark ML functions
187
- - +31000 pre-trained models in +200 languages!
186
+ - +30000 pre-trained models in +200 languages!
188
187
  - +6000 pre-trained pipelines in +200 languages!
189
188
  - Multi-lingual NER models: Arabic, Bengali, Chinese, Danish, Dutch, English, Finnish, French, German, Hebrew, Italian,
190
189
  Japanese, Korean, Norwegian, Persian, Polish, Portuguese, Russian, Spanish, Swedish, Urdu, and more.
@@ -198,7 +197,7 @@ To use Spark NLP you need the following requirements:
198
197
 
199
198
  **GPU (optional):**
200
199
 
201
- Spark NLP 5.4.0 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
200
+ Spark NLP 5.4.0-rc1 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
202
201
 
203
202
  - NVIDIA® GPU drivers version 450.80.02 or higher
204
203
  - CUDA® Toolkit 11.2
@@ -214,7 +213,7 @@ $ java -version
214
213
  $ conda create -n sparknlp python=3.7 -y
215
214
  $ conda activate sparknlp
216
215
  # spark-nlp by default is based on pyspark 3.x
217
- $ pip install spark-nlp==5.4.0 pyspark==3.3.1
216
+ $ pip install spark-nlp==5.4.0-rc1 pyspark==3.3.1
218
217
  ```
219
218
 
220
219
  In Python console or Jupyter `Python3` kernel:
@@ -259,11 +258,10 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
259
258
 
260
259
  ## Apache Spark Support
261
260
 
262
- Spark NLP *5.4.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
261
+ Spark NLP *5.4.0-rc1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
263
262
 
264
263
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
265
264
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
266
- | 5.4.x | YES | YES | YES | YES | YES | YES | NO | NO |
267
265
  | 5.3.x | YES | YES | YES | YES | YES | YES | NO | NO |
268
266
  | 5.2.x | YES | YES | YES | YES | YES | YES | NO | NO |
269
267
  | 5.1.x | Partially | YES | YES | YES | YES | YES | NO | NO |
@@ -273,6 +271,12 @@ Spark NLP *5.4.0* has been built on top of Apache Spark 3.4 while fully supports
273
271
  | 4.2.x | NO | NO | YES | YES | YES | YES | NO | NO |
274
272
  | 4.1.x | NO | NO | YES | YES | YES | YES | NO | NO |
275
273
  | 4.0.x | NO | NO | YES | YES | YES | YES | NO | NO |
274
+ | 3.4.x | NO | NO | N/A | Partially | YES | YES | YES | YES |
275
+ | 3.3.x | NO | NO | NO | NO | YES | YES | YES | YES |
276
+ | 3.2.x | NO | NO | NO | NO | YES | YES | YES | YES |
277
+ | 3.1.x | NO | NO | NO | NO | YES | YES | YES | YES |
278
+ | 3.0.x | NO | NO | NO | NO | YES | YES | YES | YES |
279
+ | 2.7.x | NO | NO | NO | NO | NO | NO | YES | YES |
276
280
 
277
281
  Find out more about `Spark NLP` versions from our [release notes](https://github.com/JohnSnowLabs/spark-nlp/releases).
278
282
 
@@ -289,10 +293,16 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
289
293
  | 4.2.x | YES | YES | YES | YES | YES | NO | YES |
290
294
  | 4.1.x | YES | YES | YES | YES | NO | NO | YES |
291
295
  | 4.0.x | YES | YES | YES | YES | NO | NO | YES |
296
+ | 3.4.x | YES | YES | YES | YES | NO | YES | YES |
297
+ | 3.3.x | YES | YES | YES | NO | NO | YES | YES |
298
+ | 3.2.x | YES | YES | YES | NO | NO | YES | YES |
299
+ | 3.1.x | YES | YES | YES | NO | NO | YES | YES |
300
+ | 3.0.x | YES | YES | YES | NO | NO | YES | YES |
301
+ | 2.7.x | YES | YES | NO | NO | NO | YES | NO |
292
302
 
293
303
  ## Databricks Support
294
304
 
295
- Spark NLP 5.4.0 has been tested and is compatible with the following runtimes:
305
+ Spark NLP 5.4.0-rc1 has been tested and is compatible with the following runtimes:
296
306
 
297
307
  **CPU:**
298
308
 
@@ -365,7 +375,7 @@ Spark NLP 5.4.0 has been tested and is compatible with the following runtimes:
365
375
 
366
376
  ## EMR Support
367
377
 
368
- Spark NLP 5.4.0 has been tested and is compatible with the following EMR releases:
378
+ Spark NLP 5.4.0-rc1 has been tested and is compatible with the following EMR releases:
369
379
 
370
380
  - emr-6.2.0
371
381
  - emr-6.3.0
@@ -415,11 +425,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
415
425
  ```sh
416
426
  # CPU
417
427
 
418
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
428
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
419
429
 
420
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
430
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
421
431
 
422
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
432
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
423
433
  ```
424
434
 
425
435
  The `spark-nlp` has been published to
@@ -428,11 +438,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
428
438
  ```sh
429
439
  # GPU
430
440
 
431
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
441
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
432
442
 
433
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
443
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
434
444
 
435
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
445
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
436
446
 
437
447
  ```
438
448
 
@@ -442,11 +452,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
442
452
  ```sh
443
453
  # AArch64
444
454
 
445
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
455
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
446
456
 
447
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
457
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
448
458
 
449
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
459
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
450
460
 
451
461
  ```
452
462
 
@@ -456,11 +466,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
456
466
  ```sh
457
467
  # M1/M2 (Apple Silicon)
458
468
 
459
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
469
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
460
470
 
461
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
471
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
462
472
 
463
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
473
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
464
474
 
465
475
  ```
466
476
 
@@ -474,7 +484,7 @@ set in your SparkSession:
474
484
  spark-shell \
475
485
  --driver-memory 16g \
476
486
  --conf spark.kryoserializer.buffer.max=2000M \
477
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
487
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
478
488
  ```
479
489
 
480
490
  ## Scala
@@ -492,7 +502,7 @@ coordinates:
492
502
  <dependency>
493
503
  <groupId>com.johnsnowlabs.nlp</groupId>
494
504
  <artifactId>spark-nlp_2.12</artifactId>
495
- <version>5.4.0</version>
505
+ <version>5.4.0-rc1</version>
496
506
  </dependency>
497
507
  ```
498
508
 
@@ -503,7 +513,7 @@ coordinates:
503
513
  <dependency>
504
514
  <groupId>com.johnsnowlabs.nlp</groupId>
505
515
  <artifactId>spark-nlp-gpu_2.12</artifactId>
506
- <version>5.4.0</version>
516
+ <version>5.4.0-rc1</version>
507
517
  </dependency>
508
518
  ```
509
519
 
@@ -514,7 +524,7 @@ coordinates:
514
524
  <dependency>
515
525
  <groupId>com.johnsnowlabs.nlp</groupId>
516
526
  <artifactId>spark-nlp-aarch64_2.12</artifactId>
517
- <version>5.4.0</version>
527
+ <version>5.4.0-rc1</version>
518
528
  </dependency>
519
529
  ```
520
530
 
@@ -525,7 +535,7 @@ coordinates:
525
535
  <dependency>
526
536
  <groupId>com.johnsnowlabs.nlp</groupId>
527
537
  <artifactId>spark-nlp-silicon_2.12</artifactId>
528
- <version>5.4.0</version>
538
+ <version>5.4.0-rc1</version>
529
539
  </dependency>
530
540
  ```
531
541
 
@@ -535,28 +545,28 @@ coordinates:
535
545
 
536
546
  ```sbtshell
537
547
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
538
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.0"
548
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.0-rc1"
539
549
  ```
540
550
 
541
551
  **spark-nlp-gpu:**
542
552
 
543
553
  ```sbtshell
544
554
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
545
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.4.0"
555
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.4.0-rc1"
546
556
  ```
547
557
 
548
558
  **spark-nlp-aarch64:**
549
559
 
550
560
  ```sbtshell
551
561
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
552
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.4.0"
562
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.4.0-rc1"
553
563
  ```
554
564
 
555
565
  **spark-nlp-silicon:**
556
566
 
557
567
  ```sbtshell
558
568
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
559
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.4.0"
569
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.4.0-rc1"
560
570
  ```
561
571
 
562
572
  Maven
@@ -578,7 +588,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
578
588
  Pip:
579
589
 
580
590
  ```bash
581
- pip install spark-nlp==5.4.0
591
+ pip install spark-nlp==5.4.0-rc1
582
592
  ```
583
593
 
584
594
  Conda:
@@ -607,7 +617,7 @@ spark = SparkSession.builder
607
617
  .config("spark.driver.memory", "16G")
608
618
  .config("spark.driver.maxResultSize", "0")
609
619
  .config("spark.kryoserializer.buffer.max", "2000M")
610
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0")
620
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1")
611
621
  .getOrCreate()
612
622
  ```
613
623
 
@@ -678,7 +688,7 @@ Use either one of the following options
678
688
  - Add the following Maven Coordinates to the interpreter's library list
679
689
 
680
690
  ```bash
681
- com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
691
+ com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
682
692
  ```
683
693
 
684
694
  - Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
@@ -689,7 +699,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
689
699
  Apart from the previous step, install the python module through pip
690
700
 
691
701
  ```bash
692
- pip install spark-nlp==5.4.0
702
+ pip install spark-nlp==5.4.0-rc1
693
703
  ```
694
704
 
695
705
  Or you can install `spark-nlp` from inside Zeppelin by using Conda:
@@ -717,7 +727,7 @@ launch the Jupyter from the same Python environment:
717
727
  $ conda create -n sparknlp python=3.8 -y
718
728
  $ conda activate sparknlp
719
729
  # spark-nlp by default is based on pyspark 3.x
720
- $ pip install spark-nlp==5.4.0 pyspark==3.3.1 jupyter
730
+ $ pip install spark-nlp==5.4.0-rc1 pyspark==3.3.1 jupyter
721
731
  $ jupyter notebook
722
732
  ```
723
733
 
@@ -734,7 +744,7 @@ export PYSPARK_PYTHON=python3
734
744
  export PYSPARK_DRIVER_PYTHON=jupyter
735
745
  export PYSPARK_DRIVER_PYTHON_OPTS=notebook
736
746
 
737
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
747
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
738
748
  ```
739
749
 
740
750
  Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
@@ -761,7 +771,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
761
771
  # -s is for spark-nlp
762
772
  # -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
763
773
  # by default they are set to the latest
764
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0
774
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0-rc1
765
775
  ```
766
776
 
767
777
  [Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
@@ -784,7 +794,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
784
794
  # -s is for spark-nlp
785
795
  # -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
786
796
  # by default they are set to the latest
787
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0
797
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0-rc1
788
798
  ```
789
799
 
790
800
  [Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
@@ -803,9 +813,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
803
813
 
804
814
  3. In `Libraries` tab inside your cluster you need to follow these steps:
805
815
 
806
- 3.1. Install New -> PyPI -> `spark-nlp==5.4.0` -> Install
816
+ 3.1. Install New -> PyPI -> `spark-nlp==5.4.0-rc1` -> Install
807
817
 
808
- 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0` -> Install
818
+ 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1` -> Install
809
819
 
810
820
  4. Now you can attach your notebook to the cluster and use Spark NLP!
811
821
 
@@ -856,7 +866,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
856
866
  "spark.kryoserializer.buffer.max": "2000M",
857
867
  "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
858
868
  "spark.driver.maxResultSize": "0",
859
- "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0"
869
+ "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1"
860
870
  }
861
871
  }]
862
872
  ```
@@ -865,7 +875,7 @@ A sample of AWS CLI to launch EMR cluster:
865
875
 
866
876
  ```.sh
867
877
  aws emr create-cluster \
868
- --name "Spark NLP 5.4.0" \
878
+ --name "Spark NLP 5.4.0-rc1" \
869
879
  --release-label emr-6.2.0 \
870
880
  --applications Name=Hadoop Name=Spark Name=Hive \
871
881
  --instance-type m4.4xlarge \
@@ -929,7 +939,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
929
939
  --enable-component-gateway \
930
940
  --metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
931
941
  --initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
932
- --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
942
+ --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
933
943
  ```
934
944
 
935
945
  2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
@@ -972,7 +982,7 @@ spark = SparkSession.builder
972
982
  .config("spark.kryoserializer.buffer.max", "2000m")
973
983
  .config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
974
984
  .config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
975
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0")
985
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1")
976
986
  .getOrCreate()
977
987
  ```
978
988
 
@@ -986,7 +996,7 @@ spark-shell \
986
996
  --conf spark.kryoserializer.buffer.max=2000M \
987
997
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
988
998
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
989
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
999
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
990
1000
  ```
991
1001
 
992
1002
  **pyspark:**
@@ -999,7 +1009,7 @@ pyspark \
999
1009
  --conf spark.kryoserializer.buffer.max=2000M \
1000
1010
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
1001
1011
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
1002
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
1012
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
1003
1013
  ```
1004
1014
 
1005
1015
  **Databricks:**
@@ -1271,7 +1281,7 @@ spark = SparkSession.builder
1271
1281
  .config("spark.driver.memory", "16G")
1272
1282
  .config("spark.driver.maxResultSize", "0")
1273
1283
  .config("spark.kryoserializer.buffer.max", "2000M")
1274
- .config("spark.jars", "/tmp/spark-nlp-assembly-5.4.0.jar")
1284
+ .config("spark.jars", "/tmp/spark-nlp-assembly-5.4.0-rc1.jar")
1275
1285
  .getOrCreate()
1276
1286
  ```
1277
1287
 
@@ -1280,7 +1290,7 @@ spark = SparkSession.builder
1280
1290
  version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
1281
1291
  - If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
1282
1292
  to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
1283
- i.e., `hdfs:///tmp/spark-nlp-assembly-5.4.0.jar`)
1293
+ i.e., `hdfs:///tmp/spark-nlp-assembly-5.4.0-rc1.jar`)
1284
1294
 
1285
1295
  Example of using pretrained Models and Pipelines in offline:
1286
1296