spark-nlp 5.3.3__tar.gz → 5.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spark-nlp might be problematic; see the package registry's advisory for more details.

Files changed (241)
  1. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/PKG-INFO +50 -60
  2. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/README.md +49 -59
  3. spark-nlp-5.4.0/com/johnsnowlabs/ml/ai/__init__.py +10 -0
  4. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/setup.py +1 -1
  5. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/spark_nlp.egg-info/PKG-INFO +50 -60
  6. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/spark_nlp.egg-info/SOURCES.txt +5 -0
  7. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/__init__.py +3 -2
  8. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/__init__.py +1 -0
  9. spark-nlp-5.4.0/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
  10. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +3 -3
  11. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/bert_embeddings.py +4 -2
  12. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +4 -2
  13. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/bge_embeddings.py +2 -0
  14. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/e5_embeddings.py +6 -2
  15. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/mpnet_embeddings.py +2 -0
  16. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/roberta_embeddings.py +4 -2
  17. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +4 -2
  18. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/openai/openai_embeddings.py +43 -69
  19. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/__init__.py +2 -0
  20. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/llama2_transformer.py +2 -2
  21. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/m2m100_transformer.py +2 -2
  22. spark-nlp-5.4.0/sparknlp/annotator/seq2seq/mistral_transformer.py +349 -0
  23. spark-nlp-5.4.0/sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
  24. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/__init__.py +434 -148
  25. spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  26. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/com/__init__.py +0 -0
  27. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/com/johnsnowlabs/__init__.py +0 -0
  28. {spark-nlp-5.3.3/sparknlp/annotator/similarity → spark-nlp-5.4.0/com/johnsnowlabs/ml}/__init__.py +0 -0
  29. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/com/johnsnowlabs/nlp/__init__.py +0 -0
  30. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/setup.cfg +0 -0
  31. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/spark_nlp.egg-info/.uuid +0 -0
  32. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/spark_nlp.egg-info/dependency_links.txt +0 -0
  33. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/spark_nlp.egg-info/top_level.txt +0 -0
  34. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotation.py +0 -0
  35. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotation_audio.py +0 -0
  36. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotation_image.py +0 -0
  37. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/__init__.py +0 -0
  38. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/audio/__init__.py +0 -0
  39. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
  40. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
  41. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
  42. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/chunk2_doc.py +0 -0
  43. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/chunker.py +0 -0
  44. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
  45. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
  46. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
  47. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
  48. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
  49. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
  50. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
  51. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
  52. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
  53. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
  54. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
  55. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
  56. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
  57. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
  58. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
  59. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
  60. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
  61. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
  62. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
  63. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
  64. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
  65. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
  66. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
  67. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
  68. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
  69. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
  70. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
  71. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
  72. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
  73. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
  74. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
  75. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
  76. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
  77. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
  78. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
  79. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
  80. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
  81. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/coref/__init__.py +0 -0
  82. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
  83. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/__init__.py +0 -0
  84. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
  85. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
  86. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
  87. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
  88. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
  89. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/date2_chunk.py +0 -0
  90. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/dependency/__init__.py +0 -0
  91. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
  92. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
  93. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/document_character_text_splitter.py +0 -0
  94. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/document_normalizer.py +0 -0
  95. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/document_token_splitter.py +0 -0
  96. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/document_token_splitter_test.py +0 -0
  97. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/__init__.py +0 -0
  98. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
  99. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
  100. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
  101. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
  102. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
  103. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
  104. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
  105. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
  106. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
  107. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
  108. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
  109. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
  110. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
  111. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/word2vec.py +0 -0
  112. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
  113. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
  114. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
  115. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/er/__init__.py +0 -0
  116. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/er/entity_ruler.py +0 -0
  117. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/graph_extraction.py +0 -0
  118. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
  119. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
  120. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ld_dl/__init__.py +0 -0
  121. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
  122. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/lemmatizer.py +0 -0
  123. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/__init__.py +0 -0
  124. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
  125. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/date_matcher.py +0 -0
  126. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
  127. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
  128. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/text_matcher.py +0 -0
  129. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/n_gram_generator.py +0 -0
  130. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/__init__.py +0 -0
  131. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/ner_approach.py +0 -0
  132. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/ner_converter.py +0 -0
  133. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/ner_crf.py +0 -0
  134. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/ner_dl.py +0 -0
  135. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
  136. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
  137. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/normalizer.py +0 -0
  138. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/openai/__init__.py +0 -0
  139. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/openai/openai_completion.py +0 -0
  140. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/param/__init__.py +0 -0
  141. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/param/classifier_encoder.py +0 -0
  142. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
  143. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/pos/__init__.py +0 -0
  144. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/pos/perceptron.py +0 -0
  145. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentence/__init__.py +0 -0
  146. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
  147. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
  148. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentiment/__init__.py +0 -0
  149. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
  150. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
  151. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
  152. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
  153. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
  154. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
  155. {spark-nlp-5.3.3/sparknlp/training/_tf_graph_builders → spark-nlp-5.4.0/sparknlp/annotator/similarity}/__init__.py +0 -0
  156. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
  157. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/spell_check/__init__.py +0 -0
  158. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
  159. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
  160. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
  161. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/stemmer.py +0 -0
  162. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/stop_words_cleaner.py +0 -0
  163. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
  164. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token/__init__.py +0 -0
  165. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
  166. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
  167. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
  168. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token/tokenizer.py +0 -0
  169. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token2_chunk.py +0 -0
  170. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ws/__init__.py +0 -0
  171. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ws/word_segmenter.py +0 -0
  172. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/__init__.py +0 -0
  173. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/audio_assembler.py +0 -0
  174. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/doc2_chunk.py +0 -0
  175. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/document_assembler.py +0 -0
  176. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/embeddings_finisher.py +0 -0
  177. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/finisher.py +0 -0
  178. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/graph_finisher.py +0 -0
  179. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/has_recursive_fit.py +0 -0
  180. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/has_recursive_transform.py +0 -0
  181. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/image_assembler.py +0 -0
  182. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/light_pipeline.py +0 -0
  183. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/multi_document_assembler.py +0 -0
  184. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/recursive_pipeline.py +0 -0
  185. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/table_assembler.py +0 -0
  186. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/token_assembler.py +0 -0
  187. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/__init__.py +0 -0
  188. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/annotator_approach.py +0 -0
  189. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/annotator_model.py +0 -0
  190. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/annotator_properties.py +0 -0
  191. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/annotator_type.py +0 -0
  192. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/coverage_result.py +0 -0
  193. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/match_strategy.py +0 -0
  194. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/properties.py +0 -0
  195. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/read_as.py +0 -0
  196. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/recursive_annotator_approach.py +0 -0
  197. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/storage.py +0 -0
  198. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/utils.py +0 -0
  199. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/functions.py +0 -0
  200. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/annotator_java_ml.py +0 -0
  201. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/annotator_transformer.py +0 -0
  202. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/extended_java_wrapper.py +0 -0
  203. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/params_getters_setters.py +0 -0
  204. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/recursive.py +0 -0
  205. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/logging/__init__.py +0 -0
  206. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/logging/comet.py +0 -0
  207. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/pretrained/__init__.py +0 -0
  208. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
  209. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/pretrained/resource_downloader.py +0 -0
  210. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/pretrained/utils.py +0 -0
  211. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/__init__.py +0 -0
  212. {spark-nlp-5.3.3/sparknlp/training/_tf_graph_builders/ner_dl → spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders}/__init__.py +0 -0
  213. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
  214. {spark-nlp-5.3.3/sparknlp/training/_tf_graph_builders_1x → spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders/ner_dl}/__init__.py +0 -0
  215. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
  216. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
  217. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
  218. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
  219. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
  220. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
  221. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
  222. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
  223. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
  224. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
  225. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
  226. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
  227. {spark-nlp-5.3.3/sparknlp/training/_tf_graph_builders_1x/ner_dl → spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders_1x}/__init__.py +0 -0
  228. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
  229. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
  230. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
  231. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
  232. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
  233. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
  234. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/conll.py +0 -0
  235. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/conllu.py +0 -0
  236. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/pos.py +0 -0
  237. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/pub_tator.py +0 -0
  238. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/spacy_to_annotation.py +0 -0
  239. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/tfgraphs.py +0 -0
  240. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/upload_to_hub.py +0 -0
  241. {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spark-nlp
3
- Version: 5.3.3
3
+ Version: 5.4.0
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -146,6 +146,7 @@ documentation and examples
146
146
  - INSTRUCTOR Embeddings (HuggingFace models)
147
147
  - E5 Embeddings (HuggingFace models)
148
148
  - MPNet Embeddings (HuggingFace models)
149
+ - UAE Embeddings (HuggingFace models)
149
150
  - OpenAI Embeddings
150
151
  - Sentence & Chunk Embeddings
151
152
  - Unsupervised keywords extraction
@@ -170,7 +171,7 @@ documentation and examples
170
171
  - Text-To-Text Transfer Transformer (Google T5)
171
172
  - Generative Pre-trained Transformer 2 (OpenAI GPT2)
172
173
  - Seq2Seq for NLG, Translation, and Comprehension (Facebook BART)
173
- - Chat and Conversational LLMs (Facebook Llama-22)
174
+ - Chat and Conversational LLMs (Facebook Llama-2)
174
175
  - Vision Transformer (Google ViT)
175
176
  - Swin Image Classification (Microsoft Swin Transformer)
176
177
  - ConvNext Image Classification (Facebook ConvNext)
@@ -180,10 +181,10 @@ documentation and examples
180
181
  - Automatic Speech Recognition (HuBERT)
181
182
  - Automatic Speech Recognition (OpenAI Whisper)
182
183
  - Named entity recognition (Deep learning)
183
- - Easy ONNX and TensorFlow integrations
184
+ - Easy ONNX, OpenVINO, and TensorFlow integrations
184
185
  - GPU Support
185
186
  - Full integration with Spark ML functions
186
- - +30000 pre-trained models in +200 languages!
187
+ - +31000 pre-trained models in +200 languages!
187
188
  - +6000 pre-trained pipelines in +200 languages!
188
189
  - Multi-lingual NER models: Arabic, Bengali, Chinese, Danish, Dutch, English, Finnish, French, German, Hebrew, Italian,
189
190
  Japanese, Korean, Norwegian, Persian, Polish, Portuguese, Russian, Spanish, Swedish, Urdu, and more.
@@ -197,7 +198,7 @@ To use Spark NLP you need the following requirements:
197
198
 
198
199
  **GPU (optional):**
199
200
 
200
- Spark NLP 5.3.3 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
201
+ Spark NLP 5.4.0 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
201
202
 
202
203
  - NVIDIA® GPU drivers version 450.80.02 or higher
203
204
  - CUDA® Toolkit 11.2
@@ -213,7 +214,7 @@ $ java -version
213
214
  $ conda create -n sparknlp python=3.7 -y
214
215
  $ conda activate sparknlp
215
216
  # spark-nlp by default is based on pyspark 3.x
216
- $ pip install spark-nlp==5.3.3 pyspark==3.3.1
217
+ $ pip install spark-nlp==5.4.0 pyspark==3.3.1
217
218
  ```
218
219
 
219
220
  In Python console or Jupyter `Python3` kernel:
@@ -258,10 +259,11 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
258
259
 
259
260
  ## Apache Spark Support
260
261
 
261
- Spark NLP *5.3.3* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
262
+ Spark NLP *5.4.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
262
263
 
263
264
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
264
265
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
266
+ | 5.4.x | YES | YES | YES | YES | YES | YES | NO | NO |
265
267
  | 5.3.x | YES | YES | YES | YES | YES | YES | NO | NO |
266
268
  | 5.2.x | YES | YES | YES | YES | YES | YES | NO | NO |
267
269
  | 5.1.x | Partially | YES | YES | YES | YES | YES | NO | NO |
@@ -271,12 +273,6 @@ Spark NLP *5.3.3* has been built on top of Apache Spark 3.4 while fully supports
271
273
  | 4.2.x | NO | NO | YES | YES | YES | YES | NO | NO |
272
274
  | 4.1.x | NO | NO | YES | YES | YES | YES | NO | NO |
273
275
  | 4.0.x | NO | NO | YES | YES | YES | YES | NO | NO |
274
- | 3.4.x | NO | NO | N/A | Partially | YES | YES | YES | YES |
275
- | 3.3.x | NO | NO | NO | NO | YES | YES | YES | YES |
276
- | 3.2.x | NO | NO | NO | NO | YES | YES | YES | YES |
277
- | 3.1.x | NO | NO | NO | NO | YES | YES | YES | YES |
278
- | 3.0.x | NO | NO | NO | NO | YES | YES | YES | YES |
279
- | 2.7.x | NO | NO | NO | NO | NO | NO | YES | YES |
280
276
 
281
277
  Find out more about `Spark NLP` versions from our [release notes](https://github.com/JohnSnowLabs/spark-nlp/releases).
282
278
 
@@ -293,16 +289,10 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
293
289
  | 4.2.x | YES | YES | YES | YES | YES | NO | YES |
294
290
  | 4.1.x | YES | YES | YES | YES | NO | NO | YES |
295
291
  | 4.0.x | YES | YES | YES | YES | NO | NO | YES |
296
- | 3.4.x | YES | YES | YES | YES | NO | YES | YES |
297
- | 3.3.x | YES | YES | YES | NO | NO | YES | YES |
298
- | 3.2.x | YES | YES | YES | NO | NO | YES | YES |
299
- | 3.1.x | YES | YES | YES | NO | NO | YES | YES |
300
- | 3.0.x | YES | YES | YES | NO | NO | YES | YES |
301
- | 2.7.x | YES | YES | NO | NO | NO | YES | NO |
302
292
 
303
293
  ## Databricks Support
304
294
 
305
- Spark NLP 5.3.3 has been tested and is compatible with the following runtimes:
295
+ Spark NLP 5.4.0 has been tested and is compatible with the following runtimes:
306
296
 
307
297
  **CPU:**
308
298
 
@@ -375,7 +365,7 @@ Spark NLP 5.3.3 has been tested and is compatible with the following runtimes:
375
365
 
376
366
  ## EMR Support
377
367
 
378
- Spark NLP 5.3.3 has been tested and is compatible with the following EMR releases:
368
+ Spark NLP 5.4.0 has been tested and is compatible with the following EMR releases:
379
369
 
380
370
  - emr-6.2.0
381
371
  - emr-6.3.0
@@ -425,11 +415,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
425
415
  ```sh
426
416
  # CPU
427
417
 
428
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
418
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
429
419
 
430
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
420
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
431
421
 
432
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
422
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
433
423
  ```
434
424
 
435
425
  The `spark-nlp` has been published to
@@ -438,11 +428,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
438
428
  ```sh
439
429
  # GPU
440
430
 
441
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.3
431
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
442
432
 
443
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.3
433
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
444
434
 
445
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.3
435
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
446
436
 
447
437
  ```
448
438
 
@@ -452,11 +442,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
452
442
  ```sh
453
443
  # AArch64
454
444
 
455
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.3
445
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
456
446
 
457
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.3
447
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
458
448
 
459
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.3
449
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
460
450
 
461
451
  ```
462
452
 
@@ -466,11 +456,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
466
456
  ```sh
467
457
  # M1/M2 (Apple Silicon)
468
458
 
469
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.3
459
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
470
460
 
471
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.3
461
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
472
462
 
473
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.3
463
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
474
464
 
475
465
  ```
476
466
 
@@ -484,7 +474,7 @@ set in your SparkSession:
484
474
  spark-shell \
485
475
  --driver-memory 16g \
486
476
  --conf spark.kryoserializer.buffer.max=2000M \
487
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
477
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
488
478
  ```
489
479
 
490
480
  ## Scala
@@ -502,7 +492,7 @@ coordinates:
502
492
  <dependency>
503
493
  <groupId>com.johnsnowlabs.nlp</groupId>
504
494
  <artifactId>spark-nlp_2.12</artifactId>
505
- <version>5.3.3</version>
495
+ <version>5.4.0</version>
506
496
  </dependency>
507
497
  ```
508
498
 
@@ -513,7 +503,7 @@ coordinates:
513
503
  <dependency>
514
504
  <groupId>com.johnsnowlabs.nlp</groupId>
515
505
  <artifactId>spark-nlp-gpu_2.12</artifactId>
516
- <version>5.3.3</version>
506
+ <version>5.4.0</version>
517
507
  </dependency>
518
508
  ```
519
509
 
@@ -524,7 +514,7 @@ coordinates:
524
514
  <dependency>
525
515
  <groupId>com.johnsnowlabs.nlp</groupId>
526
516
  <artifactId>spark-nlp-aarch64_2.12</artifactId>
527
- <version>5.3.3</version>
517
+ <version>5.4.0</version>
528
518
  </dependency>
529
519
  ```
530
520
 
@@ -535,7 +525,7 @@ coordinates:
535
525
  <dependency>
536
526
  <groupId>com.johnsnowlabs.nlp</groupId>
537
527
  <artifactId>spark-nlp-silicon_2.12</artifactId>
538
- <version>5.3.3</version>
528
+ <version>5.4.0</version>
539
529
  </dependency>
540
530
  ```
541
531
 
@@ -545,28 +535,28 @@ coordinates:
545
535
 
546
536
  ```sbtshell
547
537
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
548
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.3.3"
538
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.0"
549
539
  ```
550
540
 
551
541
  **spark-nlp-gpu:**
552
542
 
553
543
  ```sbtshell
554
544
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
555
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.3.3"
545
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.4.0"
556
546
  ```
557
547
 
558
548
  **spark-nlp-aarch64:**
559
549
 
560
550
  ```sbtshell
561
551
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
562
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.3.3"
552
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.4.0"
563
553
  ```
564
554
 
565
555
  **spark-nlp-silicon:**
566
556
 
567
557
  ```sbtshell
568
558
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
569
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.3.3"
559
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.4.0"
570
560
  ```
571
561
 
572
562
  Maven
@@ -588,7 +578,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
588
578
  Pip:
589
579
 
590
580
  ```bash
591
- pip install spark-nlp==5.3.3
581
+ pip install spark-nlp==5.4.0
592
582
  ```
593
583
 
594
584
  Conda:
@@ -617,7 +607,7 @@ spark = SparkSession.builder
617
607
  .config("spark.driver.memory", "16G")
618
608
  .config("spark.driver.maxResultSize", "0")
619
609
  .config("spark.kryoserializer.buffer.max", "2000M")
620
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3")
610
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0")
621
611
  .getOrCreate()
622
612
  ```
623
613
 
@@ -688,7 +678,7 @@ Use either one of the following options
688
678
  - Add the following Maven Coordinates to the interpreter's library list
689
679
 
690
680
  ```bash
691
- com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
681
+ com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
692
682
  ```
693
683
 
694
684
  - Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
@@ -699,7 +689,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
699
689
  Apart from the previous step, install the python module through pip
700
690
 
701
691
  ```bash
702
- pip install spark-nlp==5.3.3
692
+ pip install spark-nlp==5.4.0
703
693
  ```
704
694
 
705
695
  Or you can install `spark-nlp` from inside Zeppelin by using Conda:
@@ -727,7 +717,7 @@ launch the Jupyter from the same Python environment:
727
717
  $ conda create -n sparknlp python=3.8 -y
728
718
  $ conda activate sparknlp
729
719
  # spark-nlp by default is based on pyspark 3.x
730
- $ pip install spark-nlp==5.3.3 pyspark==3.3.1 jupyter
720
+ $ pip install spark-nlp==5.4.0 pyspark==3.3.1 jupyter
731
721
  $ jupyter notebook
732
722
  ```
733
723
 
@@ -744,7 +734,7 @@ export PYSPARK_PYTHON=python3
744
734
  export PYSPARK_DRIVER_PYTHON=jupyter
745
735
  export PYSPARK_DRIVER_PYTHON_OPTS=notebook
746
736
 
747
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
737
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
748
738
  ```
749
739
 
750
740
  Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
@@ -771,7 +761,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
771
761
  # -s is for spark-nlp
772
762
  # -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
773
763
  # by default they are set to the latest
774
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.3.3
764
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0
775
765
  ```
776
766
 
777
767
  [Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
@@ -794,7 +784,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
794
784
  # -s is for spark-nlp
795
785
  # -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
796
786
  # by default they are set to the latest
797
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.3.3
787
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0
798
788
  ```
799
789
 
800
790
  [Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
@@ -813,9 +803,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
813
803
 
814
804
  3. In `Libraries` tab inside your cluster you need to follow these steps:
815
805
 
816
- 3.1. Install New -> PyPI -> `spark-nlp==5.3.3` -> Install
806
+ 3.1. Install New -> PyPI -> `spark-nlp==5.4.0` -> Install
817
807
 
818
- 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3` -> Install
808
+ 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0` -> Install
819
809
 
820
810
  4. Now you can attach your notebook to the cluster and use Spark NLP!
821
811
 
@@ -866,7 +856,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
866
856
  "spark.kryoserializer.buffer.max": "2000M",
867
857
  "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
868
858
  "spark.driver.maxResultSize": "0",
869
- "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3"
859
+ "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0"
870
860
  }
871
861
  }]
872
862
  ```
@@ -875,7 +865,7 @@ A sample of AWS CLI to launch EMR cluster:
875
865
 
876
866
  ```.sh
877
867
  aws emr create-cluster \
878
- --name "Spark NLP 5.3.3" \
868
+ --name "Spark NLP 5.4.0" \
879
869
  --release-label emr-6.2.0 \
880
870
  --applications Name=Hadoop Name=Spark Name=Hive \
881
871
  --instance-type m4.4xlarge \
@@ -939,7 +929,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
939
929
  --enable-component-gateway \
940
930
  --metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
941
931
  --initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
942
- --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
932
+ --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
943
933
  ```
944
934
 
945
935
  2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
@@ -982,7 +972,7 @@ spark = SparkSession.builder
982
972
  .config("spark.kryoserializer.buffer.max", "2000m")
983
973
  .config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
984
974
  .config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
985
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3")
975
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0")
986
976
  .getOrCreate()
987
977
  ```
988
978
 
@@ -996,7 +986,7 @@ spark-shell \
996
986
  --conf spark.kryoserializer.buffer.max=2000M \
997
987
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
998
988
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
999
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
989
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
1000
990
  ```
1001
991
 
1002
992
  **pyspark:**
@@ -1009,7 +999,7 @@ pyspark \
1009
999
  --conf spark.kryoserializer.buffer.max=2000M \
1010
1000
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
1011
1001
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
1012
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
1002
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
1013
1003
  ```
1014
1004
 
1015
1005
  **Databricks:**
@@ -1281,7 +1271,7 @@ spark = SparkSession.builder
1281
1271
  .config("spark.driver.memory", "16G")
1282
1272
  .config("spark.driver.maxResultSize", "0")
1283
1273
  .config("spark.kryoserializer.buffer.max", "2000M")
1284
- .config("spark.jars", "/tmp/spark-nlp-assembly-5.3.3.jar")
1274
+ .config("spark.jars", "/tmp/spark-nlp-assembly-5.4.0.jar")
1285
1275
  .getOrCreate()
1286
1276
  ```
1287
1277
 
@@ -1290,7 +1280,7 @@ spark = SparkSession.builder
1290
1280
  version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
1291
1281
  - If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
1292
1282
  to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
1293
- i.e., `hdfs:///tmp/spark-nlp-assembly-5.3.3.jar`)
1283
+ i.e., `hdfs:///tmp/spark-nlp-assembly-5.4.0.jar`)
1294
1284
 
1295
1285
  Example of using pretrained Models and Pipelines in offline:
1296
1286