spark-nlp 5.5.2__tar.gz → 5.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spark-nlp might be problematic. Click here for more details.

Files changed (260)
  1. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/PKG-INFO +8 -8
  2. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/README.md +7 -7
  3. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/setup.py +1 -1
  4. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/spark_nlp.egg-info/PKG-INFO +8 -8
  5. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/__init__.py +2 -2
  6. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/bge_embeddings.py +7 -3
  7. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/properties.py +27 -0
  8. spark-nlp-5.5.3/sparknlp/reader/sparknlp_reader.py +113 -0
  9. spark-nlp-5.5.2/sparknlp/reader/sparknlp_reader.py +0 -121
  10. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/com/__init__.py +0 -0
  11. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/com/johnsnowlabs/__init__.py +0 -0
  12. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/com/johnsnowlabs/ml/__init__.py +0 -0
  13. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
  14. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/com/johnsnowlabs/nlp/__init__.py +0 -0
  15. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/setup.cfg +0 -0
  16. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/spark_nlp.egg-info/.uuid +0 -0
  17. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/spark_nlp.egg-info/SOURCES.txt +0 -0
  18. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/spark_nlp.egg-info/dependency_links.txt +0 -0
  19. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/spark_nlp.egg-info/top_level.txt +0 -0
  20. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotation.py +0 -0
  21. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotation_audio.py +0 -0
  22. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotation_image.py +0 -0
  23. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/__init__.py +0 -0
  24. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/audio/__init__.py +0 -0
  25. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
  26. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
  27. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
  28. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/chunk2_doc.py +0 -0
  29. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/chunker.py +0 -0
  30. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/__init__.py +0 -0
  31. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
  32. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
  33. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
  34. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +0 -0
  35. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
  36. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +0 -0
  37. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
  38. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
  39. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
  40. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
  41. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
  42. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
  43. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
  44. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +0 -0
  45. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
  46. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
  47. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
  48. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
  49. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
  50. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
  51. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
  52. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
  53. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
  54. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
  55. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
  56. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
  57. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
  58. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
  59. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
  60. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
  61. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
  62. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
  63. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
  64. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
  65. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
  66. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
  67. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
  68. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
  69. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
  70. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
  71. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
  72. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
  73. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/coref/__init__.py +0 -0
  74. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
  75. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/cv/__init__.py +0 -0
  76. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/cv/blip_for_question_answering.py +0 -0
  77. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
  78. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
  79. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
  80. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
  81. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
  82. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/date2_chunk.py +0 -0
  83. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/dependency/__init__.py +0 -0
  84. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
  85. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
  86. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/document_character_text_splitter.py +0 -0
  87. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/document_normalizer.py +0 -0
  88. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/document_token_splitter.py +0 -0
  89. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/document_token_splitter_test.py +0 -0
  90. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/__init__.py +0 -0
  91. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
  92. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/auto_gguf_embeddings.py +0 -0
  93. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
  94. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
  95. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
  96. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
  97. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
  98. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
  99. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
  100. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
  101. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
  102. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
  103. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
  104. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
  105. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/mxbai_embeddings.py +0 -0
  106. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/nomic_embeddings.py +0 -0
  107. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
  108. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
  109. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
  110. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/snowflake_embeddings.py +0 -0
  111. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
  112. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
  113. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/word2vec.py +0 -0
  114. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
  115. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
  116. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
  117. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
  118. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/er/__init__.py +0 -0
  119. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/er/entity_ruler.py +0 -0
  120. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/graph_extraction.py +0 -0
  121. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
  122. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
  123. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/ld_dl/__init__.py +0 -0
  124. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
  125. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/lemmatizer.py +0 -0
  126. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/matcher/__init__.py +0 -0
  127. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
  128. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/matcher/date_matcher.py +0 -0
  129. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
  130. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
  131. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/matcher/text_matcher.py +0 -0
  132. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/n_gram_generator.py +0 -0
  133. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/ner/__init__.py +0 -0
  134. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/ner/ner_approach.py +0 -0
  135. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/ner/ner_converter.py +0 -0
  136. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/ner/ner_crf.py +0 -0
  137. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/ner/ner_dl.py +0 -0
  138. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
  139. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
  140. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/normalizer.py +0 -0
  141. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/openai/__init__.py +0 -0
  142. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/openai/openai_completion.py +0 -0
  143. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
  144. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/param/__init__.py +0 -0
  145. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/param/classifier_encoder.py +0 -0
  146. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
  147. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/pos/__init__.py +0 -0
  148. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/pos/perceptron.py +0 -0
  149. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/sentence/__init__.py +0 -0
  150. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
  151. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
  152. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/sentiment/__init__.py +0 -0
  153. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
  154. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
  155. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/__init__.py +0 -0
  156. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/auto_gguf_model.py +0 -0
  157. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
  158. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/cpm_transformer.py +0 -0
  159. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
  160. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
  161. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/llama3_transformer.py +0 -0
  162. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
  163. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
  164. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
  165. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/nllb_transformer.py +0 -0
  166. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
  167. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/phi3_transformer.py +0 -0
  168. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/qwen_transformer.py +0 -0
  169. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/starcoder_transformer.py +0 -0
  170. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
  171. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/similarity/__init__.py +0 -0
  172. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
  173. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/spell_check/__init__.py +0 -0
  174. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
  175. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
  176. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
  177. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/stemmer.py +0 -0
  178. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/stop_words_cleaner.py +0 -0
  179. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
  180. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/token/__init__.py +0 -0
  181. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
  182. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
  183. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
  184. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/token/tokenizer.py +0 -0
  185. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/token2_chunk.py +0 -0
  186. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/ws/__init__.py +0 -0
  187. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/annotator/ws/word_segmenter.py +0 -0
  188. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/__init__.py +0 -0
  189. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/audio_assembler.py +0 -0
  190. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/doc2_chunk.py +0 -0
  191. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/document_assembler.py +0 -0
  192. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/embeddings_finisher.py +0 -0
  193. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/finisher.py +0 -0
  194. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/graph_finisher.py +0 -0
  195. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/has_recursive_fit.py +0 -0
  196. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/has_recursive_transform.py +0 -0
  197. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/image_assembler.py +0 -0
  198. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/light_pipeline.py +0 -0
  199. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/multi_document_assembler.py +0 -0
  200. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/prompt_assembler.py +0 -0
  201. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/recursive_pipeline.py +0 -0
  202. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/table_assembler.py +0 -0
  203. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/base/token_assembler.py +0 -0
  204. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/__init__.py +0 -0
  205. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/annotator_approach.py +0 -0
  206. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/annotator_model.py +0 -0
  207. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/annotator_properties.py +0 -0
  208. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/annotator_type.py +0 -0
  209. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/coverage_result.py +0 -0
  210. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/match_strategy.py +0 -0
  211. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/read_as.py +0 -0
  212. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/recursive_annotator_approach.py +0 -0
  213. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/storage.py +0 -0
  214. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/common/utils.py +0 -0
  215. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/functions.py +0 -0
  216. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/internal/__init__.py +0 -0
  217. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/internal/annotator_java_ml.py +0 -0
  218. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/internal/annotator_transformer.py +0 -0
  219. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/internal/extended_java_wrapper.py +0 -0
  220. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/internal/params_getters_setters.py +0 -0
  221. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/internal/recursive.py +0 -0
  222. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/logging/__init__.py +0 -0
  223. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/logging/comet.py +0 -0
  224. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/pretrained/__init__.py +0 -0
  225. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
  226. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/pretrained/resource_downloader.py +0 -0
  227. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/pretrained/utils.py +0 -0
  228. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/reader/__init__.py +0 -0
  229. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/__init__.py +0 -0
  230. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  231. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
  232. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  233. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
  234. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
  235. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
  236. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
  237. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
  238. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
  239. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
  240. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
  241. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
  242. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
  243. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
  244. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
  245. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  246. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
  247. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  248. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
  249. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
  250. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
  251. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
  252. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
  253. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/conll.py +0 -0
  254. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/conllu.py +0 -0
  255. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/pos.py +0 -0
  256. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/pub_tator.py +0 -0
  257. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/spacy_to_annotation.py +0 -0
  258. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/training/tfgraphs.py +0 -0
  259. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/upload_to_hub.py +0 -0
  260. {spark-nlp-5.5.2 → spark-nlp-5.5.3}/sparknlp/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spark-nlp
3
- Version: 5.5.2
3
+ Version: 5.5.3
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -95,7 +95,7 @@ $ java -version
95
95
  $ conda create -n sparknlp python=3.7 -y
96
96
  $ conda activate sparknlp
97
97
  # spark-nlp by default is based on pyspark 3.x
98
- $ pip install spark-nlp==5.5.2 pyspark==3.3.1
98
+ $ pip install spark-nlp==5.5.3 pyspark==3.3.1
99
99
  ```
100
100
 
101
101
  In Python console or Jupyter `Python3` kernel:
@@ -161,7 +161,7 @@ For a quick example of using pipelines and models take a look at our official [d
161
161
 
162
162
  ### Apache Spark Support
163
163
 
164
- Spark NLP *5.5.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
164
+ Spark NLP *5.5.3* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
165
165
 
166
166
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
167
167
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -189,7 +189,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
189
189
 
190
190
  ### Databricks Support
191
191
 
192
- Spark NLP 5.5.2 has been tested and is compatible with the following runtimes:
192
+ Spark NLP 5.5.3 has been tested and is compatible with the following runtimes:
193
193
 
194
194
  | **CPU** | **GPU** |
195
195
  |--------------------|--------------------|
@@ -206,7 +206,7 @@ We are compatible with older runtimes. For a full list check databricks support
206
206
 
207
207
  ### EMR Support
208
208
 
209
- Spark NLP 5.5.2 has been tested and is compatible with the following EMR releases:
209
+ Spark NLP 5.5.3 has been tested and is compatible with the following EMR releases:
210
210
 
211
211
  | **EMR Release** |
212
212
  |--------------------|
@@ -237,7 +237,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
237
237
  from our official documentation.
238
238
 
239
239
  If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
240
- projects [Spark NLP SBT S5.5.2r](https://github.com/maziyarpanahi/spark-nlp-starter)
240
+ projects [Spark NLP SBT S5.5.3r](https://github.com/maziyarpanahi/spark-nlp-starter)
241
241
 
242
242
  ### Python
243
243
 
@@ -282,7 +282,7 @@ In Spark NLP we can define S3 locations to:
282
282
 
283
283
  Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation.
284
284
 
285
- ## Document5.5.2
285
+ ## Document5.5.3
286
286
 
287
287
  ### Examples
288
288
 
@@ -315,7 +315,7 @@ the Spark NLP library:
315
315
  keywords = {Spark, Natural language processing, Deep learning, Tensorflow, Cluster},
316
316
  abstract = {Spark NLP is a Natural Language Processing (NLP) library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines that can scale easily in a distributed environment. Spark NLP comes with 1100+ pretrained pipelines and models in more than 192+ languages. It supports nearly all the NLP tasks and modules that can be used seamlessly in a cluster. Downloaded more than 2.7 million times and experiencing 9x growth since January 2020, Spark NLP is used by 54% of healthcare organizations as the world’s most widely used NLP library in the enterprise.}
317
317
  }
318
- }5.5.2
318
+ }5.5.3
319
319
  ```
320
320
 
321
321
  ## Community support
@@ -63,7 +63,7 @@ $ java -version
63
63
  $ conda create -n sparknlp python=3.7 -y
64
64
  $ conda activate sparknlp
65
65
  # spark-nlp by default is based on pyspark 3.x
66
- $ pip install spark-nlp==5.5.2 pyspark==3.3.1
66
+ $ pip install spark-nlp==5.5.3 pyspark==3.3.1
67
67
  ```
68
68
 
69
69
  In Python console or Jupyter `Python3` kernel:
@@ -129,7 +129,7 @@ For a quick example of using pipelines and models take a look at our official [d
129
129
 
130
130
  ### Apache Spark Support
131
131
 
132
- Spark NLP *5.5.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
132
+ Spark NLP *5.5.3* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
133
133
 
134
134
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
135
135
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -157,7 +157,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
157
157
 
158
158
  ### Databricks Support
159
159
 
160
- Spark NLP 5.5.2 has been tested and is compatible with the following runtimes:
160
+ Spark NLP 5.5.3 has been tested and is compatible with the following runtimes:
161
161
 
162
162
  | **CPU** | **GPU** |
163
163
  |--------------------|--------------------|
@@ -174,7 +174,7 @@ We are compatible with older runtimes. For a full list check databricks support
174
174
 
175
175
  ### EMR Support
176
176
 
177
- Spark NLP 5.5.2 has been tested and is compatible with the following EMR releases:
177
+ Spark NLP 5.5.3 has been tested and is compatible with the following EMR releases:
178
178
 
179
179
  | **EMR Release** |
180
180
  |--------------------|
@@ -205,7 +205,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
205
205
  from our official documentation.
206
206
 
207
207
  If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
208
- projects [Spark NLP SBT S5.5.2r](https://github.com/maziyarpanahi/spark-nlp-starter)
208
+ projects [Spark NLP SBT S5.5.3r](https://github.com/maziyarpanahi/spark-nlp-starter)
209
209
 
210
210
  ### Python
211
211
 
@@ -250,7 +250,7 @@ In Spark NLP we can define S3 locations to:
250
250
 
251
251
  Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation.
252
252
 
253
- ## Document5.5.2
253
+ ## Documentation
254
254
 
255
255
  ### Examples
256
256
 
@@ -283,7 +283,7 @@ the Spark NLP library:
283
283
  keywords = {Spark, Natural language processing, Deep learning, Tensorflow, Cluster},
284
284
  abstract = {Spark NLP is a Natural Language Processing (NLP) library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines that can scale easily in a distributed environment. Spark NLP comes with 1100+ pretrained pipelines and models in more than 192+ languages. It supports nearly all the NLP tasks and modules that can be used seamlessly in a cluster. Downloaded more than 2.7 million times and experiencing 9x growth since January 2020, Spark NLP is used by 54% of healthcare organizations as the world’s most widely used NLP library in the enterprise.}
285
285
  }
286
- }5.5.2
286
+ }
287
287
  ```
288
288
 
289
289
  ## Community support
@@ -41,7 +41,7 @@ setup(
41
41
  # project code, see
42
42
  # https://packaging.python.org/en/latest/single_source_version.html
43
43
 
44
- version='5.5.2', # Required
44
+ version='5.5.3', # Required
45
45
 
46
46
  # This is a one-line description or tagline of what your project does. This
47
47
  # corresponds to the 'Summary' metadata field:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spark-nlp
3
- Version: 5.5.2
3
+ Version: 5.5.3
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -95,7 +95,7 @@ $ java -version
95
95
  $ conda create -n sparknlp python=3.7 -y
96
96
  $ conda activate sparknlp
97
97
  # spark-nlp by default is based on pyspark 3.x
98
- $ pip install spark-nlp==5.5.2 pyspark==3.3.1
98
+ $ pip install spark-nlp==5.5.3 pyspark==3.3.1
99
99
  ```
100
100
 
101
101
  In Python console or Jupyter `Python3` kernel:
@@ -161,7 +161,7 @@ For a quick example of using pipelines and models take a look at our official [d
161
161
 
162
162
  ### Apache Spark Support
163
163
 
164
- Spark NLP *5.5.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
164
+ Spark NLP *5.5.3* has been built on top of Apache Spark 3.4 while fully supporting Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
165
165
 
166
166
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
167
167
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -189,7 +189,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
189
189
 
190
190
  ### Databricks Support
191
191
 
192
- Spark NLP 5.5.2 has been tested and is compatible with the following runtimes:
192
+ Spark NLP 5.5.3 has been tested and is compatible with the following runtimes:
193
193
 
194
194
  | **CPU** | **GPU** |
195
195
  |--------------------|--------------------|
@@ -206,7 +206,7 @@ We are compatible with older runtimes. For a full list check databricks support
206
206
 
207
207
  ### EMR Support
208
208
 
209
- Spark NLP 5.5.2 has been tested and is compatible with the following EMR releases:
209
+ Spark NLP 5.5.3 has been tested and is compatible with the following EMR releases:
210
210
 
211
211
  | **EMR Release** |
212
212
  |--------------------|
@@ -237,7 +237,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
237
237
  from our official documentation.
238
238
 
239
239
  If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
240
- projects [Spark NLP SBT S5.5.2r](https://github.com/maziyarpanahi/spark-nlp-starter)
240
+ projects [Spark NLP SBT Starter](https://github.com/maziyarpanahi/spark-nlp-starter)
241
241
 
242
242
  ### Python
243
243
 
@@ -282,7 +282,7 @@ In Spark NLP we can define S3 locations to:
282
282
 
283
283
  Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation.
284
284
 
285
- ## Document5.5.2
285
+ ## Documentation
286
286
 
287
287
  ### Examples
288
288
 
@@ -315,7 +315,7 @@ the Spark NLP library:
315
315
  keywords = {Spark, Natural language processing, Deep learning, Tensorflow, Cluster},
316
316
  abstract = {Spark NLP is a Natural Language Processing (NLP) library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines that can scale easily in a distributed environment. Spark NLP comes with 1100+ pretrained pipelines and models in more than 192+ languages. It supports nearly all the NLP tasks and modules that can be used seamlessly in a cluster. Downloaded more than 2.7 million times and experiencing 9x growth since January 2020, Spark NLP is used by 54% of healthcare organizations as the world’s most widely used NLP library in the enterprise.}
317
317
  }
318
- }5.5.2
318
+ }
319
319
  ```
320
320
 
321
321
  ## Community support
@@ -132,7 +132,7 @@ def start(gpu=False,
132
132
  The initiated Spark session.
133
133
 
134
134
  """
135
- current_version = "5.5.2"
135
+ current_version = "5.5.3"
136
136
 
137
137
  if params is None:
138
138
  params = {}
@@ -316,4 +316,4 @@ def version():
316
316
  str
317
317
  The current Spark NLP version.
318
318
  """
319
- return '5.5.2'
319
+ return '5.5.3'
@@ -21,7 +21,8 @@ class BGEEmbeddings(AnnotatorModel,
21
21
  HasCaseSensitiveProperties,
22
22
  HasStorageRef,
23
23
  HasBatchedAnnotate,
24
- HasMaxSentenceLengthLimit):
24
+ HasMaxSentenceLengthLimit,
25
+ HasClsTokenProperties):
25
26
  """Sentence embeddings using BGE.
26
27
 
27
28
  BGE, or BAAI General Embeddings, a model that can map any text to a low-dimensional dense
@@ -60,6 +61,8 @@ class BGEEmbeddings(AnnotatorModel,
60
61
  Max sentence length to process, by default 512
61
62
  configProtoBytes
62
63
  ConfigProto from tensorflow, serialized into byte array.
64
+ useCLSToken
65
+ Whether to use the CLS token for sentence embeddings, by default True
63
66
 
64
67
  References
65
68
  ----------
@@ -148,6 +151,7 @@ class BGEEmbeddings(AnnotatorModel,
148
151
  batchSize=8,
149
152
  maxSentenceLength=512,
150
153
  caseSensitive=False,
154
+ useCLSToken=True
151
155
  )
152
156
 
153
157
  @staticmethod
@@ -171,13 +175,13 @@ class BGEEmbeddings(AnnotatorModel,
171
175
  return BGEEmbeddings(java_model=jModel)
172
176
 
173
177
  @staticmethod
174
- def pretrained(name="bge_base", lang="en", remote_loc=None):
178
+ def pretrained(name="bge_small_en_v1.5", lang="en", remote_loc=None):
175
179
  """Downloads and loads a pretrained model.
176
180
 
177
181
  Parameters
178
182
  ----------
179
183
  name : str, optional
180
- Name of the pretrained model, by default "bge_base"
184
+ Name of the pretrained model, by default "bge_small_en_v1.5"
181
185
  lang : str, optional
182
186
  Language of the pretrained model, by default "en"
183
187
  remote_loc : str, optional
@@ -67,6 +67,33 @@ class HasCaseSensitiveProperties:
67
67
  return self.getOrDefault(self.caseSensitive)
68
68
 
69
69
 
70
class HasClsTokenProperties:
    """Mixin for annotators that can pool sentence embeddings either from the
    CLS token or by attention-based average pooling over all tokens.
    """

    # Boolean switch between CLS-token pooling and average pooling.
    useCLSToken = Param(Params._dummy(),
                        "useCLSToken",
                        "Whether to use CLS token for pooling (true) or attention-based average pooling (false)",
                        typeConverter=TypeConverters.toBoolean)

    def setUseCLSToken(self, value):
        """Sets whether to use the CLS token for pooling (true) or
        attention-based average pooling (false).

        Parameters
        ----------
        value : bool
            Whether to use CLS token for pooling (true) or attention-based
            average pooling (false)
        """
        return self._set(useCLSToken=value)

    def getUseCLSToken(self):
        """Gets whether the CLS token is used for pooling (true) or
        attention-based average pooling (false).

        Returns
        -------
        bool
            Whether to use CLS token for pooling (true) or attention-based
            average pooling (false)
        """
        return self.getOrDefault(self.useCLSToken)
95
+
96
+
70
97
  class HasClassifierActivationProperties:
71
98
  activation = Param(Params._dummy(),
72
99
  "activation",
@@ -0,0 +1,113 @@
1
+ # Copyright 2017-2024 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from sparknlp.internal import ExtendedJavaWrapper
15
+
16
+
17
class SparkNLPReader(ExtendedJavaWrapper):
    """Reader for HTML, email, and document files, backed by the JVM-side
    ``com.johnsnowlabs.reader.SparkNLPReader``.

    Two kinds of input path are supported:

    - `htmlPath`: A path to a directory of HTML files or a single HTML file (e.g., `"path/html/files"`).
    - `url`: A single URL or a set of URLs (e.g., `"https://www.wikipedia.org"`).

    Parameters
    ----------
    spark : SparkSession
        The active Spark session.
    params : dict, optional
        A dictionary with custom configurations.
    """

    def __init__(self, spark, params=None):
        # An absent params dict is normalized to an empty one before it is
        # handed to the JVM wrapper.
        super(SparkNLPReader, self).__init__(
            "com.johnsnowlabs.reader.SparkNLPReader",
            {} if params is None else params,
        )
        self.spark = spark

    def html(self, htmlPath):
        """Reads HTML files or URLs and returns a Spark DataFrame.

        Parameters
        ----------
        htmlPath : str or list of str
            Path(s) to HTML file(s) or a list of URLs.

        Returns
        -------
        pyspark.sql.DataFrame
            A DataFrame containing the parsed HTML content.

        Raises
        ------
        TypeError
            If ``htmlPath`` is neither a string nor a list of strings.

        Examples
        --------
        >>> from sparknlp.reader import SparkNLPReader
        >>> html_df = SparkNLPReader(spark).html("https://www.wikipedia.org")

        You can also use SparkNLP to simplify the process:

        >>> import sparknlp
        >>> html_df = sparknlp.read().html("https://www.wikipedia.org")
        >>> html_df.show(truncate=False)
        """
        path_is_valid = isinstance(htmlPath, str) or (
            isinstance(htmlPath, list)
            and all(isinstance(entry, str) for entry in htmlPath)
        )
        if not path_is_valid:
            raise TypeError("htmlPath must be a string or a list of strings")
        java_frame = self._java_obj.html(htmlPath)
        return self.getDataFrame(self.spark, java_frame)

    def email(self, filePath):
        """Reads email files and returns a Spark DataFrame.

        Parameters
        ----------
        filePath : str
            Path to an email file or a directory containing emails.

        Returns
        -------
        pyspark.sql.DataFrame
            A DataFrame containing parsed email data.

        Raises
        ------
        TypeError
            If ``filePath`` is not a string.

        Examples
        --------
        >>> from sparknlp.reader import SparkNLPReader
        >>> email_df = SparkNLPReader(spark).email("home/user/emails-directory")

        Using SparkNLP:

        >>> import sparknlp
        >>> email_df = sparknlp.read().email("home/user/emails-directory")
        >>> email_df.show(truncate=False)
        """
        if not isinstance(filePath, str):
            raise TypeError("filePath must be a string")
        java_frame = self._java_obj.email(filePath)
        return self.getDataFrame(self.spark, java_frame)

    def doc(self, docPath):
        """Reads document files and returns a Spark DataFrame.

        Parameters
        ----------
        docPath : str
            Path to a document file.

        Returns
        -------
        pyspark.sql.DataFrame
            A DataFrame containing parsed document content.

        Raises
        ------
        TypeError
            If ``docPath`` is not a string.
        """
        if not isinstance(docPath, str):
            raise TypeError("docPath must be a string")
        java_frame = self._java_obj.doc(docPath)
        return self.getDataFrame(self.spark, java_frame)
@@ -1,121 +0,0 @@
1
- # Copyright 2017-2024 John Snow Labs
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- from sparknlp.internal import ExtendedJavaWrapper
15
-
16
-
17
- class SparkNLPReader(ExtendedJavaWrapper):
18
- """Instantiates class to read HTML files.
19
-
20
- Two types of input paths are supported,
21
-
22
- htmlPath: this is a path to a directory of HTML files or a path to an HTML file
23
- E.g. "path/html/files"
24
-
25
- url: this is the URL or set of URLs of a website . E.g., "https://www.wikipedia.org"
26
-
27
- Parameters
28
- ----------
29
- params : spark
30
- Spark session
31
- params : dict, optional
32
- Parameter with custom configuration
33
-
34
- Examples
35
- --------
36
- >>> from sparknlp.reader import SparkNLPReader
37
- >>> html_df = SparkNLPReader().html(spark, "https://www.wikipedia.org")
38
-
39
- You can use SparkNLP for one line of code
40
- >>> import sparknlp
41
- >>> html_df = sparknlp.read().html("https://www.wikipedia.org")
42
- >>> html_df.show(truncate=False)
43
-
44
- +--------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
45
- |url |html |
46
- +--------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
47
- |https://example.com/|[{Title, Example Domain, {pageNumber -> 1}}, {NarrativeText, 0, This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission., {pageNumber -> 1}}, {NarrativeText, 0, More information... More information..., {pageNumber -> 1}}] |
48
- +--------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
49
- >>> html_df.printSchema()
50
-
51
- root
52
- |-- url: string (nullable = true)
53
- |-- html: array (nullable = true)
54
- | |-- element: struct (containsNull = true)
55
- | | |-- elementType: string (nullable = true)
56
- | | |-- content: string (nullable = true)
57
- | | |-- metadata: map (nullable = true)
58
- | | | |-- key: string
59
- | | | |-- value: string (valueContainsNull = true)
60
-
61
-
62
-
63
- Instantiates class to read email files.
64
-
65
- emailPath: this is a path to a directory of HTML files or a path to an HTML file E.g.
66
- "path/html/emails"
67
-
68
- Examples
69
- --------
70
- >>> from sparknlp.reader import SparkNLPReader
71
- >>> email_df = SparkNLPReader().email(spark, "home/user/emails-directory")
72
-
73
- You can use SparkNLP for one line of code
74
- >>> import sparknlp
75
- >>> email_df = sparknlp.read().email("home/user/emails-directory")
76
- >>> email_df.show(truncate=False)
77
- +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
78
- |email |
79
- +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
80
- |[{Title, Email Text Attachments, {sent_to -> Danilo Burbano <danilo@johnsnowlabs.com>, sent_from -> Danilo Burbano <danilo@johnsnowlabs.com>}}, {NarrativeText, Email test with two text attachments\r\n\r\nCheers,\r\n\r\n, {sent_to -> Danilo Burbano <danilo@johnsnowlabs.com>, sent_from -> Danilo Burbano <danilo@johnsnowlabs.com>, mimeType -> text/plain}}, {NarrativeText, <html>\r\n<head>\r\n<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">\r\n<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>\r\n</head>\r\n<body dir="ltr">\r\n<span style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">Email&nbsp; test with two text attachments</span>\r\n<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">\r\n<br>\r\n</div>\r\n<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">\r\nCheers,</div>\r\n<div class="elementToProof" style="font-family: Aptos, Aptos_EmbeddedFont, Aptos_MSFontService, Calibri, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">\r\n<br>\r\n</div>\r\n</body>\r\n</html>\r\n, {sent_to -> Danilo Burbano <danilo@johnsnowlabs.com>, sent_from -> Danilo Burbano <danilo@johnsnowlabs.com>, mimeType -> text/html}}, {Attachment, filename.txt, {sent_to -> Danilo Burbano <danilo@johnsnowlabs.com>, sent_from -> Danilo Burbano <danilo@johnsnowlabs.com>, contentType -> text/plain; name="filename.txt"}}, {NarrativeText, This is the content of the file.\n, {sent_to -> Danilo Burbano <danilo@johnsnowlabs.com>, sent_from -> Danilo Burbano <danilo@johnsnowlabs.com>, mimeType -> text/plain}}, {Attachment, filename2.txt, {sent_to -> Danilo Burbano <danilo@johnsnowlabs.com>, sent_from -> Danilo Burbano 
<danilo@johnsnowlabs.com>, contentType -> text/plain; name="filename2.txt"}}, {NarrativeText, This is an additional content file.\n, {sent_to -> Danilo Burbano <danilo@johnsnowlabs.com>, sent_from -> Danilo Burbano <danilo@johnsnowlabs.com>, mimeType -> text/plain}}]|
81
- +-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
82
- email_df.printSchema()
83
- root
84
- |-- path: string (nullable = true)
85
- |-- content: array (nullable = true)
86
- |-- email: array (nullable = true)
87
- | |-- element: struct (containsNull = true)
88
- | | |-- elementType: string (nullable = true)
89
- | | |-- content: string (nullable = true)
90
- | | |-- metadata: map (nullable = true)
91
- | | | |-- key: string
92
- | | | |-- value: string (valueContainsNull = true)
93
-
94
- """
95
-
96
- def __init__(self, spark, params=None):
97
- if params is None:
98
- params = {}
99
- super(SparkNLPReader, self).__init__("com.johnsnowlabs.reader.SparkNLPReader", params)
100
- self.spark = spark
101
-
102
- def html(self, htmlPath):
103
- if not isinstance(htmlPath, (str, list)) or (isinstance(htmlPath, list) and not all(isinstance(item, str) for item in htmlPath)):
104
- raise TypeError("htmlPath must be a string or a list of strings")
105
- jdf = self._java_obj.html(htmlPath)
106
- dataframe = self.getDataFrame(self.spark, jdf)
107
- return dataframe
108
-
109
- def email(self, filePath):
110
- if not isinstance(filePath, str):
111
- raise TypeError("filePath must be a string")
112
- jdf = self._java_obj.email(filePath)
113
- dataframe = self.getDataFrame(self.spark, jdf)
114
- return dataframe
115
-
116
- def doc(self, docPath):
117
- if not isinstance(docPath, str):
118
- raise TypeError("docPath must be a string")
119
- jdf = self._java_obj.doc(docPath)
120
- dataframe = self.getDataFrame(self.spark, jdf)
121
- return dataframe
File without changes
File without changes