spark-nlp 6.1.5.tar.gz → 6.2.0.tar.gz

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Files changed (296)
  1. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/PKG-INFO +6 -6
  2. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/README.md +5 -5
  3. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/setup.py +1 -1
  4. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/spark_nlp.egg-info/PKG-INFO +6 -6
  5. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/spark_nlp.egg-info/SOURCES.txt +1 -0
  6. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/__init__.py +1 -1
  7. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/document_normalizer.py +36 -0
  8. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/auto_gguf_embeddings.py +5 -0
  9. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/er/entity_ruler.py +35 -0
  10. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/auto_gguf_model.py +6 -4
  11. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/auto_gguf_reranker.py +5 -0
  12. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +6 -1
  13. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/__init__.py +1 -0
  14. spark_nlp-6.2.0/sparknlp/common/completion_post_processing.py +37 -0
  15. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/com/__init__.py +0 -0
  16. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/com/johnsnowlabs/__init__.py +0 -0
  17. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/com/johnsnowlabs/ml/__init__.py +0 -0
  18. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
  19. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/com/johnsnowlabs/nlp/__init__.py +0 -0
  20. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/setup.cfg +0 -0
  21. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/spark_nlp.egg-info/dependency_links.txt +0 -0
  22. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/spark_nlp.egg-info/top_level.txt +0 -0
  23. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotation.py +0 -0
  24. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotation_audio.py +0 -0
  25. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotation_image.py +0 -0
  26. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/__init__.py +0 -0
  27. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/audio/__init__.py +0 -0
  28. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
  29. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
  30. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
  31. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/chunk2_doc.py +0 -0
  32. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/chunker.py +0 -0
  33. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/__init__.py +0 -0
  34. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +0 -0
  35. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
  36. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
  37. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
  38. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +0 -0
  39. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
  40. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +0 -0
  41. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
  42. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
  43. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
  44. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
  45. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
  46. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
  47. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
  48. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +0 -0
  49. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
  50. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
  51. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
  52. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
  53. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
  54. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
  55. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
  56. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
  57. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
  58. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +0 -0
  59. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
  60. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
  61. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
  62. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
  63. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
  64. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
  65. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
  66. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +0 -0
  67. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
  68. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
  69. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
  70. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
  71. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
  72. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
  73. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +0 -0
  74. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
  75. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
  76. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
  77. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
  78. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
  79. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
  80. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cleaners/__init__.py +0 -0
  81. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cleaners/cleaner.py +0 -0
  82. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cleaners/extractor.py +0 -0
  83. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/coref/__init__.py +0 -0
  84. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
  85. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/__init__.py +0 -0
  86. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/blip_for_question_answering.py +0 -0
  87. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
  88. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
  89. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/florence2_transformer.py +0 -0
  90. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/gemma3_for_multimodal.py +0 -0
  91. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/internvl_for_multimodal.py +0 -0
  92. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/janus_for_multimodal.py +0 -0
  93. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/llava_for_multimodal.py +0 -0
  94. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/mllama_for_multimodal.py +0 -0
  95. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/paligemma_for_multimodal.py +0 -0
  96. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/phi3_vision_for_multimodal.py +0 -0
  97. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/qwen2vl_transformer.py +0 -0
  98. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/smolvlm_transformer.py +0 -0
  99. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
  100. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
  101. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
  102. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/dataframe_optimizer.py +0 -0
  103. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/date2_chunk.py +0 -0
  104. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/dependency/__init__.py +0 -0
  105. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
  106. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
  107. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/document_character_text_splitter.py +0 -0
  108. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/document_token_splitter.py +0 -0
  109. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/document_token_splitter_test.py +0 -0
  110. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/__init__.py +0 -0
  111. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
  112. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
  113. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
  114. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -0
  115. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
  116. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
  117. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
  118. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
  119. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
  120. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
  121. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/e5v_embeddings.py +0 -0
  122. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
  123. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
  124. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
  125. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/minilm_embeddings.py +0 -0
  126. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
  127. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/mxbai_embeddings.py +0 -0
  128. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/nomic_embeddings.py +0 -0
  129. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
  130. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
  131. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
  132. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/snowflake_embeddings.py +0 -0
  133. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
  134. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
  135. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/word2vec.py +0 -0
  136. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
  137. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
  138. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
  139. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
  140. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/er/__init__.py +0 -0
  141. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/graph_extraction.py +0 -0
  142. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
  143. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
  144. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ld_dl/__init__.py +0 -0
  145. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
  146. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/lemmatizer.py +0 -0
  147. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/__init__.py +0 -0
  148. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
  149. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/date_matcher.py +0 -0
  150. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
  151. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
  152. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/text_matcher.py +0 -0
  153. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/n_gram_generator.py +0 -0
  154. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/__init__.py +0 -0
  155. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_approach.py +0 -0
  156. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_converter.py +0 -0
  157. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_crf.py +0 -0
  158. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_dl.py +0 -0
  159. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_dl_graph_checker.py +0 -0
  160. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
  161. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
  162. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/normalizer.py +0 -0
  163. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/openai/__init__.py +0 -0
  164. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/openai/openai_completion.py +0 -0
  165. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
  166. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/param/__init__.py +0 -0
  167. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/param/classifier_encoder.py +0 -0
  168. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
  169. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/pos/__init__.py +0 -0
  170. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/pos/perceptron.py +0 -0
  171. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentence/__init__.py +0 -0
  172. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
  173. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
  174. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentiment/__init__.py +0 -0
  175. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
  176. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
  177. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/__init__.py +0 -0
  178. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
  179. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/cohere_transformer.py +0 -0
  180. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/cpm_transformer.py +0 -0
  181. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
  182. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
  183. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/llama3_transformer.py +0 -0
  184. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
  185. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
  186. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
  187. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/nllb_transformer.py +0 -0
  188. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/olmo_transformer.py +0 -0
  189. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
  190. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/phi3_transformer.py +0 -0
  191. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/phi4_transformer.py +0 -0
  192. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/qwen_transformer.py +0 -0
  193. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/starcoder_transformer.py +0 -0
  194. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
  195. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/similarity/__init__.py +0 -0
  196. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
  197. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/spell_check/__init__.py +0 -0
  198. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
  199. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
  200. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
  201. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/stemmer.py +0 -0
  202. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/stop_words_cleaner.py +0 -0
  203. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
  204. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token/__init__.py +0 -0
  205. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
  206. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
  207. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
  208. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token/tokenizer.py +0 -0
  209. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token2_chunk.py +0 -0
  210. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ws/__init__.py +0 -0
  211. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ws/word_segmenter.py +0 -0
  212. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/__init__.py +0 -0
  213. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/audio_assembler.py +0 -0
  214. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/doc2_chunk.py +0 -0
  215. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/document_assembler.py +0 -0
  216. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/embeddings_finisher.py +0 -0
  217. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/finisher.py +0 -0
  218. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/gguf_ranking_finisher.py +0 -0
  219. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/graph_finisher.py +0 -0
  220. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/has_recursive_fit.py +0 -0
  221. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/has_recursive_transform.py +0 -0
  222. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/image_assembler.py +0 -0
  223. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/light_pipeline.py +0 -0
  224. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/multi_document_assembler.py +0 -0
  225. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/prompt_assembler.py +0 -0
  226. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/recursive_pipeline.py +0 -0
  227. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/table_assembler.py +0 -0
  228. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/token_assembler.py +0 -0
  229. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/annotator_approach.py +0 -0
  230. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/annotator_model.py +0 -0
  231. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/annotator_properties.py +0 -0
  232. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/annotator_type.py +0 -0
  233. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/coverage_result.py +0 -0
  234. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/match_strategy.py +0 -0
  235. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/properties.py +0 -0
  236. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/read_as.py +0 -0
  237. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/recursive_annotator_approach.py +0 -0
  238. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/storage.py +0 -0
  239. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/utils.py +0 -0
  240. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/functions.py +0 -0
  241. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/__init__.py +0 -0
  242. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/annotator_java_ml.py +0 -0
  243. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/annotator_transformer.py +0 -0
  244. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/extended_java_wrapper.py +0 -0
  245. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/params_getters_setters.py +0 -0
  246. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/recursive.py +0 -0
  247. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/logging/__init__.py +0 -0
  248. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/logging/comet.py +0 -0
  249. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/partition/__init__.py +0 -0
  250. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/partition/partition.py +0 -0
  251. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/partition/partition_properties.py +0 -0
  252. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/partition/partition_transformer.py +0 -0
  253. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/pretrained/__init__.py +0 -0
  254. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
  255. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/pretrained/resource_downloader.py +0 -0
  256. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/pretrained/utils.py +0 -0
  257. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/__init__.py +0 -0
  258. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/enums.py +0 -0
  259. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/pdf_to_text.py +0 -0
  260. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/reader2doc.py +0 -0
  261. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/reader2image.py +0 -0
  262. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/reader2table.py +0 -0
  263. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/reader_assembler.py +0 -0
  264. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/sparknlp_reader.py +0 -0
  265. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/__init__.py +0 -0
  266. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  267. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
  268. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  269. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
  270. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
  271. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
  272. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
  273. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
  274. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
  275. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
  276. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
  277. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
  278. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
  279. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
  280. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
  281. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  282. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
  283. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  284. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
  285. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
  286. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
  287. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
  288. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
  289. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/conll.py +0 -0
  290. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/conllu.py +0 -0
  291. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/pos.py +0 -0
  292. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/pub_tator.py +0 -0
  293. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/spacy_to_annotation.py +0 -0
  294. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/tfgraphs.py +0 -0
  295. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/upload_to_hub.py +0 -0
  296. {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/util.py +0 -0
--- spark_nlp-6.1.5/PKG-INFO
+++ spark_nlp-6.2.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spark-nlp
-Version: 6.1.5
+Version: 6.2.0
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.1.5 pyspark==3.3.1
+$ pip install spark-nlp==6.2.0 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
 
 ### Apache Spark Support
 
-Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.2.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
 
 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
 
 ### Databricks Support
 
-Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
+Spark NLP 6.2.0 has been tested and is compatible with the following runtimes:
 
 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -216,7 +216,7 @@ We are compatible with older runtimes. For a full list check databricks support
 
 ### EMR Support
 
-Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.2.0 has been tested and is compatible with the following EMR releases:
 
 | **EMR Release** |
 |--------------------|
@@ -306,7 +306,7 @@ Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integr
 Need more **examples**? Check out our dedicated [Spark NLP Examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
 repository to showcase all Spark NLP use cases!
 
-Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/demo) built by Streamlit.
+Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/demos) built by Streamlit.
 
 #### All examples: [spark-nlp/examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
 
--- spark_nlp-6.1.5/README.md
+++ spark_nlp-6.2.0/README.md
@@ -63,7 +63,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.1.5 pyspark==3.3.1
+$ pip install spark-nlp==6.2.0 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
@@ -129,7 +129,7 @@ For a quick example of using pipelines and models take a look at our official [d
 
 ### Apache Spark Support
 
-Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.2.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
 
 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -159,7 +159,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
 
 ### Databricks Support
 
-Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
+Spark NLP 6.2.0 has been tested and is compatible with the following runtimes:
 
 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -177,7 +177,7 @@ We are compatible with older runtimes. For a full list check databricks support
 
 ### EMR Support
 
-Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.2.0 has been tested and is compatible with the following EMR releases:
 
 | **EMR Release** |
 |--------------------|
@@ -267,7 +267,7 @@ Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integr
 Need more **examples**? Check out our dedicated [Spark NLP Examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
 repository to showcase all Spark NLP use cases!
 
-Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/demo) built by Streamlit.
+Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/demos) built by Streamlit.
 
 #### All examples: [spark-nlp/examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
 
--- spark_nlp-6.1.5/setup.py
+++ spark_nlp-6.2.0/setup.py
@@ -41,7 +41,7 @@ setup(
     # project code, see
     # https://packaging.python.org/en/latest/single_source_version.html
 
-    version='6.1.5', # Required
+    version='6.2.0', # Required
 
     # This is a one-line description or tagline of what your project does. This
     # corresponds to the 'Summary' metadata field:
--- spark_nlp-6.1.5/spark_nlp.egg-info/PKG-INFO
+++ spark_nlp-6.2.0/spark_nlp.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spark-nlp
-Version: 6.1.5
+Version: 6.2.0
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.1.5 pyspark==3.3.1
+$ pip install spark-nlp==6.2.0 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
 
 ### Apache Spark Support
 
-Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.2.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
 
 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
 
 ### Databricks Support
 
-Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
+Spark NLP 6.2.0 has been tested and is compatible with the following runtimes:
 
 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -216,7 +216,7 @@ We are compatible with older runtimes. For a full list check databricks support
 
 ### EMR Support
 
-Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.2.0 has been tested and is compatible with the following EMR releases:
 
 | **EMR Release** |
 |--------------------|
@@ -306,7 +306,7 @@ Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integr
 Need more **examples**? Check out our dedicated [Spark NLP Examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
 repository to showcase all Spark NLP use cases!
 
-Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/demo) built by Streamlit.
+Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/demos) built by Streamlit.
 
 #### All examples: [spark-nlp/examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
 
--- spark_nlp-6.1.5/spark_nlp.egg-info/SOURCES.txt
+++ spark_nlp-6.2.0/spark_nlp.egg-info/SOURCES.txt
@@ -231,6 +231,7 @@ sparknlp/common/annotator_approach.py
 sparknlp/common/annotator_model.py
 sparknlp/common/annotator_properties.py
 sparknlp/common/annotator_type.py
+sparknlp/common/completion_post_processing.py
 sparknlp/common/coverage_result.py
 sparknlp/common/match_strategy.py
 sparknlp/common/properties.py
--- spark_nlp-6.1.5/sparknlp/__init__.py
+++ spark_nlp-6.2.0/sparknlp/__init__.py
@@ -66,7 +66,7 @@ sys.modules['com.johnsnowlabs.ml.ai'] = annotator
 annotators = annotator
 embeddings = annotator
 
-__version__ = "6.1.5"
+__version__ = "6.2.0"
 
 
 def start(gpu=False,
--- spark_nlp-6.1.5/sparknlp/annotator/document_normalizer.py
+++ spark_nlp-6.2.0/sparknlp/annotator/document_normalizer.py
@@ -122,6 +122,21 @@ class DocumentNormalizer(AnnotatorModel):
                      "file encoding to apply on normalized documents",
                      typeConverter=TypeConverters.toString)
 
+    presetPattern = Param(
+        Params._dummy(),
+        "presetPattern",
+        "Selects a single text cleaning function from the functional presets (e.g., 'CLEAN_BULLETS', 'CLEAN_DASHES', etc.).",
+        typeConverter=TypeConverters.toString
+    )
+
+    autoMode = Param(
+        Params._dummy(),
+        "autoMode",
+        "Enables a predefined cleaning mode combining multiple text cleaner functions (e.g., 'light_clean', 'document_clean', 'html_clean', 'full_auto').",
+        typeConverter=TypeConverters.toString
+    )
+
+
     @keyword_only
     def __init__(self):
         super(DocumentNormalizer, self).__init__(classname="com.johnsnowlabs.nlp.annotators.DocumentNormalizer")
@@ -197,3 +212,24 @@ class DocumentNormalizer(AnnotatorModel):
             File encoding to apply on normalized documents, by default "UTF-8"
         """
         return self._set(encoding=value)
+
+    def setPresetPattern(self, value):
+        """Sets a single text cleaning preset pattern.
+
+        Parameters
+        ----------
+        value : str
+            Preset cleaning pattern name, e.g., 'CLEAN_BULLETS', 'CLEAN_DASHES'.
+        """
+        return self._set(presetPattern=value)
+
+
+    def setAutoMode(self, value):
+        """Sets an automatic text cleaning mode using predefined groups of cleaning functions.
+
+        Parameters
+        ----------
+        value : str
+            Auto cleaning mode, e.g., 'light_clean', 'document_clean', 'social_clean', 'html_clean', 'full_auto'.
+        """
+        return self._set(autoMode=value)
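The new DocumentNormalizer presets can be exercised with the usual pipeline setup. A minimal sketch (the mode and preset names are taken from the param docstrings above; the pipeline wiring follows the standard Spark NLP pattern and assumes a running SparkSession):

```python
from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import DocumentNormalizer

document_assembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

# 'document_clean' is one of the predefined auto modes listed in the new
# param docstring; setPresetPattern("CLEAN_BULLETS") would apply a single
# cleaner function instead of a combined mode.
normalizer = DocumentNormalizer() \
    .setInputCols(["document"]) \
    .setOutputCol("normalized") \
    .setAutoMode("document_clean")

pipeline = Pipeline(stages=[document_assembler, normalizer])
```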
--- spark_nlp-6.1.5/sparknlp/annotator/embeddings/auto_gguf_embeddings.py
+++ spark_nlp-6.2.0/sparknlp/annotator/embeddings/auto_gguf_embeddings.py
@@ -532,3 +532,8 @@ class AutoGGUFEmbeddings(AnnotatorModel, HasBatchedAnnotate):
         return ResourceDownloader.downloadModel(
             AutoGGUFEmbeddings, name, lang, remote_loc
         )
+
+    def close(self):
+        """Closes the llama.cpp model backend freeing resources. The model is reloaded when used again.
+        """
+        self._java_obj.close()
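Since the new close() method only detaches the llama.cpp backend (the model reloads lazily on next use), a sketch of freeing memory between jobs might look like this (the DataFrame and output path are illustrative):

```python
from sparknlp.annotator import AutoGGUFEmbeddings

# Illustrative: loads the default pretrained GGUF embeddings model.
embeddings = AutoGGUFEmbeddings.pretrained() \
    .setInputCols(["document"]) \
    .setOutputCol("embeddings")

# documents_df is assumed to hold DOCUMENT annotations in a "document" column.
result = embeddings.transform(documents_df)
result.write.mode("overwrite").parquet("embeddings.parquet")

# Free the llama.cpp backend; the model is reloaded if the annotator is used again.
embeddings.close()
```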
--- spark_nlp-6.1.5/sparknlp/annotator/er/entity_ruler.py
+++ spark_nlp-6.2.0/sparknlp/annotator/er/entity_ruler.py
@@ -215,6 +215,20 @@ class EntityRulerModel(AnnotatorModel, HasStorageModel):
 
     outputAnnotatorType = AnnotatorType.CHUNK
 
+    autoMode = Param(
+        Params._dummy(),
+        "autoMode",
+        "Enable built-in regex presets that combine related entity patterns (e.g., 'communication_entities', 'network_entities', 'media_entities', etc.).",
+        typeConverter=TypeConverters.toString
+    )
+
+    extractEntities = Param(
+        Params._dummy(),
+        "extractEntities",
+        "List of entity types to extract. If not set, all entities in the active autoMode or from regexPatterns are used.",
+        typeConverter=TypeConverters.toListString
+    )
+
     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.er.EntityRulerModel", java_model=None):
         super(EntityRulerModel, self).__init__(
             classname=classname,
@@ -230,3 +244,24 @@ class EntityRulerModel(AnnotatorModel, HasStorageModel):
     def loadStorage(path, spark, storage_ref):
         HasStorageModel.loadStorages(path, spark, storage_ref, EntityRulerModel.database)
 
+
+    def setAutoMode(self, value):
+        """Sets the auto mode for predefined regex entity groups.
+
+        Parameters
+        ----------
+        value : str
+            Name of the auto mode to activate (e.g., 'communication_entities', 'network_entities', etc.)
+        """
+        return self._set(autoMode=value)
+
+
+    def setExtractEntities(self, value):
+        """Sets specific entities to extract, filtering only those defined in regexPatterns or autoMode.
+
+        Parameters
+        ----------
+        value : list[str]
+            List of entity names to extract, e.g., ['EMAIL_ADDRESS_PATTERN', 'IPV4_PATTERN'].
+        """
+        return self._set(extractEntities=value)
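A hypothetical use of the two new params, with the mode and entity names quoted from the docstrings above (the input columns assume the usual document/token contract for EntityRulerModel):

```python
from sparknlp.annotator import EntityRulerModel

# Activate a built-in regex preset group and keep only the email matches.
ruler = EntityRulerModel() \
    .setInputCols(["document", "token"]) \
    .setOutputCol("entities") \
    .setAutoMode("communication_entities") \
    .setExtractEntities(["EMAIL_ADDRESS_PATTERN"])
```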
--- spark_nlp-6.1.5/sparknlp/annotator/seq2seq/auto_gguf_model.py
+++ spark_nlp-6.2.0/sparknlp/annotator/seq2seq/auto_gguf_model.py
@@ -12,12 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Contains classes for the AutoGGUFModel."""
-from typing import List, Dict
-
 from sparknlp.common import *
 
 
-class AutoGGUFModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties):
+class AutoGGUFModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties, CompletionPostProcessing):
     """
     Annotator that uses the llama.cpp library to generate text completions with large language
     models.
@@ -243,7 +241,6 @@ class AutoGGUFModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties):
     inputAnnotatorTypes = [AnnotatorType.DOCUMENT]
     outputAnnotatorType = AnnotatorType.DOCUMENT
 
-
     @keyword_only
     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.seq2seq.AutoGGUFModel", java_model=None):
         super(AutoGGUFModel, self).__init__(
@@ -300,3 +297,8 @@ class AutoGGUFModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties):
         """
         from sparknlp.pretrained import ResourceDownloader
         return ResourceDownloader.downloadModel(AutoGGUFModel, name, lang, remote_loc)
+
+    def close(self):
+        """Closes the llama.cpp model backend freeing resources. The model is reloaded when used again.
+        """
+        self._java_obj.close()
--- spark_nlp-6.1.5/sparknlp/annotator/seq2seq/auto_gguf_reranker.py
+++ spark_nlp-6.2.0/sparknlp/annotator/seq2seq/auto_gguf_reranker.py
@@ -327,3 +327,8 @@ class AutoGGUFReranker(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppPropertie
         """
         from sparknlp.pretrained import ResourceDownloader
         return ResourceDownloader.downloadModel(AutoGGUFReranker, name, lang, remote_loc)
+
+    def close(self):
+        """Closes the llama.cpp model backend freeing resources. The model is reloaded when used again.
+        """
+        self._java_obj.close()
--- spark_nlp-6.1.5/sparknlp/annotator/seq2seq/auto_gguf_vision_model.py
+++ spark_nlp-6.2.0/sparknlp/annotator/seq2seq/auto_gguf_vision_model.py
@@ -15,7 +15,7 @@
 from sparknlp.common import *
 
 
-class AutoGGUFVisionModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties):
+class AutoGGUFVisionModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties, CompletionPostProcessing):
     """Multimodal annotator that uses the llama.cpp library to generate text completions with large
     language models. It supports ingesting images for captioning.
 
@@ -329,3 +329,8 @@ class AutoGGUFVisionModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppPropert
         """
         from sparknlp.pretrained import ResourceDownloader
         return ResourceDownloader.downloadModel(AutoGGUFVisionModel, name, lang, remote_loc)
+
+    def close(self):
+        """Closes the llama.cpp model backend freeing resources. The model is reloaded when used again.
+        """
+        self._java_obj.close()
--- spark_nlp-6.1.5/sparknlp/common/__init__.py
+++ spark_nlp-6.2.0/sparknlp/common/__init__.py
@@ -23,3 +23,4 @@ from sparknlp.common.storage import *
 from sparknlp.common.utils import *
 from sparknlp.common.annotator_type import *
 from sparknlp.common.match_strategy import *
+from sparknlp.common.completion_post_processing import *
--- /dev/null
+++ spark_nlp-6.2.0/sparknlp/common/completion_post_processing.py
@@ -0,0 +1,37 @@
+# Copyright 2017-2025 John Snow Labs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from pyspark.ml.param import Param, Params, TypeConverters
+
+
+class CompletionPostProcessing:
+    removeThinkingTag = Param(
+        Params._dummy(),
+        "removeThinkingTag",
+        "Set a thinking tag (e.g. think) to be removed from output. Will match <TAG>...</TAG>",
+        typeConverter=TypeConverters.toString,
+    )
+
+    def setRemoveThinkingTag(self, value: str):
+        """Set a thinking tag (e.g. `think`) to be removed from output.
+        Will produce the regex: `(?s)<$TAG>.+?</$TAG>`
+        """
+        self._set(removeThinkingTag=value)
+        return self
+
+    def getRemoveThinkingTag(self):
+        """Get the thinking tag to be removed from output."""
+        value = None
+        if self.removeThinkingTag in self._paramMap:
+            value = self._paramMap[self.removeThinkingTag]
+        return value
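Because AutoGGUFModel and AutoGGUFVisionModel now mix in CompletionPostProcessing, output from reasoning models can be scrubbed directly on the annotator. A sketch, assuming a pretrained GGUF model whose completions wrap chain-of-thought in <think>...</think>:

```python
from sparknlp.annotator import AutoGGUFModel

# setRemoveThinkingTag("think") strips (?s)<think>.+?</think> spans
# from the generated completions, per the docstring above.
model = AutoGGUFModel.pretrained() \
    .setInputCols(["document"]) \
    .setOutputCol("completions") \
    .setRemoveThinkingTag("think")
```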