spark-nlp 6.2.2__tar.gz → 6.2.2.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (296) hide show
  1. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/PKG-INFO +5 -5
  2. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/README.md +4 -4
  3. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/setup.py +1 -1
  4. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/spark_nlp.egg-info/PKG-INFO +5 -5
  5. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/__init__.py +11 -6
  6. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ner/ner_dl.py +0 -5
  7. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ner/ner_dl_graph_checker.py +15 -71
  8. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/partition/partition_properties.py +6 -146
  9. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/reader/reader2doc.py +1 -18
  10. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/reader/reader2table.py +1 -2
  11. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/com/__init__.py +0 -0
  12. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/com/johnsnowlabs/__init__.py +0 -0
  13. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/com/johnsnowlabs/ml/__init__.py +0 -0
  14. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
  15. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/com/johnsnowlabs/nlp/__init__.py +0 -0
  16. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/setup.cfg +0 -0
  17. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/spark_nlp.egg-info/SOURCES.txt +0 -0
  18. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/spark_nlp.egg-info/dependency_links.txt +0 -0
  19. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/spark_nlp.egg-info/top_level.txt +0 -0
  20. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotation.py +0 -0
  21. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotation_audio.py +0 -0
  22. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotation_image.py +0 -0
  23. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/__init__.py +0 -0
  24. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/audio/__init__.py +0 -0
  25. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
  26. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
  27. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
  28. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/chunk2_doc.py +0 -0
  29. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/chunker.py +0 -0
  30. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/__init__.py +0 -0
  31. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +0 -0
  32. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
  33. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
  34. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
  35. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +0 -0
  36. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
  37. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +0 -0
  38. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
  39. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
  40. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
  41. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
  42. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
  43. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
  44. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
  45. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +0 -0
  46. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
  47. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
  48. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
  49. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
  50. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
  51. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
  52. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
  53. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
  54. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
  55. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +0 -0
  56. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
  57. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
  58. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
  59. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
  60. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
  61. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
  62. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
  63. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +0 -0
  64. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
  65. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
  66. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
  67. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
  68. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
  69. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
  70. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +0 -0
  71. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
  72. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
  73. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
  74. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
  75. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
  76. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
  77. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cleaners/__init__.py +0 -0
  78. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cleaners/cleaner.py +0 -0
  79. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cleaners/extractor.py +0 -0
  80. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/coref/__init__.py +0 -0
  81. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
  82. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/__init__.py +0 -0
  83. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/blip_for_question_answering.py +0 -0
  84. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
  85. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
  86. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/florence2_transformer.py +0 -0
  87. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/gemma3_for_multimodal.py +0 -0
  88. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/internvl_for_multimodal.py +0 -0
  89. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/janus_for_multimodal.py +0 -0
  90. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/llava_for_multimodal.py +0 -0
  91. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/mllama_for_multimodal.py +0 -0
  92. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/paligemma_for_multimodal.py +0 -0
  93. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/phi3_vision_for_multimodal.py +0 -0
  94. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/qwen2vl_transformer.py +0 -0
  95. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/smolvlm_transformer.py +0 -0
  96. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
  97. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
  98. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
  99. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/dataframe_optimizer.py +0 -0
  100. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/date2_chunk.py +0 -0
  101. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/dependency/__init__.py +0 -0
  102. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
  103. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
  104. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/document_character_text_splitter.py +0 -0
  105. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/document_normalizer.py +0 -0
  106. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/document_token_splitter.py +0 -0
  107. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/document_token_splitter_test.py +0 -0
  108. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/__init__.py +0 -0
  109. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
  110. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/auto_gguf_embeddings.py +0 -0
  111. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
  112. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
  113. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -0
  114. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
  115. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
  116. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
  117. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
  118. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
  119. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
  120. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/e5v_embeddings.py +0 -0
  121. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
  122. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
  123. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
  124. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/minilm_embeddings.py +0 -0
  125. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
  126. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/mxbai_embeddings.py +0 -0
  127. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/nomic_embeddings.py +0 -0
  128. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
  129. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
  130. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
  131. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/snowflake_embeddings.py +0 -0
  132. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
  133. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
  134. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/word2vec.py +0 -0
  135. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
  136. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
  137. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
  138. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
  139. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/er/__init__.py +0 -0
  140. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/er/entity_ruler.py +0 -0
  141. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/graph_extraction.py +0 -0
  142. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
  143. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
  144. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ld_dl/__init__.py +0 -0
  145. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
  146. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/lemmatizer.py +0 -0
  147. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/matcher/__init__.py +0 -0
  148. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
  149. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/matcher/date_matcher.py +0 -0
  150. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
  151. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
  152. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/matcher/text_matcher.py +0 -0
  153. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/n_gram_generator.py +0 -0
  154. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ner/__init__.py +0 -0
  155. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ner/ner_approach.py +0 -0
  156. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ner/ner_converter.py +0 -0
  157. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ner/ner_crf.py +0 -0
  158. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
  159. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
  160. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/normalizer.py +0 -0
  161. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/openai/__init__.py +0 -0
  162. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/openai/openai_completion.py +0 -0
  163. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
  164. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/param/__init__.py +0 -0
  165. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/param/classifier_encoder.py +0 -0
  166. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
  167. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/pos/__init__.py +0 -0
  168. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/pos/perceptron.py +0 -0
  169. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/sentence/__init__.py +0 -0
  170. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
  171. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
  172. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/sentiment/__init__.py +0 -0
  173. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
  174. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
  175. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/__init__.py +0 -0
  176. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/auto_gguf_model.py +0 -0
  177. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/auto_gguf_reranker.py +0 -0
  178. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +0 -0
  179. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
  180. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/cohere_transformer.py +0 -0
  181. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/cpm_transformer.py +0 -0
  182. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
  183. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
  184. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/llama3_transformer.py +0 -0
  185. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
  186. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
  187. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
  188. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/nllb_transformer.py +0 -0
  189. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/olmo_transformer.py +0 -0
  190. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
  191. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/phi3_transformer.py +0 -0
  192. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/phi4_transformer.py +0 -0
  193. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/qwen_transformer.py +0 -0
  194. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/starcoder_transformer.py +0 -0
  195. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
  196. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/similarity/__init__.py +0 -0
  197. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
  198. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/spell_check/__init__.py +0 -0
  199. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
  200. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
  201. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
  202. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/stemmer.py +0 -0
  203. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/stop_words_cleaner.py +0 -0
  204. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
  205. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/token/__init__.py +0 -0
  206. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
  207. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
  208. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
  209. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/token/tokenizer.py +0 -0
  210. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/token2_chunk.py +0 -0
  211. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ws/__init__.py +0 -0
  212. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/annotator/ws/word_segmenter.py +0 -0
  213. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/__init__.py +0 -0
  214. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/audio_assembler.py +0 -0
  215. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/doc2_chunk.py +0 -0
  216. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/document_assembler.py +0 -0
  217. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/embeddings_finisher.py +0 -0
  218. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/finisher.py +0 -0
  219. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/gguf_ranking_finisher.py +0 -0
  220. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/graph_finisher.py +0 -0
  221. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/has_recursive_fit.py +0 -0
  222. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/has_recursive_transform.py +0 -0
  223. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/image_assembler.py +0 -0
  224. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/light_pipeline.py +0 -0
  225. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/multi_document_assembler.py +0 -0
  226. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/prompt_assembler.py +0 -0
  227. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/recursive_pipeline.py +0 -0
  228. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/table_assembler.py +0 -0
  229. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/base/token_assembler.py +0 -0
  230. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/__init__.py +0 -0
  231. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/annotator_approach.py +0 -0
  232. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/annotator_model.py +0 -0
  233. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/annotator_properties.py +0 -0
  234. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/annotator_type.py +0 -0
  235. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/completion_post_processing.py +0 -0
  236. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/coverage_result.py +0 -0
  237. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/match_strategy.py +0 -0
  238. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/properties.py +0 -0
  239. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/read_as.py +0 -0
  240. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/recursive_annotator_approach.py +0 -0
  241. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/storage.py +0 -0
  242. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/common/utils.py +0 -0
  243. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/functions.py +0 -0
  244. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/internal/__init__.py +0 -0
  245. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/internal/annotator_java_ml.py +0 -0
  246. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/internal/annotator_transformer.py +0 -0
  247. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/internal/extended_java_wrapper.py +0 -0
  248. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/internal/params_getters_setters.py +0 -0
  249. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/internal/recursive.py +0 -0
  250. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/logging/__init__.py +0 -0
  251. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/logging/comet.py +0 -0
  252. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/partition/__init__.py +0 -0
  253. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/partition/partition.py +0 -0
  254. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/partition/partition_transformer.py +0 -0
  255. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/pretrained/__init__.py +0 -0
  256. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
  257. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/pretrained/resource_downloader.py +0 -0
  258. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/pretrained/utils.py +0 -0
  259. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/reader/__init__.py +0 -0
  260. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/reader/enums.py +0 -0
  261. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/reader/pdf_to_text.py +0 -0
  262. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/reader/reader2image.py +0 -0
  263. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/reader/reader_assembler.py +0 -0
  264. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/reader/sparknlp_reader.py +0 -0
  265. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/__init__.py +0 -0
  266. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  267. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
  268. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  269. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
  270. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
  271. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
  272. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
  273. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
  274. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
  275. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
  276. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
  277. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
  278. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
  279. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
  280. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
  281. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  282. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
  283. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  284. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
  285. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
  286. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
  287. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
  288. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
  289. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/conll.py +0 -0
  290. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/conllu.py +0 -0
  291. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/pos.py +0 -0
  292. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/pub_tator.py +0 -0
  293. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/spacy_to_annotation.py +0 -0
  294. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/training/tfgraphs.py +0 -0
  295. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/upload_to_hub.py +0 -0
  296. {spark_nlp-6.2.2 → spark_nlp-6.2.2.dev2}/sparknlp/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spark-nlp
3
- Version: 6.2.2
3
+ Version: 6.2.2.dev2
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
102
102
  $ conda create -n sparknlp python=3.7 -y
103
103
  $ conda activate sparknlp
104
104
  # spark-nlp by default is based on pyspark 3.x
105
- $ pip install spark-nlp==6.2.2 pyspark==3.3.1
105
+ $ pip install spark-nlp==6.2.0 pyspark==3.3.1
106
106
  ```
107
107
 
108
108
  In Python console or Jupyter `Python3` kernel:
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
168
168
 
169
169
  ### Apache Spark Support
170
170
 
171
- Spark NLP *6.2.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
171
+ Spark NLP *6.2.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
172
172
 
173
173
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
174
174
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
198
198
 
199
199
  ### Databricks Support
200
200
 
201
- Spark NLP 6.2.2 has been tested and is compatible with the following runtimes:
201
+ Spark NLP 6.2.0 has been tested and is compatible with the following runtimes:
202
202
 
203
203
  | **CPU** | **GPU** |
204
204
  |--------------------|--------------------|
@@ -216,7 +216,7 @@ We are compatible with older runtimes. For a full list check databricks support
216
216
 
217
217
  ### EMR Support
218
218
 
219
- Spark NLP 6.2.2 has been tested and is compatible with the following EMR releases:
219
+ Spark NLP 6.2.0 has been tested and is compatible with the following EMR releases:
220
220
 
221
221
  | **EMR Release** |
222
222
  |--------------------|
@@ -63,7 +63,7 @@ $ java -version
63
63
  $ conda create -n sparknlp python=3.7 -y
64
64
  $ conda activate sparknlp
65
65
  # spark-nlp by default is based on pyspark 3.x
66
- $ pip install spark-nlp==6.2.2 pyspark==3.3.1
66
+ $ pip install spark-nlp==6.2.0 pyspark==3.3.1
67
67
  ```
68
68
 
69
69
  In Python console or Jupyter `Python3` kernel:
@@ -129,7 +129,7 @@ For a quick example of using pipelines and models take a look at our official [d
129
129
 
130
130
  ### Apache Spark Support
131
131
 
132
- Spark NLP *6.2.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
132
+ Spark NLP *6.2.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
133
133
 
134
134
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
135
135
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -159,7 +159,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
159
159
 
160
160
  ### Databricks Support
161
161
 
162
- Spark NLP 6.2.2 has been tested and is compatible with the following runtimes:
162
+ Spark NLP 6.2.0 has been tested and is compatible with the following runtimes:
163
163
 
164
164
  | **CPU** | **GPU** |
165
165
  |--------------------|--------------------|
@@ -177,7 +177,7 @@ We are compatible with older runtimes. For a full list check databricks support
177
177
 
178
178
  ### EMR Support
179
179
 
180
- Spark NLP 6.2.2 has been tested and is compatible with the following EMR releases:
180
+ Spark NLP 6.2.0 has been tested and is compatible with the following EMR releases:
181
181
 
182
182
  | **EMR Release** |
183
183
  |--------------------|
@@ -41,7 +41,7 @@ setup(
41
41
  # project code, see
42
42
  # https://packaging.python.org/en/latest/single_source_version.html
43
43
 
44
- version='6.2.2', # Required
44
+ version='6.2.2dev2', # Required
45
45
 
46
46
  # This is a one-line description or tagline of what your project does. This
47
47
  # corresponds to the 'Summary' metadata field:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spark-nlp
3
- Version: 6.2.2
3
+ Version: 6.2.2.dev2
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
102
102
  $ conda create -n sparknlp python=3.7 -y
103
103
  $ conda activate sparknlp
104
104
  # spark-nlp by default is based on pyspark 3.x
105
- $ pip install spark-nlp==6.2.2 pyspark==3.3.1
105
+ $ pip install spark-nlp==6.2.0 pyspark==3.3.1
106
106
  ```
107
107
 
108
108
  In Python console or Jupyter `Python3` kernel:
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
168
168
 
169
169
  ### Apache Spark Support
170
170
 
171
- Spark NLP *6.2.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
171
+ Spark NLP *6.2.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
172
172
 
173
173
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
174
174
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
198
198
 
199
199
  ### Databricks Support
200
200
 
201
- Spark NLP 6.2.2 has been tested and is compatible with the following runtimes:
201
+ Spark NLP 6.2.0 has been tested and is compatible with the following runtimes:
202
202
 
203
203
  | **CPU** | **GPU** |
204
204
  |--------------------|--------------------|
@@ -216,7 +216,7 @@ We are compatible with older runtimes. For a full list check databricks support
216
216
 
217
217
  ### EMR Support
218
218
 
219
- Spark NLP 6.2.2 has been tested and is compatible with the following EMR releases:
219
+ Spark NLP 6.2.0 has been tested and is compatible with the following EMR releases:
220
220
 
221
221
  | **EMR Release** |
222
222
  |--------------------|
@@ -66,7 +66,7 @@ sys.modules['com.johnsnowlabs.ml.ai'] = annotator
66
66
  annotators = annotator
67
67
  embeddings = annotator
68
68
 
69
- __version__ = "6.2.2"
69
+ __version__ = "6.2.2-dev2"
70
70
 
71
71
 
72
72
  def start(gpu=False,
@@ -78,7 +78,8 @@ def start(gpu=False,
78
78
  cluster_tmp_dir="",
79
79
  params=None,
80
80
  real_time_output=False,
81
- output_level=1):
81
+ output_level=1,
82
+ scala213=False):
82
83
  """Starts a PySpark instance with default parameters for Spark NLP.
83
84
 
84
85
  The default parameters would result in the equivalent of:
@@ -122,6 +123,8 @@ def start(gpu=False,
122
123
  Whether to read and print JVM output in real time, by default False
123
124
  output_level : int, optional
124
125
  Output level for logs, by default 1
126
+ scala213 : bool, optional
127
+ Whether to use Scala 2.13 build of Spark NLP, by default False (Scala 2.12)
125
128
 
126
129
  Notes
127
130
  -----
@@ -159,12 +162,13 @@ def start(gpu=False,
159
162
  self.serializer, self.serializer_max_buffer = "org.apache.spark.serializer.KryoSerializer", "2000M"
160
163
  self.driver_max_result_size = "0"
161
164
  # Spark NLP on CPU or GPU
162
- self.maven_spark3 = "com.johnsnowlabs.nlp:spark-nlp_2.12:{}".format(current_version)
163
- self.maven_gpu_spark3 = "com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:{}".format(current_version)
165
+ scala_version = "2.13" if scala213 else "2.12"
166
+ self.maven_spark3 = f"com.johnsnowlabs.nlp:spark-nlp_{scala_version}:{current_version}"
167
+ self.maven_gpu_spark3 = f"com.johnsnowlabs.nlp:spark-nlp-gpu_{scala_version}:{current_version}"
164
168
  # Spark NLP on Apple Silicon
165
- self.maven_silicon = "com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:{}".format(current_version)
169
+ self.maven_silicon = f"com.johnsnowlabs.nlp:spark-nlp-silicon_{scala_version}:{current_version}"
166
170
  # Spark NLP on Linux Aarch64
167
- self.maven_aarch64 = "com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:{}".format(current_version)
171
+ self.maven_aarch64 = f"com.johnsnowlabs.nlp:spark-nlp-aarch64_{scala_version}:{current_version}"
168
172
 
169
173
  def start_without_realtime_output():
170
174
  builder = SparkSession.builder \
@@ -318,4 +322,5 @@ def version():
318
322
  str
319
323
  The current Spark NLP version.
320
324
  """
325
+
321
326
  return __version__
@@ -41,11 +41,6 @@ class NerDLApproach(AnnotatorApproach, NerApproach, EvaluationDLParams):
41
41
  - a WordEmbeddingsModel (any embeddings can be chosen, e.g. BertEmbeddings
42
42
  for BERT based embeddings).
43
43
 
44
- By default, collects all data points into memory for training. For larger datasets, use
45
- ``setEnableMemoryOptimizer(true)``. This will optimize memory usage during training at the cost
46
- of speed. Note that this annotator will use as much memory as the largest partition of the
47
- input dataset, so we recommend repartitioning to batch sizes.
48
-
49
44
  Setting a test dataset to monitor model metrics can be done with
50
45
  ``.setTestDataset``. The method expects a path to a parquet file containing a
51
46
  dataframe that has the same required columns as the training dataframe. The
@@ -13,10 +13,10 @@
13
13
  # limitations under the License.
14
14
  """Contains classes for NerDL."""
15
15
 
16
- from pyspark.ml.util import JavaMLReadable
17
-
18
- import sparknlp.internal as _internal
19
16
  from sparknlp.common import *
17
+ import sparknlp.internal as _internal
18
+ from pyspark.ml.util import JavaMLWritable
19
+ from pyspark.ml.wrapper import JavaEstimator
20
20
 
21
21
 
22
22
  class NerDLGraphChecker(
@@ -28,9 +28,6 @@ class NerDLGraphChecker(
28
28
  computations/training is done. This annotator is useful for custom training cases, where
29
29
  specialized graphs are needed.
30
30
 
31
- This annotator will fill graph hyperparameters as metadata in the label column, which will be
32
- available for NerDLApproach, saving computations.
33
-
34
31
  Important: This annotator should be used or positioned before any embedding or NerDLApproach
35
32
  annotators in the pipeline and will process the whole dataset to extract the required graph parameters.
36
33
 
@@ -205,18 +202,17 @@ class NerDLGraphChecker(
205
202
  # self._setDefault()
206
203
 
207
204
  def _create_model(self, java_model):
208
- return NerDLGraphCheckerModel(java_model=java_model)
205
+ return NerDLGraphCheckerModel()
209
206
 
210
207
 
211
208
  class NerDLGraphCheckerModel(
212
209
  JavaModel,
213
210
  JavaMLWritable,
214
- JavaMLReadable,
215
211
  _internal.ParamsGettersSetters,
216
212
  ):
217
- """Resulting model from `NerDLGraphChecker`, that updates dataframe metadata (label column)
218
- with NerDLGraph parameters. It does not perform any actual data transformations, as the
219
- checks/computations are done during the `fit` phase.
213
+ """
214
+ Resulting model from NerDLGraphChecker, that does not perform any transformations, as the
215
+ checks are done during the ``fit`` phase. It acts as the identity.
220
216
 
221
217
  This annotator should never be used directly.
222
218
  """
@@ -228,66 +224,14 @@ class NerDLGraphCheckerModel(
228
224
 
229
225
  @keyword_only
230
226
  def __init__(
231
- self,
232
- classname="com.johnsnowlabs.nlp.annotators.ner.dl.NerDLGraphCheckerModel",
233
- java_model=None,
227
+ self,
228
+ classname="com.johnsnowlabs.nlp.annotators.ner.dl.NerDLGraphCheckerModel",
229
+ java_model=None,
234
230
  ):
235
- # Custom init, different from AnnotatorModel
236
- # We don't have a output annotation column, so we inherit directly from JavaModel
231
+ super(NerDLGraphCheckerModel, self).__init__(java_model=java_model)
232
+ if classname and not java_model:
233
+ self.__class__._java_class_name = classname
234
+ self._java_obj = self._new_java_obj(classname, self.uid)
237
235
  if java_model is not None:
238
- super(NerDLGraphCheckerModel, self).__init__(java_model=java_model)
239
- self._java_obj = java_model
240
236
  self._transfer_params_from_java()
241
- elif classname:
242
- super(NerDLGraphCheckerModel, self).__init__()
243
- self.__class__._java_class_name = classname
244
- self._java_obj = self._new_java_obj(classname)
245
-
246
- # Metadata keys for graph parameters
247
- graphParamsMetadataKey = "NerDLGraphCheckerParams"
248
- embeddingsDimKey = "embeddingsDim"
249
- labelsKey = "labels"
250
- charsKey = "chars"
251
- dsLenKey = "dsLen"
252
-
253
- labelColumn = Param(
254
- Params._dummy(),
255
- "labelColumn",
256
- "Column with label per each token",
257
- typeConverter=TypeConverters.toString,
258
- )
259
-
260
- embeddingsDim = Param(
261
- Params._dummy(),
262
- "embeddingsDim",
263
- "Dimensionality of embeddings",
264
- typeConverter=TypeConverters.toInt,
265
- )
266
-
267
- labels = Param(
268
- Params._dummy(),
269
- "labels",
270
- "Labels in the dataset",
271
- typeConverter=TypeConverters.toListString,
272
- )
273
-
274
- chars = Param(
275
- Params._dummy(),
276
- "chars",
277
- "Set of characters in the dataset",
278
- typeConverter=TypeConverters.toListString,
279
- )
280
-
281
- graphFolder = Param(
282
- Params._dummy(),
283
- "graphFolder",
284
- "Folder path that contain external graph files",
285
- typeConverter=TypeConverters.toString,
286
- )
287
-
288
- dsLen = Param(
289
- Params._dummy(),
290
- "dsLen",
291
- "Length of the training dataset.",
292
- typeConverter=TypeConverters.toInt,
293
- )
237
+ # self._setDefault(lazyAnnotator=False)
@@ -17,6 +17,7 @@ from pyspark.ml.param import Param, Params, TypeConverters
17
17
 
18
18
 
19
19
  class HasReaderProperties(Params):
20
+
20
21
  inputCol = Param(
21
22
  Params._dummy(),
22
23
  "inputCol",
@@ -244,8 +245,8 @@ class HasReaderProperties(Params):
244
245
  """
245
246
  return self._set(outputAsDocument=value)
246
247
 
247
-
248
248
  class HasEmailReaderProperties(Params):
249
+
249
250
  addAttachmentContent = Param(
250
251
  Params._dummy(),
251
252
  "addAttachmentContent",
@@ -277,6 +278,7 @@ class HasEmailReaderProperties(Params):
277
278
 
278
279
 
279
280
  class HasExcelReaderProperties(Params):
281
+
280
282
  cellSeparator = Param(
281
283
  Params._dummy(),
282
284
  "cellSeparator",
@@ -335,8 +337,8 @@ class HasExcelReaderProperties(Params):
335
337
  """
336
338
  return self.getOrDefault(self.appendCells)
337
339
 
338
-
339
340
  class HasHTMLReaderProperties(Params):
341
+
340
342
  timeout = Param(
341
343
  Params._dummy(),
342
344
  "timeout",
@@ -393,8 +395,8 @@ class HasHTMLReaderProperties(Params):
393
395
  """
394
396
  return self._set(outputFormat=value)
395
397
 
396
-
397
398
  class HasPowerPointProperties(Params):
399
+
398
400
  includeSlideNotes = Param(
399
401
  Params._dummy(),
400
402
  "includeSlideNotes",
@@ -424,8 +426,8 @@ class HasPowerPointProperties(Params):
424
426
  """
425
427
  return self.getOrDefault(self.includeSlideNotes)
426
428
 
427
-
428
429
  class HasTextReaderProperties(Params):
430
+
429
431
  titleLengthSize = Param(
430
432
  Params._dummy(),
431
433
  "titleLengthSize",
@@ -434,28 +436,9 @@ class HasTextReaderProperties(Params):
434
436
  )
435
437
 
436
438
  def setTitleLengthSize(self, value):
437
- """Set the maximum character length used to identify title blocks.
438
-
439
- Parameters
440
- ----------
441
- value : int
442
- Maximum number of characters a text block can have to be considered a title.
443
-
444
- Returns
445
- -------
446
- self
447
- The instance with updated `titleLengthSize` parameter.
448
- """
449
439
  return self._set(titleLengthSize=value)
450
440
 
451
441
  def getTitleLengthSize(self):
452
- """Get the configured maximum title length.
453
-
454
- Returns
455
- -------
456
- int
457
- The maximum character length used to detect title blocks.
458
- """
459
442
  return self.getOrDefault(self.titleLengthSize)
460
443
 
461
444
  groupBrokenParagraphs = Param(
@@ -466,28 +449,9 @@ class HasTextReaderProperties(Params):
466
449
  )
467
450
 
468
451
  def setGroupBrokenParagraphs(self, value):
469
- """Enable or disable grouping of broken paragraphs.
470
-
471
- Parameters
472
- ----------
473
- value : bool
474
- True to merge fragmented lines into paragraphs, False to leave lines as-is.
475
-
476
- Returns
477
- -------
478
- self
479
- The instance with updated `groupBrokenParagraphs` parameter.
480
- """
481
452
  return self._set(groupBrokenParagraphs=value)
482
453
 
483
454
  def getGroupBrokenParagraphs(self):
484
- """Get whether broken paragraph grouping is enabled.
485
-
486
- Returns
487
- -------
488
- bool
489
- True if grouping of broken paragraphs is enabled, False otherwise.
490
- """
491
455
  return self.getOrDefault(self.groupBrokenParagraphs)
492
456
 
493
457
  paragraphSplit = Param(
@@ -498,28 +462,9 @@ class HasTextReaderProperties(Params):
498
462
  )
499
463
 
500
464
  def setParagraphSplit(self, value):
501
- """Set the regex pattern used to split paragraphs when grouping broken paragraphs.
502
-
503
- Parameters
504
- ----------
505
- value : str
506
- Regular expression string used to detect paragraph boundaries.
507
-
508
- Returns
509
- -------
510
- self
511
- The instance with updated `paragraphSplit` parameter.
512
- """
513
465
  return self._set(paragraphSplit=value)
514
466
 
515
467
  def getParagraphSplit(self):
516
- """Get the paragraph-splitting regex pattern.
517
-
518
- Returns
519
- -------
520
- str
521
- The regex pattern used to detect paragraph boundaries.
522
- """
523
468
  return self.getOrDefault(self.paragraphSplit)
524
469
 
525
470
  shortLineWordThreshold = Param(
@@ -530,28 +475,9 @@ class HasTextReaderProperties(Params):
530
475
  )
531
476
 
532
477
  def setShortLineWordThreshold(self, value):
533
- """Set the maximum word count for a line to be considered short.
534
-
535
- Parameters
536
- ----------
537
- value : int
538
- Number of words under which a line is considered 'short'.
539
-
540
- Returns
541
- -------
542
- self
543
- The instance with updated `shortLineWordThreshold` parameter.
544
- """
545
478
  return self._set(shortLineWordThreshold=value)
546
479
 
547
480
  def getShortLineWordThreshold(self):
548
- """Get the short line word threshold.
549
-
550
- Returns
551
- -------
552
- int
553
- Word count threshold for short lines used in paragraph grouping.
554
- """
555
481
  return self.getOrDefault(self.shortLineWordThreshold)
556
482
 
557
483
  maxLineCount = Param(
@@ -562,28 +488,9 @@ class HasTextReaderProperties(Params):
562
488
  )
563
489
 
564
490
  def setMaxLineCount(self, value):
565
- """Set the maximum number of lines to inspect when estimating paragraph layout.
566
-
567
- Parameters
568
- ----------
569
- value : int
570
- Maximum number of lines to evaluate for layout heuristics.
571
-
572
- Returns
573
- -------
574
- self
575
- The instance with updated `maxLineCount` parameter.
576
- """
577
491
  return self._set(maxLineCount=value)
578
492
 
579
493
  def getMaxLineCount(self):
580
- """Get the maximum number of lines used for layout heuristics.
581
-
582
- Returns
583
- -------
584
- int
585
- The configured maximum number of lines to consider.
586
- """
587
494
  return self.getOrDefault(self.maxLineCount)
588
495
 
589
496
  threshold = Param(
@@ -594,58 +501,11 @@ class HasTextReaderProperties(Params):
594
501
  )
595
502
 
596
503
  def setThreshold(self, value):
597
- """Set the empty-line ratio threshold for paragraph grouping decision.
598
-
599
- Parameters
600
- ----------
601
- value : float
602
- Ratio (0.0-1.0) of empty lines used to switch grouping strategies.
603
-
604
- Returns
605
- -------
606
- self
607
- The instance with updated `threshold` parameter.
608
- """
609
504
  return self._set(threshold=value)
610
505
 
611
506
  def getThreshold(self):
612
- """Get the configured empty-line threshold ratio.
613
-
614
- Returns
615
- -------
616
- float
617
- The ratio used to decide paragraph grouping strategy.
618
- """
619
507
  return self.getOrDefault(self.threshold)
620
508
 
621
- extractTagAttributes = Param(
622
- Params._dummy(),
623
- "extractTagAttributes",
624
- "Extract attribute values into separate lines when parsing tag-based formats (e.g., HTML or XML).",
625
- typeConverter=TypeConverters.toListString
626
- )
627
-
628
- def setExtractTagAttributes(self, attributes: list[str]):
629
- """
630
- Specify which tag attributes should have their values extracted as text when parsing
631
- tag-based formats (e.g., HTML or XML).
632
-
633
- :param attributes: list of attribute names to extract
634
- :return: this instance with the updated `extractTagAttributes` parameter
635
- """
636
- return self._set(extractTagAttributes=attributes)
637
-
638
- def getExtractTagAttributes(self):
639
- """Get the list of tag attribute names configured to be extracted.
640
-
641
- Returns
642
- -------
643
- list[str]
644
- The attribute names whose values will be extracted as text.
645
- """
646
- return self.getOrDefault(self.extractTagAttributes)
647
-
648
-
649
509
  class HasChunkerProperties(Params):
650
510
 
651
511
  chunkingStrategy = Param(
@@ -91,19 +91,6 @@ class Reader2Doc(
91
91
  """
92
92
  return self._set(excludeNonText=value)
93
93
 
94
- joinString = Param(
95
- Params._dummy(),
96
- "joinString",
97
- "If outputAsDocument is true, specifies the string used to join elements into a single document.",
98
- typeConverter=TypeConverters.toString
99
- )
100
-
101
- def setJoinString(self, value):
102
- """
103
- If outputAsDocument is true, specifies the string used to join elements into a single
104
- """
105
- return self._set(joinString=value)
106
-
107
94
  @keyword_only
108
95
  def __init__(self):
109
96
  super(Reader2Doc, self).__init__(classname="com.johnsnowlabs.reader.Reader2Doc")
@@ -112,12 +99,8 @@ class Reader2Doc(
112
99
  explodeDocs=False,
113
100
  contentType="",
114
101
  flattenOutput=False,
115
- outputAsDocument=True,
116
- outputFormat="plain-text",
117
- excludeNonText=False,
118
- joinString="\n"
102
+ titleThreshold=18
119
103
  )
120
-
121
104
  @keyword_only
122
105
  def setParams(self):
123
106
  kwargs = self._input_kwargs
@@ -35,8 +35,7 @@ class Reader2Table(
35
35
  @keyword_only
36
36
  def __init__(self):
37
37
  super(Reader2Table, self).__init__(classname="com.johnsnowlabs.reader.Reader2Table")
38
- self._setDefault(outputCol="document", outputFormat="json-table", inferTableStructure=True,
39
- outputAsDocument=False)
38
+ self._setDefault(outputCol="document")
40
39
 
41
40
  @keyword_only
42
41
  def setParams(self):
File without changes