spark-nlp 6.1.4__tar.gz → 6.1.5__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of spark-nlp might be problematic.

Files changed (295)
  1. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/PKG-INFO +5 -5
  2. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/README.md +4 -4
  3. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/setup.py +1 -1
  4. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/spark_nlp.egg-info/PKG-INFO +5 -5
  5. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/spark_nlp.egg-info/SOURCES.txt +1 -0
  6. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/__init__.py +1 -1
  7. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/partition/partition_properties.py +77 -10
  8. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/reader2doc.py +12 -65
  9. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/reader2table.py +0 -34
  10. spark_nlp-6.1.5/sparknlp/reader/reader_assembler.py +159 -0
  11. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/com/__init__.py +0 -0
  12. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/com/johnsnowlabs/__init__.py +0 -0
  13. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/com/johnsnowlabs/ml/__init__.py +0 -0
  14. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
  15. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/com/johnsnowlabs/nlp/__init__.py +0 -0
  16. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/setup.cfg +0 -0
  17. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/spark_nlp.egg-info/dependency_links.txt +0 -0
  18. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/spark_nlp.egg-info/top_level.txt +0 -0
  19. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotation.py +0 -0
  20. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotation_audio.py +0 -0
  21. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotation_image.py +0 -0
  22. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/__init__.py +0 -0
  23. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/audio/__init__.py +0 -0
  24. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
  25. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
  26. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
  27. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/chunk2_doc.py +0 -0
  28. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/chunker.py +0 -0
  29. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/__init__.py +0 -0
  30. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +0 -0
  31. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
  32. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
  33. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
  34. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +0 -0
  35. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
  36. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +0 -0
  37. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
  38. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
  39. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
  40. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
  41. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
  42. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
  43. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
  44. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +0 -0
  45. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
  46. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
  47. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
  48. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
  49. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
  50. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
  51. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
  52. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
  53. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
  54. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +0 -0
  55. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
  56. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
  57. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
  58. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
  59. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
  60. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
  61. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
  62. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +0 -0
  63. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
  64. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
  65. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
  66. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
  67. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
  68. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
  69. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +0 -0
  70. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
  71. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
  72. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
  73. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
  74. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
  75. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
  76. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cleaners/__init__.py +0 -0
  77. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cleaners/cleaner.py +0 -0
  78. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cleaners/extractor.py +0 -0
  79. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/coref/__init__.py +0 -0
  80. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
  81. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/__init__.py +0 -0
  82. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/blip_for_question_answering.py +0 -0
  83. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
  84. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
  85. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/florence2_transformer.py +0 -0
  86. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/gemma3_for_multimodal.py +0 -0
  87. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/internvl_for_multimodal.py +0 -0
  88. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/janus_for_multimodal.py +0 -0
  89. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/llava_for_multimodal.py +0 -0
  90. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/mllama_for_multimodal.py +0 -0
  91. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/paligemma_for_multimodal.py +0 -0
  92. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/phi3_vision_for_multimodal.py +0 -0
  93. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/qwen2vl_transformer.py +0 -0
  94. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/smolvlm_transformer.py +0 -0
  95. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
  96. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
  97. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
  98. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/dataframe_optimizer.py +0 -0
  99. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/date2_chunk.py +0 -0
  100. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/dependency/__init__.py +0 -0
  101. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
  102. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
  103. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/document_character_text_splitter.py +0 -0
  104. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/document_normalizer.py +0 -0
  105. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/document_token_splitter.py +0 -0
  106. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/document_token_splitter_test.py +0 -0
  107. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/__init__.py +0 -0
  108. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
  109. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/auto_gguf_embeddings.py +0 -0
  110. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
  111. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
  112. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -0
  113. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
  114. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
  115. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
  116. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
  117. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
  118. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
  119. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/e5v_embeddings.py +0 -0
  120. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
  121. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
  122. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
  123. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/minilm_embeddings.py +0 -0
  124. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
  125. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/mxbai_embeddings.py +0 -0
  126. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/nomic_embeddings.py +0 -0
  127. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
  128. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
  129. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
  130. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/snowflake_embeddings.py +0 -0
  131. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
  132. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
  133. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/word2vec.py +0 -0
  134. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
  135. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
  136. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
  137. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
  138. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/er/__init__.py +0 -0
  139. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/er/entity_ruler.py +0 -0
  140. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/graph_extraction.py +0 -0
  141. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
  142. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
  143. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ld_dl/__init__.py +0 -0
  144. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
  145. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/lemmatizer.py +0 -0
  146. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/__init__.py +0 -0
  147. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
  148. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/date_matcher.py +0 -0
  149. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
  150. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
  151. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/text_matcher.py +0 -0
  152. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/n_gram_generator.py +0 -0
  153. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/__init__.py +0 -0
  154. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_approach.py +0 -0
  155. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_converter.py +0 -0
  156. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_crf.py +0 -0
  157. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_dl.py +0 -0
  158. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_dl_graph_checker.py +0 -0
  159. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
  160. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
  161. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/normalizer.py +0 -0
  162. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/openai/__init__.py +0 -0
  163. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/openai/openai_completion.py +0 -0
  164. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
  165. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/param/__init__.py +0 -0
  166. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/param/classifier_encoder.py +0 -0
  167. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
  168. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/pos/__init__.py +0 -0
  169. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/pos/perceptron.py +0 -0
  170. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentence/__init__.py +0 -0
  171. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
  172. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
  173. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentiment/__init__.py +0 -0
  174. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
  175. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
  176. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/__init__.py +0 -0
  177. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/auto_gguf_model.py +0 -0
  178. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/auto_gguf_reranker.py +0 -0
  179. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +0 -0
  180. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
  181. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/cohere_transformer.py +0 -0
  182. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/cpm_transformer.py +0 -0
  183. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
  184. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
  185. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/llama3_transformer.py +0 -0
  186. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
  187. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
  188. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
  189. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/nllb_transformer.py +0 -0
  190. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/olmo_transformer.py +0 -0
  191. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
  192. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/phi3_transformer.py +0 -0
  193. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/phi4_transformer.py +0 -0
  194. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/qwen_transformer.py +0 -0
  195. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/starcoder_transformer.py +0 -0
  196. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
  197. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/similarity/__init__.py +0 -0
  198. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
  199. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/spell_check/__init__.py +0 -0
  200. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
  201. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
  202. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
  203. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/stemmer.py +0 -0
  204. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/stop_words_cleaner.py +0 -0
  205. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
  206. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token/__init__.py +0 -0
  207. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
  208. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
  209. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
  210. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token/tokenizer.py +0 -0
  211. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token2_chunk.py +0 -0
  212. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ws/__init__.py +0 -0
  213. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ws/word_segmenter.py +0 -0
  214. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/__init__.py +0 -0
  215. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/audio_assembler.py +0 -0
  216. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/doc2_chunk.py +0 -0
  217. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/document_assembler.py +0 -0
  218. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/embeddings_finisher.py +0 -0
  219. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/finisher.py +0 -0
  220. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/gguf_ranking_finisher.py +0 -0
  221. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/graph_finisher.py +0 -0
  222. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/has_recursive_fit.py +0 -0
  223. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/has_recursive_transform.py +0 -0
  224. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/image_assembler.py +0 -0
  225. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/light_pipeline.py +0 -0
  226. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/multi_document_assembler.py +0 -0
  227. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/prompt_assembler.py +0 -0
  228. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/recursive_pipeline.py +0 -0
  229. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/table_assembler.py +0 -0
  230. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/token_assembler.py +0 -0
  231. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/__init__.py +0 -0
  232. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/annotator_approach.py +0 -0
  233. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/annotator_model.py +0 -0
  234. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/annotator_properties.py +0 -0
  235. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/annotator_type.py +0 -0
  236. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/coverage_result.py +0 -0
  237. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/match_strategy.py +0 -0
  238. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/properties.py +0 -0
  239. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/read_as.py +0 -0
  240. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/recursive_annotator_approach.py +0 -0
  241. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/storage.py +0 -0
  242. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/utils.py +0 -0
  243. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/functions.py +0 -0
  244. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/__init__.py +0 -0
  245. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/annotator_java_ml.py +0 -0
  246. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/annotator_transformer.py +0 -0
  247. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/extended_java_wrapper.py +0 -0
  248. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/params_getters_setters.py +0 -0
  249. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/recursive.py +0 -0
  250. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/logging/__init__.py +0 -0
  251. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/logging/comet.py +0 -0
  252. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/partition/__init__.py +0 -0
  253. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/partition/partition.py +0 -0
  254. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/partition/partition_transformer.py +0 -0
  255. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/pretrained/__init__.py +0 -0
  256. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
  257. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/pretrained/resource_downloader.py +0 -0
  258. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/pretrained/utils.py +0 -0
  259. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/__init__.py +0 -0
  260. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/enums.py +0 -0
  261. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/pdf_to_text.py +0 -0
  262. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/reader2image.py +0 -0
  263. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/sparknlp_reader.py +0 -0
  264. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/__init__.py +0 -0
  265. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  266. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
  267. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  268. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
  269. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
  270. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
  271. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
  272. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
  273. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
  274. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
  275. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
  276. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
  277. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
  278. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
  279. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
  280. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  281. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
  282. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  283. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
  284. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
  285. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
  286. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
  287. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
  288. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/conll.py +0 -0
  289. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/conllu.py +0 -0
  290. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/pos.py +0 -0
  291. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/pub_tator.py +0 -0
  292. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/spacy_to_annotation.py +0 -0
  293. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/tfgraphs.py +0 -0
  294. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/upload_to_hub.py +0 -0
  295. {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/util.py +0 -0
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spark-nlp
-Version: 6.1.4
+Version: 6.1.5
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.1.4 pyspark==3.3.1
+$ pip install spark-nlp==6.1.5 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
 
 ### Apache Spark Support
 
-Spark NLP *6.1.4* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
 
 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
 
 ### Databricks Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following runtimes:
+Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
 
 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -216,7 +216,7 @@ We are compatible with older runtimes. For a full list check databricks support
 
 ### EMR Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
 
 | **EMR Release** |
 |--------------------|
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.1.4 pyspark==3.3.1
+$ pip install spark-nlp==6.1.5 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
@@ -129,7 +129,7 @@ For a quick example of using pipelines and models take a look at our official [d
 
 ### Apache Spark Support
 
-Spark NLP *6.1.4* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
 
 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -159,7 +159,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
 
 ### Databricks Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following runtimes:
+Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
 
 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -177,7 +177,7 @@ We are compatible with older runtimes. For a full list check databricks support
 
 ### EMR Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
 
 | **EMR Release** |
 |--------------------|
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@ setup(
     # project code, see
     # https://packaging.python.org/en/latest/single_source_version.html
 
-    version='6.1.4', # Required
+    version='6.1.5', # Required
 
     # This is a one-line description or tagline of what your project does. This
     # corresponds to the 'Summary' metadata field:
--- a/spark_nlp.egg-info/PKG-INFO
+++ b/spark_nlp.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spark-nlp
-Version: 6.1.4
+Version: 6.1.5
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.1.4 pyspark==3.3.1
+$ pip install spark-nlp==6.1.5 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
 
 ### Apache Spark Support
 
-Spark NLP *6.1.4* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
 
 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
 
 ### Databricks Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following runtimes:
+Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
 
 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -216,7 +216,7 @@ We are compatible with older runtimes. For a full list check databricks support
 
 ### EMR Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
 
 | **EMR Release** |
 |--------------------|
--- a/spark_nlp.egg-info/SOURCES.txt
+++ b/spark_nlp.egg-info/SOURCES.txt
@@ -260,6 +260,7 @@ sparknlp/reader/pdf_to_text.py
 sparknlp/reader/reader2doc.py
 sparknlp/reader/reader2image.py
 sparknlp/reader/reader2table.py
+sparknlp/reader/reader_assembler.py
 sparknlp/reader/sparknlp_reader.py
 sparknlp/training/__init__.py
 sparknlp/training/conll.py
--- a/sparknlp/__init__.py
+++ b/sparknlp/__init__.py
@@ -66,7 +66,7 @@ sys.modules['com.johnsnowlabs.ml.ai'] = annotator
 annotators = annotator
 embeddings = annotator
 
-__version__ = "6.1.4"
+__version__ = "6.1.5"
 
 
 def start(gpu=False,
--- a/sparknlp/partition/partition_properties.py
+++ b/sparknlp/partition/partition_properties.py
@@ -18,6 +18,23 @@ from pyspark.ml.param import Param, Params, TypeConverters
 
 class HasReaderProperties(Params):
 
+    inputCol = Param(
+        Params._dummy(),
+        "inputCol",
+        "input column name",
+        typeConverter=TypeConverters.toString
+    )
+
+    def setInputCol(self, value):
+        """Sets input column name.
+
+        Parameters
+        ----------
+        value : str
+            Name of the Input Column
+        """
+        return self._set(inputCol=value)
+
     outputCol = Param(
         Params._dummy(),
         "outputCol",
@@ -25,6 +42,16 @@ class HasReaderProperties(Params):
         typeConverter=TypeConverters.toString
     )
 
+    def setOutputCol(self, value):
+        """Sets output column name.
+
+        Parameters
+        ----------
+        value : str
+            Name of the Output Column
+        """
+        return self._set(outputCol=value)
+
     contentPath = Param(
         Params._dummy(),
         "contentPath",
@@ -167,6 +194,56 @@ class HasReaderProperties(Params):
         """
         return self._set(explodeDocs=value)
 
+    flattenOutput = Param(
+        Params._dummy(),
+        "flattenOutput",
+        "If true, output is flattened to plain text with minimal metadata",
+        typeConverter=TypeConverters.toBoolean
+    )
+
+    def setFlattenOutput(self, value):
+        """Sets whether to flatten the output to plain text with minimal metadata.
+
+        ParametersF
+        ----------
+        value : bool
+            If true, output is flattened to plain text with minimal metadata
+        """
+        return self._set(flattenOutput=value)
+
+    titleThreshold = Param(
+        Params._dummy(),
+        "titleThreshold",
+        "Minimum font size threshold for title detection in PDF docs",
+        typeConverter=TypeConverters.toFloat
+    )
+
+    def setTitleThreshold(self, value):
+        """Sets the minimum font size threshold for title detection in PDF documents.
+
+        Parameters
+        ----------
+        value : float
+            Minimum font size threshold for title detection in PDF docs
+        """
+        return self._set(titleThreshold=value)
+
+    outputAsDocument = Param(
+        Params._dummy(),
+        "outputAsDocument",
+        "Whether to return all sentences joined into a single document",
+        typeConverter=TypeConverters.toBoolean
+    )
+
+    def setOutputAsDocument(self, value):
+        """Sets whether to return all sentences joined into a single document.
+
+        Parameters
+        ----------
+        value : bool
+            Whether to return all sentences joined into a single document
+        """
+        return self._set(outputAsDocument=value)
 
 class HasEmailReaderProperties(Params):
 
@@ -683,13 +760,3 @@ class HasPdfProperties(Params):
             True to read as images, False otherwise.
         """
         return self._set(readAsImage=value)
-
-    def setOutputCol(self, value):
-        """Sets output column name.
-
-        Parameters
-        ----------
-        value : str
-            Name of the Output Column
-        """
-        return self._set(outputCol=value)
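Taken together, the partition_properties.py hunks above consolidate the column and output-shaping options (inputCol, outputCol, flattenOutput, titleThreshold, outputAsDocument) into the shared HasReaderProperties mixin, so individual readers no longer re-declare them. Below is a minimal sketch of the resulting call pattern, assuming a reader such as Reader2Doc inherits the mixin (consistent with the setter removals in the next file); the content path and values are illustrative only:

```python
import sparknlp
from sparknlp.reader.reader2doc import Reader2Doc

spark = sparknlp.start()  # readers need an active Spark NLP session

# Every setter below is assumed to be inherited from HasReaderProperties;
# "/data/docs" and the threshold are illustrative values, not from the diff.
reader = (
    Reader2Doc()
    .setContentPath("/data/docs")    # location of the source files (hypothetical)
    .setOutputCol("document")        # shared setter, now defined once on the mixin
    .setFlattenOutput(False)         # keep per-element metadata in the output
    .setTitleThreshold(18.0)         # minimum font size treated as a PDF title
    .setOutputAsDocument(False)      # emit one annotation per element
)
```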
--- a/sparknlp/reader/reader2doc.py
+++ b/sparknlp/reader/reader2doc.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from pyspark import keyword_only
-from pyspark.ml.param import TypeConverters, Params, Param
 
 from sparknlp.common import AnnotatorType
 from sparknlp.internal import AnnotatorTransformer
@@ -69,32 +68,11 @@ class Reader2Doc(
     |[{'document', 15, 38, 'This is a narrative text', {'pageNumber': 1, 'elementType': 'NarrativeText', 'fileName': 'pdf-title.pdf'}, []}]|
     |[{'document', 39, 68, 'This is another narrative text', {'pageNumber': 1, 'elementType': 'NarrativeText', 'fileName': 'pdf-title.pdf'}, []}]|
     +------------------------------------------------------------------------------------------------------------------------------------+
-    """
+    """
 
     name = "Reader2Doc"
-    outputAnnotatorType = AnnotatorType.DOCUMENT
-
-
-    flattenOutput = Param(
-        Params._dummy(),
-        "flattenOutput",
-        "If true, output is flattened to plain text with minimal metadata",
-        typeConverter=TypeConverters.toBoolean
-    )
 
-    titleThreshold = Param(
-        Params._dummy(),
-        "titleThreshold",
-        "Minimum font size threshold for title detection in PDF docs",
-        typeConverter=TypeConverters.toFloat
-    )
-
-    outputAsDocument = Param(
-        Params._dummy(),
-        "outputAsDocument",
-        "Whether to return all sentences joined into a single document",
-        typeConverter=TypeConverters.toBoolean
-    )
+    outputAnnotatorType = AnnotatorType.DOCUMENT
 
     excludeNonText = Param(
         Params._dummy(),
@@ -103,6 +81,16 @@ class Reader2Doc(
         typeConverter=TypeConverters.toBoolean
     )
 
+    def setExcludeNonText(self, value):
+        """Sets whether to exclude non-text content from the output.
+
+        Parameters
+        ----------
+        value : bool
+            Whether to exclude non-text content from the output. Default is False.
+        """
+        return self._set(excludeNonText=value)
+
     @keyword_only
     def __init__(self):
         super(Reader2Doc, self).__init__(classname="com.johnsnowlabs.reader.Reader2Doc")
@@ -117,44 +105,3 @@
     def setParams(self):
         kwargs = self._input_kwargs
         return self._set(**kwargs)
-
-
-    def setFlattenOutput(self, value):
-        """Sets whether to flatten the output to plain text with minimal metadata.
-
-        ParametersF
-        ----------
-        value : bool
-            If true, output is flattened to plain text with minimal metadata
-        """
-        return self._set(flattenOutput=value)
-
-    def setTitleThreshold(self, value):
-        """Sets the minimum font size threshold for title detection in PDF documents.
-
-        Parameters
-        ----------
-        value : float
-            Minimum font size threshold for title detection in PDF docs
-        """
-        return self._set(titleThreshold=value)
-
-    def setOutputAsDocument(self, value):
-        """Sets whether to return all sentences joined into a single document.
-
-        Parameters
-        ----------
-        value : bool
-            Whether to return all sentences joined into a single document
-        """
-        return self._set(outputAsDocument=value)
-
-    def setExcludeNonText(self, value):
-        """Sets whether to exclude non-text content from the output.
-
-        Parameters
-        ----------
-        value : bool
-            Whether to exclude non-text content from the output. Default is False.
-        """
-        return self._set(excludeNonText=value)
--- a/sparknlp/reader/reader2table.py
+++ b/sparknlp/reader/reader2table.py
@@ -32,20 +32,6 @@ class Reader2Table(
 
     outputAnnotatorType = AnnotatorType.DOCUMENT
 
-    flattenOutput = Param(
-        Params._dummy(),
-        "flattenOutput",
-        "If true, output is flattened to plain text with minimal metadata",
-        typeConverter=TypeConverters.toBoolean
-    )
-
-    titleThreshold = Param(
-        Params._dummy(),
-        "titleThreshold",
-        "Minimum font size threshold for title detection in PDF docs",
-        typeConverter=TypeConverters.toFloat
-    )
-
     @keyword_only
     def __init__(self):
         super(Reader2Table, self).__init__(classname="com.johnsnowlabs.reader.Reader2Table")
@@ -55,23 +41,3 @@
     def setParams(self):
         kwargs = self._input_kwargs
         return self._set(**kwargs)
-
-    def setFlattenOutput(self, value):
-        """Sets whether to flatten the output to plain text with minimal metadata.
-
-        Parameters
-        ----------
-        value : bool
-            If true, output is flattened to plain text with minimal metadata
-        """
-        return self._set(flattenOutput=value)
-
-    def setTitleThreshold(self, value):
-        """Sets the minimum font size threshold for title detection in PDF documents.
-
-        Parameters
-        ----------
-        value : float
-            Minimum font size threshold for title detection in PDF docs
-        """
-        return self._set(titleThreshold=value)
--- /dev/null
+++ b/sparknlp/reader/reader_assembler.py
@@ -0,0 +1,159 @@
+# Copyright 2017-2025 John Snow Labs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyspark import keyword_only
+
+from sparknlp.common import AnnotatorType
+from sparknlp.internal import AnnotatorTransformer
+from sparknlp.partition.partition_properties import *
+
+class ReaderAssembler(
+    AnnotatorTransformer,
+    HasReaderProperties,
+    HasHTMLReaderProperties,
+    HasEmailReaderProperties,
+    HasExcelReaderProperties,
+    HasPowerPointProperties,
+    HasTextReaderProperties,
+    HasPdfProperties
+):
+    """
+    The ReaderAssembler annotator provides a unified interface for combining multiple Spark NLP
+    readers (such as Reader2Doc, Reader2Table, and Reader2Image) into a single, configurable
+    component. It automatically orchestrates the execution of different readers based on input type,
+    configured priorities, and fallback strategies allowing you to handle diverse content formats
+    without manually chaining multiple readers in your pipeline.
+
+    ReaderAssembler simplifies the process of building flexible pipelines capable of ingesting and
+    processing documents, tables, and images in a consistent way. It handles reader selection,
+    ordering, and fault-tolerance internally, ensuring that pipelines remain concise, robust, and
+    easy to maintain.
+
+    Examples
+    --------
+    >>> from johnsnowlabs.reader import ReaderAssembler
+    >>> from pyspark.ml import Pipeline
+    >>>
+    >>> reader_assembler = ReaderAssembler() \\
+    ...     .setContentType("text/html") \\
+    ...     .setContentPath("/table-image.html") \\
+    ...     .setOutputCol("document")
+    >>>
+    >>> pipeline = Pipeline(stages=[reader_assembler])
+    >>> pipeline_model = pipeline.fit(empty_data_set)
+    >>> result_df = pipeline_model.transform(empty_data_set)
+    >>>
+    >>> result_df.show()
+    +--------+--------------------+--------------------+--------------------+---------+
+    |fileName|       document_text|      document_table|      document_image|exception|
+    +--------+--------------------+--------------------+--------------------+---------+
+    |    null|[{'document', 0, 26...|[{'document', 0, 50...|[{'image', , 5, 5, ...|     null|
+    +--------+--------------------+--------------------+--------------------+---------+
+
+    This annotator is especially useful when working with heterogeneous input data — for example,
+    when a dataset includes PDFs, spreadsheets, and images — allowing Spark NLP to automatically
+    invoke the appropriate reader for each file type while preserving a unified schema in the output.
+    """
+
+
+    name = 'ReaderAssembler'
+
+    outputAnnotatorType = AnnotatorType.DOCUMENT
+
+    excludeNonText = Param(
+        Params._dummy(),
+        "excludeNonText",
+        "Whether to exclude non-text content from the output. Default is False.",
+        typeConverter=TypeConverters.toBoolean
+    )
+
+    userMessage = Param(
+        Params._dummy(),
+        "userMessage",
+        "Custom user message.",
+        typeConverter=TypeConverters.toString
+    )
+
+    promptTemplate = Param(
+        Params._dummy(),
+        "promptTemplate",
+        "Format of the output prompt.",
+        typeConverter=TypeConverters.toString
+    )
+
+    customPromptTemplate = Param(
+        Params._dummy(),
+        "customPromptTemplate",
+        "Custom prompt template for image models.",
+        typeConverter=TypeConverters.toString
+    )
+
+    @keyword_only
+    def __init__(self):
+        super(ReaderAssembler, self).__init__(classname="com.johnsnowlabs.reader.ReaderAssembler")
+        self._setDefault(contentType="",
+                         explodeDocs=False,
+                         userMessage="Describe this image",
+                         promptTemplate="qwen2vl-chat",
+                         readAsImage=True,
+                         customPromptTemplate="",
+                         ignoreExceptions=True,
+                         flattenOutput=False,
+                         titleThreshold=18)
+
+
+    @keyword_only
+    def setParams(self):
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def setExcludeNonText(self, value):
+        """Sets whether to exclude non-text content from the output.
+
+        Parameters
+        ----------
+        value : bool
+            Whether to exclude non-text content from the output. Default is False.
+        """
+        return self._set(excludeNonText=value)
+
+    def setUserMessage(self, value: str):
+        """Sets custom user message.
+
+        Parameters
+        ----------
+        value : str
+            Custom user message to include.
+        """
+        return self._set(userMessage=value)
+
+    def setPromptTemplate(self, value: str):
+        """Sets format of the output prompt.
+
+        Parameters
+        ----------
+        value : str
+            Prompt template format.
+        """
+        return self._set(promptTemplate=value)
+
+    def setCustomPromptTemplate(self, value: str):
+        """Sets custom prompt template for image models.
+
+        Parameters
+        ----------
+        value : str
+            Custom prompt template string.
+        """
+        return self._set(customPromptTemplate=value)
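Beyond the docstring example in the new file, the defaults registered in __init__ (userMessage, promptTemplate, readAsImage) suggest how the image branch of ReaderAssembler is driven. Below is a minimal end-to-end sketch under those assumptions: the import path follows the module location in SOURCES.txt, the content path is hypothetical, and the empty DataFrame stands in for the empty_data_set of the docstring example:

```python
import sparknlp
from sparknlp.reader.reader_assembler import ReaderAssembler  # path per SOURCES.txt
from pyspark.ml import Pipeline

spark = sparknlp.start()
empty_df = spark.createDataFrame([[""]]).toDF("text")  # stand-in for empty_data_set

# "/data/mixed" is a hypothetical folder mixing PDFs, HTML files, and images.
assembler = (
    ReaderAssembler()
    .setContentPath("/data/mixed")
    .setOutputCol("document")
    .setUserMessage("Summarize this figure")  # overrides the "Describe this image" default
    .setPromptTemplate("qwen2vl-chat")        # same value as the registered default
    .setExcludeNonText(False)
)

result = Pipeline(stages=[assembler]).fit(empty_df).transform(empty_df)
result.show()
```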