spark-nlp 6.0.0.tar.gz → 6.0.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spark-nlp might be problematic. Click here for more details.

Files changed (278)
  1. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/PKG-INFO +6 -6
  2. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/README.md +5 -5
  3. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/setup.py +1 -1
  4. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/spark_nlp.egg-info/PKG-INFO +6 -6
  5. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/spark_nlp.egg-info/SOURCES.txt +4 -0
  6. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/__init__.py +2 -2
  7. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +1 -1
  8. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/__init__.py +3 -0
  9. spark_nlp-6.0.1/sparknlp/annotator/cv/gemma3_for_multimodal.py +351 -0
  10. spark_nlp-6.0.1/sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
  11. spark_nlp-6.0.1/sparknlp/annotator/cv/smolvlm_transformer.py +432 -0
  12. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/internal/__init__.py +27 -0
  13. spark_nlp-6.0.1/sparknlp/reader/enums.py +19 -0
  14. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/reader/pdf_to_text.py +47 -1
  15. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/com/__init__.py +0 -0
  16. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/com/johnsnowlabs/__init__.py +0 -0
  17. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/com/johnsnowlabs/ml/__init__.py +0 -0
  18. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
  19. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/com/johnsnowlabs/nlp/__init__.py +0 -0
  20. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/setup.cfg +0 -0
  21. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/spark_nlp.egg-info/dependency_links.txt +0 -0
  22. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/spark_nlp.egg-info/top_level.txt +0 -0
  23. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotation.py +0 -0
  24. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotation_audio.py +0 -0
  25. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotation_image.py +0 -0
  26. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/__init__.py +0 -0
  27. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/audio/__init__.py +0 -0
  28. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
  29. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
  30. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
  31. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/chunk2_doc.py +0 -0
  32. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/chunker.py +0 -0
  33. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/__init__.py +0 -0
  34. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +0 -0
  35. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
  36. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
  37. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
  38. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +0 -0
  39. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
  40. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +0 -0
  41. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
  42. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
  43. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
  44. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
  45. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
  46. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
  47. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
  48. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +0 -0
  49. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
  50. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
  51. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
  52. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
  53. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
  54. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
  55. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
  56. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
  57. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
  58. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +0 -0
  59. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
  60. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
  61. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
  62. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
  63. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
  64. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
  65. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
  66. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
  67. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
  68. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
  69. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
  70. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
  71. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
  72. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +0 -0
  73. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
  74. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
  75. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
  76. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
  77. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
  78. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
  79. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cleaners/__init__.py +0 -0
  80. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cleaners/cleaner.py +0 -0
  81. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cleaners/extractor.py +0 -0
  82. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/coref/__init__.py +0 -0
  83. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
  84. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/blip_for_question_answering.py +0 -0
  85. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
  86. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
  87. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/janus_for_multimodal.py +0 -0
  88. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/llava_for_multimodal.py +0 -0
  89. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/mllama_for_multimodal.py +0 -0
  90. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/phi3_vision_for_multimodal.py +0 -0
  91. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/qwen2vl_transformer.py +0 -0
  92. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
  93. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
  94. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
  95. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/date2_chunk.py +0 -0
  96. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/dependency/__init__.py +0 -0
  97. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
  98. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
  99. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/document_character_text_splitter.py +0 -0
  100. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/document_normalizer.py +0 -0
  101. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/document_token_splitter.py +0 -0
  102. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/document_token_splitter_test.py +0 -0
  103. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/__init__.py +0 -0
  104. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
  105. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/auto_gguf_embeddings.py +0 -0
  106. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
  107. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
  108. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -0
  109. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
  110. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
  111. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
  112. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
  113. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
  114. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
  115. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
  116. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
  117. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
  118. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
  119. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/mxbai_embeddings.py +0 -0
  120. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/nomic_embeddings.py +0 -0
  121. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
  122. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
  123. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
  124. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/snowflake_embeddings.py +0 -0
  125. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
  126. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
  127. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/word2vec.py +0 -0
  128. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
  129. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
  130. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
  131. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
  132. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/er/__init__.py +0 -0
  133. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/er/entity_ruler.py +0 -0
  134. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/graph_extraction.py +0 -0
  135. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
  136. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
  137. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/ld_dl/__init__.py +0 -0
  138. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
  139. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/lemmatizer.py +0 -0
  140. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/__init__.py +0 -0
  141. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
  142. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/date_matcher.py +0 -0
  143. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
  144. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
  145. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/text_matcher.py +0 -0
  146. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/n_gram_generator.py +0 -0
  147. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/ner/__init__.py +0 -0
  148. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/ner/ner_approach.py +0 -0
  149. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/ner/ner_converter.py +0 -0
  150. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/ner/ner_crf.py +0 -0
  151. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/ner/ner_dl.py +0 -0
  152. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
  153. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
  154. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/normalizer.py +0 -0
  155. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/openai/__init__.py +0 -0
  156. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/openai/openai_completion.py +0 -0
  157. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
  158. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/param/__init__.py +0 -0
  159. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/param/classifier_encoder.py +0 -0
  160. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
  161. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/pos/__init__.py +0 -0
  162. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/pos/perceptron.py +0 -0
  163. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/sentence/__init__.py +0 -0
  164. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
  165. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
  166. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/sentiment/__init__.py +0 -0
  167. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
  168. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
  169. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/__init__.py +0 -0
  170. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/auto_gguf_model.py +0 -0
  171. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +0 -0
  172. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
  173. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/cohere_transformer.py +0 -0
  174. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/cpm_transformer.py +0 -0
  175. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
  176. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
  177. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/llama3_transformer.py +0 -0
  178. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
  179. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
  180. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
  181. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/nllb_transformer.py +0 -0
  182. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/olmo_transformer.py +0 -0
  183. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
  184. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/phi3_transformer.py +0 -0
  185. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/qwen_transformer.py +0 -0
  186. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/starcoder_transformer.py +0 -0
  187. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
  188. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/similarity/__init__.py +0 -0
  189. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
  190. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/spell_check/__init__.py +0 -0
  191. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
  192. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
  193. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
  194. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/stemmer.py +0 -0
  195. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/stop_words_cleaner.py +0 -0
  196. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
  197. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/token/__init__.py +0 -0
  198. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
  199. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
  200. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
  201. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/token/tokenizer.py +0 -0
  202. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/token2_chunk.py +0 -0
  203. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/ws/__init__.py +0 -0
  204. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/annotator/ws/word_segmenter.py +0 -0
  205. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/__init__.py +0 -0
  206. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/audio_assembler.py +0 -0
  207. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/doc2_chunk.py +0 -0
  208. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/document_assembler.py +0 -0
  209. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/embeddings_finisher.py +0 -0
  210. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/finisher.py +0 -0
  211. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/graph_finisher.py +0 -0
  212. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/has_recursive_fit.py +0 -0
  213. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/has_recursive_transform.py +0 -0
  214. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/image_assembler.py +0 -0
  215. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/light_pipeline.py +0 -0
  216. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/multi_document_assembler.py +0 -0
  217. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/prompt_assembler.py +0 -0
  218. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/recursive_pipeline.py +0 -0
  219. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/table_assembler.py +0 -0
  220. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/base/token_assembler.py +0 -0
  221. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/__init__.py +0 -0
  222. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/annotator_approach.py +0 -0
  223. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/annotator_model.py +0 -0
  224. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/annotator_properties.py +0 -0
  225. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/annotator_type.py +0 -0
  226. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/coverage_result.py +0 -0
  227. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/match_strategy.py +0 -0
  228. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/properties.py +0 -0
  229. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/read_as.py +0 -0
  230. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/recursive_annotator_approach.py +0 -0
  231. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/storage.py +0 -0
  232. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/common/utils.py +0 -0
  233. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/functions.py +0 -0
  234. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/internal/annotator_java_ml.py +0 -0
  235. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/internal/annotator_transformer.py +0 -0
  236. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/internal/extended_java_wrapper.py +0 -0
  237. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/internal/params_getters_setters.py +0 -0
  238. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/internal/recursive.py +0 -0
  239. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/logging/__init__.py +0 -0
  240. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/logging/comet.py +0 -0
  241. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/pretrained/__init__.py +0 -0
  242. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
  243. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/pretrained/resource_downloader.py +0 -0
  244. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/pretrained/utils.py +0 -0
  245. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/reader/__init__.py +0 -0
  246. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/reader/sparknlp_reader.py +0 -0
  247. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/__init__.py +0 -0
  248. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  249. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
  250. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  251. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
  252. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
  253. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
  254. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
  255. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
  256. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
  257. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
  258. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
  259. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
  260. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
  261. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
  262. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
  263. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  264. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
  265. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  266. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
  267. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
  268. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
  269. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
  270. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
  271. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/conll.py +0 -0
  272. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/conllu.py +0 -0
  273. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/pos.py +0 -0
  274. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/pub_tator.py +0 -0
  275. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/spacy_to_annotation.py +0 -0
  276. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/training/tfgraphs.py +0 -0
  277. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/upload_to_hub.py +0 -0
  278. {spark-nlp-6.0.0 → spark_nlp-6.0.1}/sparknlp/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spark-nlp
3
- Version: 6.0.0
3
+ Version: 6.0.1
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -95,7 +95,7 @@ $ java -version
95
95
  $ conda create -n sparknlp python=3.7 -y
96
96
  $ conda activate sparknlp
97
97
  # spark-nlp by default is based on pyspark 3.x
98
- $ pip install spark-nlp==6.0.0 pyspark==3.3.1
98
+ $ pip install spark-nlp==6.0.1 pyspark==3.3.1
99
99
  ```
100
100
 
101
101
  In Python console or Jupyter `Python3` kernel:
@@ -161,7 +161,7 @@ For a quick example of using pipelines and models take a look at our official [d
161
161
 
162
162
  ### Apache Spark Support
163
163
 
164
- Spark NLP *6.0.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
164
+ Spark NLP *6.0.1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
165
165
 
166
166
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
167
167
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -191,7 +191,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
191
191
 
192
192
  ### Databricks Support
193
193
 
194
- Spark NLP 6.0.0 has been tested and is compatible with the following runtimes:
194
+ Spark NLP 6.0.1 has been tested and is compatible with the following runtimes:
195
195
 
196
196
  | **CPU** | **GPU** |
197
197
  |--------------------|--------------------|
@@ -208,7 +208,7 @@ We are compatible with older runtimes. For a full list check databricks support
208
208
 
209
209
  ### EMR Support
210
210
 
211
- Spark NLP 6.0.0 has been tested and is compatible with the following EMR releases:
211
+ Spark NLP 6.0.1 has been tested and is compatible with the following EMR releases:
212
212
 
213
213
  | **EMR Release** |
214
214
  |--------------------|
@@ -246,7 +246,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
246
246
  from our official documentation.
247
247
 
248
248
  If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
249
- projects [Spark NLP SBT S6.0.0r](https://github.com/maziyarpanahi/spark-nlp-starter)
249
+ projects [Spark NLP Starter](https://github.com/maziyarpanahi/spark-nlp-starter)
250
250
 
251
251
  ### Python
252
252
 
@@ -63,7 +63,7 @@ $ java -version
63
63
  $ conda create -n sparknlp python=3.7 -y
64
64
  $ conda activate sparknlp
65
65
  # spark-nlp by default is based on pyspark 3.x
66
- $ pip install spark-nlp==6.0.0 pyspark==3.3.1
66
+ $ pip install spark-nlp==6.0.1 pyspark==3.3.1
67
67
  ```
68
68
 
69
69
  In Python console or Jupyter `Python3` kernel:
@@ -129,7 +129,7 @@ For a quick example of using pipelines and models take a look at our official [d
129
129
 
130
130
  ### Apache Spark Support
131
131
 
132
- Spark NLP *6.0.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
132
+ Spark NLP *6.0.1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
133
133
 
134
134
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
135
135
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -159,7 +159,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
159
159
 
160
160
  ### Databricks Support
161
161
 
162
- Spark NLP 6.0.0 has been tested and is compatible with the following runtimes:
162
+ Spark NLP 6.0.1 has been tested and is compatible with the following runtimes:
163
163
 
164
164
  | **CPU** | **GPU** |
165
165
  |--------------------|--------------------|
@@ -176,7 +176,7 @@ We are compatible with older runtimes. For a full list check databricks support
176
176
 
177
177
  ### EMR Support
178
178
 
179
- Spark NLP 6.0.0 has been tested and is compatible with the following EMR releases:
179
+ Spark NLP 6.0.1 has been tested and is compatible with the following EMR releases:
180
180
 
181
181
  | **EMR Release** |
182
182
  |--------------------|
@@ -214,7 +214,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
214
214
  from our official documentation.
215
215
 
216
216
  If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
217
- projects [Spark NLP SBT S6.0.0r](https://github.com/maziyarpanahi/spark-nlp-starter)
217
+ projects [Spark NLP Starter](https://github.com/maziyarpanahi/spark-nlp-starter)
218
218
 
219
219
  ### Python
220
220
 
@@ -41,7 +41,7 @@ setup(
41
41
  # project code, see
42
42
  # https://packaging.python.org/en/latest/single_source_version.html
43
43
 
44
- version='6.0.0', # Required
44
+ version='6.0.1', # Required
45
45
 
46
46
  # This is a one-line description or tagline of what your project does. This
47
47
  # corresponds to the 'Summary' metadata field:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spark-nlp
3
- Version: 6.0.0
3
+ Version: 6.0.1
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -95,7 +95,7 @@ $ java -version
95
95
  $ conda create -n sparknlp python=3.7 -y
96
96
  $ conda activate sparknlp
97
97
  # spark-nlp by default is based on pyspark 3.x
98
- $ pip install spark-nlp==6.0.0 pyspark==3.3.1
98
+ $ pip install spark-nlp==6.0.1 pyspark==3.3.1
99
99
  ```
100
100
 
101
101
  In Python console or Jupyter `Python3` kernel:
@@ -161,7 +161,7 @@ For a quick example of using pipelines and models take a look at our official [d
161
161
 
162
162
  ### Apache Spark Support
163
163
 
164
- Spark NLP *6.0.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
164
+ Spark NLP *6.0.1* has been built on top of Apache Spark 3.4 and fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
165
165
 
166
166
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
167
167
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -191,7 +191,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
191
191
 
192
192
  ### Databricks Support
193
193
 
194
- Spark NLP 6.0.0 has been tested and is compatible with the following runtimes:
194
+ Spark NLP 6.0.1 has been tested and is compatible with the following runtimes:
195
195
 
196
196
  | **CPU** | **GPU** |
197
197
  |--------------------|--------------------|
@@ -208,7 +208,7 @@ We are compatible with older runtimes. For a full list check databricks support
208
208
 
209
209
  ### EMR Support
210
210
 
211
- Spark NLP 6.0.0 has been tested and is compatible with the following EMR releases:
211
+ Spark NLP 6.0.1 has been tested and is compatible with the following EMR releases:
212
212
 
213
213
  | **EMR Release** |
214
214
  |--------------------|
@@ -246,7 +246,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
246
246
  from our official documentation.
247
247
 
248
248
  If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
249
- projects [Spark NLP SBT S6.0.0r](https://github.com/maziyarpanahi/spark-nlp-starter)
249
+ projects [Spark NLP Starter](https://github.com/maziyarpanahi/spark-nlp-starter)
250
250
 
251
251
  ### Python
252
252
 
@@ -93,11 +93,14 @@ sparknlp/annotator/cv/__init__.py
93
93
  sparknlp/annotator/cv/blip_for_question_answering.py
94
94
  sparknlp/annotator/cv/clip_for_zero_shot_classification.py
95
95
  sparknlp/annotator/cv/convnext_for_image_classification.py
96
+ sparknlp/annotator/cv/gemma3_for_multimodal.py
96
97
  sparknlp/annotator/cv/janus_for_multimodal.py
97
98
  sparknlp/annotator/cv/llava_for_multimodal.py
98
99
  sparknlp/annotator/cv/mllama_for_multimodal.py
100
+ sparknlp/annotator/cv/paligemma_for_multimodal.py
99
101
  sparknlp/annotator/cv/phi3_vision_for_multimodal.py
100
102
  sparknlp/annotator/cv/qwen2vl_transformer.py
103
+ sparknlp/annotator/cv/smolvlm_transformer.py
101
104
  sparknlp/annotator/cv/swin_for_image_classification.py
102
105
  sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py
103
106
  sparknlp/annotator/cv/vit_for_image_classification.py
@@ -239,6 +242,7 @@ sparknlp/pretrained/pretrained_pipeline.py
239
242
  sparknlp/pretrained/resource_downloader.py
240
243
  sparknlp/pretrained/utils.py
241
244
  sparknlp/reader/__init__.py
245
+ sparknlp/reader/enums.py
242
246
  sparknlp/reader/pdf_to_text.py
243
247
  sparknlp/reader/sparknlp_reader.py
244
248
  sparknlp/training/__init__.py
@@ -132,7 +132,7 @@ def start(gpu=False,
132
132
  The initiated Spark session.
133
133
 
134
134
  """
135
- current_version = "6.0.0"
135
+ current_version = "6.0.1"
136
136
 
137
137
  if params is None:
138
138
  params = {}
@@ -316,4 +316,4 @@ def version():
316
316
  str
317
317
  The current Spark NLP version.
318
318
  """
319
- return '6.0.0'
319
+ return '6.0.1'
@@ -104,7 +104,7 @@ class RoBertaForMultipleChoice(AnnotatorModel,
104
104
  return self._set(caseSensitive=value)
105
105
 
106
106
  @keyword_only
107
- def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.RobertaForMultipleChoice",
107
+ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.RoBertaForMultipleChoice",
108
108
  java_model=None):
109
109
  super(RoBertaForMultipleChoice, self).__init__(
110
110
  classname=classname,
@@ -22,3 +22,6 @@ from sparknlp.annotator.cv.mllama_for_multimodal import *
22
22
  from sparknlp.annotator.cv.qwen2vl_transformer import *
23
23
  from sparknlp.annotator.cv.llava_for_multimodal import *
24
24
  from sparknlp.annotator.cv.phi3_vision_for_multimodal import *
25
+ from sparknlp.annotator.cv.smolvlm_transformer import *
26
+ from sparknlp.annotator.cv.paligemma_for_multimodal import *
27
+ from sparknlp.annotator.cv.gemma3_for_multimodal import *
@@ -0,0 +1,351 @@
1
+ # Copyright 2017-2024 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from sparknlp.common import *
16
+
17
class Gemma3ForMultiModal(AnnotatorModel,
                          HasBatchedAnnotateImage,
                          HasImageFeatureProperties,
                          HasEngine,
                          HasGeneratorProperties):
    """Gemma3ForMultiModal can load Gemma 3 Vision models for visual question answering.
    The model consists of a vision encoder, a text encoder, a text decoder and a model merger.
    The vision encoder will encode the input image, the text encoder will encode the input text,
    the model merger will merge the image and text embeddings, and the text decoder will output the answer.

    Gemma 3 is a family of lightweight, state-of-the-art open models from Google, built from the same
    research and technology used to create the Gemini models. It features:

    - Large 128K context window
    - Multilingual support in over 140 languages
    - Multimodal capabilities handling both text and image inputs
    - Optimized for deployment on limited resources (laptops, desktops, cloud)

    Pretrained models can be loaded with :meth:`.pretrained` of the companion
    object:

    >>> visualQA = Gemma3ForMultiModal.pretrained() \
    ...     .setInputCols(["image_assembler"]) \
    ...     .setOutputCol("answer")

    The default model is ``"gemma3_4b_it_int4"``, if no name is
    provided.

    For available pretrained models please see the `Models Hub
    <https://sparknlp.org/models?task=Question+Answering>`__.

    ====================== ======================
    Input Annotation types Output Annotation type
    ====================== ======================
    ``IMAGE``              ``DOCUMENT``
    ====================== ======================

    Parameters
    ----------
    batchSize
        Batch size. Large values allow faster processing but require more
        memory, by default 1
    minOutputLength
        Minimum length of the sequence to be generated, by default 0
    maxOutputLength
        Maximum length of output text, by default 20
    doSample
        Whether or not to use sampling; use greedy decoding otherwise, by default False
    temperature
        The value used to modulate the next token probabilities, by default 0.6
    topK
        The number of highest probability vocabulary tokens to keep for top-k-filtering, by default -1
    topP
        If set to float < 1, only the most probable tokens with probabilities that add up to ``top_p`` or higher are kept for generation, by default 0.9
    repetitionPenalty
        The parameter for repetition penalty. 1.0 means no penalty, by default 1.0
    noRepeatNgramSize
        If set to int > 0, all ngrams of that size can only occur once, by default 3
    ignoreTokenIds
        A list of token ids which are ignored in the decoder's output, by default []
    beamSize
        The number of beams for beam search, by default 1
    maxInputLength
        Maximum length of input text, by default 4096

    Examples
    --------
    >>> import sparknlp
    >>> from sparknlp.base import *
    >>> from sparknlp.annotator import *
    >>> from pyspark.ml import Pipeline
    >>> from pyspark.sql.functions import lit
    >>>
    >>> imageDF = spark.read.format("image").load(images_path)
    >>> testDF = imageDF.withColumn("text", lit("<bos><start_of_turn>user\\nYou are a helpful assistant.\\n\\n<start_of_image>Describe this image in detail.<end_of_turn>\\n<start_of_turn>model\\n"))
    >>>
    >>> imageAssembler = ImageAssembler() \
    ...     .setInputCol("image") \
    ...     .setOutputCol("image_assembler")
    >>>
    >>> visualQA = Gemma3ForMultiModal.pretrained() \
    ...     .setInputCols("image_assembler") \
    ...     .setOutputCol("answer")
    >>>
    >>> pipeline = Pipeline().setStages([
    ...     imageAssembler,
    ...     visualQA
    ... ])
    >>>
    >>> result = pipeline.fit(testDF).transform(testDF)
    >>> result.select("image_assembler.origin", "answer.result").show(truncate=False)
    """

    name = "Gemma3ForMultiModal"

    inputAnnotatorTypes = [AnnotatorType.IMAGE]

    outputAnnotatorType = AnnotatorType.DOCUMENT

    # Param declarations. The description strings are runtime text and are
    # kept exactly as released.
    configProtoBytes = Param(Params._dummy(),
                             "configProtoBytes",
                             "ConfigProto from tensorflow, serialized into byte array. Get with "
                             "config_proto.SerializeToString()",
                             TypeConverters.toListInt)

    minOutputLength = Param(Params._dummy(), "minOutputLength", "Minimum length of the sequence to be generated",
                            typeConverter=TypeConverters.toInt)

    maxOutputLength = Param(Params._dummy(), "maxOutputLength", "Maximum length of output text",
                            typeConverter=TypeConverters.toInt)

    doSample = Param(Params._dummy(), "doSample", "Whether or not to use sampling; use greedy decoding otherwise",
                     typeConverter=TypeConverters.toBoolean)

    temperature = Param(Params._dummy(), "temperature", "The value used to module the next token probabilities",
                        typeConverter=TypeConverters.toFloat)

    topK = Param(Params._dummy(), "topK",
                 "The number of highest probability vocabulary tokens to keep for top-k-filtering",
                 typeConverter=TypeConverters.toInt)

    topP = Param(Params._dummy(), "topP",
                 "If set to float < 1, only the most probable tokens with probabilities that add up to ``top_p`` or higher are kept for generation",
                 typeConverter=TypeConverters.toFloat)

    repetitionPenalty = Param(Params._dummy(), "repetitionPenalty",
                              "The parameter for repetition penalty. 1.0 means no penalty. See `this paper <https://arxiv.org/pdf/1909.05858.pdf>`__ for more details",
                              typeConverter=TypeConverters.toFloat)

    noRepeatNgramSize = Param(Params._dummy(), "noRepeatNgramSize",
                              "If set to int > 0, all ngrams of that size can only occur once",
                              typeConverter=TypeConverters.toInt)

    ignoreTokenIds = Param(Params._dummy(), "ignoreTokenIds",
                           "A list of token ids which are ignored in the decoder's output",
                           typeConverter=TypeConverters.toListInt)

    beamSize = Param(Params._dummy(), "beamSize",
                     "The Number of beams for beam search.",
                     typeConverter=TypeConverters.toInt)

    maxInputLength = Param(Params._dummy(), "maxInputLength", "Maximum length of input text",
                           typeConverter=TypeConverters.toInt)

    def setMaxInputLength(self, value):
        """Sets maximum length of input text, by default 4096.

        Parameters
        ----------
        value : int
            Maximum length of input text
        """
        return self._set(maxInputLength=value)

    def setMaxSentenceSize(self, value):
        """Sets maximum length of input text, by default 4096.

        Legacy alias of :meth:`setMaxInputLength`, kept so existing callers
        continue to work.

        Parameters
        ----------
        value : int
            Maximum length of input text
        """
        # This setter used to target ``maxSentenceLength``, which this class
        # does not declare; ``maxInputLength`` is the declared Param that had
        # no setter.
        # NOTE(review): confirm none of the mixins declares a
        # ``maxSentenceLength`` Param before relying on this alias.
        return self.setMaxInputLength(value)

    def setIgnoreTokenIds(self, value):
        """Sets the list of token ids which are ignored in the decoder's output.

        Parameters
        ----------
        value : List[int]
            Token ids to be ignored in the decoder's output
        """
        return self._set(ignoreTokenIds=value)

    def setConfigProtoBytes(self, b):
        """Sets configProto from tensorflow, serialized into byte array.

        Parameters
        ----------
        b : List[int]
            ConfigProto from tensorflow, serialized into byte array
        """
        return self._set(configProtoBytes=b)

    def setMinOutputLength(self, value):
        """Sets minimum length of the sequence to be generated.

        Parameters
        ----------
        value : int
            Minimum length of the sequence to be generated
        """
        return self._set(minOutputLength=value)

    def setMaxOutputLength(self, value):
        """Sets maximum length of output text.

        Parameters
        ----------
        value : int
            Maximum length of output text
        """
        return self._set(maxOutputLength=value)

    def setDoSample(self, value):
        """Sets whether or not to use sampling, use greedy decoding otherwise.

        Parameters
        ----------
        value : bool
            Whether or not to use sampling; use greedy decoding otherwise
        """
        return self._set(doSample=value)

    def setTemperature(self, value):
        """Sets the value used to modulate the next token probabilities.

        Parameters
        ----------
        value : float
            The value used to modulate the next token probabilities
        """
        return self._set(temperature=value)

    def setTopK(self, value):
        """Sets the number of highest probability vocabulary tokens to keep for
        top-k-filtering.

        Parameters
        ----------
        value : int
            Number of highest probability vocabulary tokens to keep
        """
        return self._set(topK=value)

    def setTopP(self, value):
        """Sets the top cumulative probability for vocabulary tokens.

        If set to float < 1, only the most probable tokens with probabilities
        that add up to ``topP`` or higher are kept for generation.

        Parameters
        ----------
        value : float
            Cumulative probability for vocabulary tokens
        """
        return self._set(topP=value)

    def setRepetitionPenalty(self, value):
        """Sets the parameter for repetition penalty. 1.0 means no penalty.

        Parameters
        ----------
        value : float
            The repetition penalty

        References
        ----------
        See `Ctrl: A Conditional Transformer Language Model For Controllable
        Generation <https://arxiv.org/pdf/1909.05858.pdf>`__ for more details.
        """
        return self._set(repetitionPenalty=value)

    def setNoRepeatNgramSize(self, value):
        """Sets size of n-grams that can only occur once.

        If set to int > 0, all ngrams of that size can only occur once.

        Parameters
        ----------
        value : int
            Size of the n-grams that can only occur once
        """
        return self._set(noRepeatNgramSize=value)

    def setBeamSize(self, value):
        """Sets the number of beams for beam search, by default `1`.

        Parameters
        ----------
        value : int
            Number of beams for beam search
        """
        return self._set(beamSize=value)

    @keyword_only
    def __init__(self, classname="com.johnsnowlabs.nlp.annotators.cv.Gemma3ForMultiModal",
                 java_model=None):
        """Creates the annotator and registers its default parameter values.

        Parameters
        ----------
        classname : str, optional
            Name of the backing JVM class, by default
            "com.johnsnowlabs.nlp.annotators.cv.Gemma3ForMultiModal"
        java_model : optional
            Already instantiated JVM model to wrap, by default None
        """
        super(Gemma3ForMultiModal, self).__init__(
            classname=classname,
            java_model=java_model
        )
        # These defaults are the values quoted in the class and setter
        # docstrings; keep them in sync.
        self._setDefault(
            batchSize=1,
            minOutputLength=0,
            maxOutputLength=20,
            doSample=False,
            temperature=0.6,
            topK=-1,
            topP=0.9,
            repetitionPenalty=1.0,
            noRepeatNgramSize=3,
            ignoreTokenIds=[],
            beamSize=1,
            maxInputLength=4096,
        )

    @staticmethod
    def loadSavedModel(folder, spark_session, use_openvino=False):
        """Loads a locally saved model.

        Parameters
        ----------
        folder : str
            Folder of the saved model
        spark_session : pyspark.sql.SparkSession
            The current SparkSession
        use_openvino : bool, optional
            Forwarded unchanged to the internal loader, by default False.
            NOTE(review): presumably selects the OpenVINO engine - confirm
            against the loader.

        Returns
        -------
        Gemma3ForMultiModal
            The restored model
        """
        # Imported inside the method, as in the released code.
        from sparknlp.internal import _Gemma3ForMultiModalLoader
        jModel = _Gemma3ForMultiModalLoader(folder, spark_session._jsparkSession, use_openvino)._java_obj
        return Gemma3ForMultiModal(java_model=jModel)

    @staticmethod
    def pretrained(name="gemma3_4b_it_int4", lang="en", remote_loc=None):
        """Downloads and loads a pretrained model.

        Parameters
        ----------
        name : str, optional
            Name of the pretrained model, by default "gemma3_4b_it_int4"
        lang : str, optional
            Language of the pretrained model, by default "en"
        remote_loc : str, optional
            Optional remote address of the resource, by default None. Will use
            Spark NLPs repositories otherwise.

        Returns
        -------
        Gemma3ForMultiModal
            The restored model
        """
        from sparknlp.pretrained import ResourceDownloader
        return ResourceDownloader.downloadModel(Gemma3ForMultiModal, name, lang, remote_loc)