spark-nlp 6.0.5.tar.gz → 6.1.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of spark-nlp might be problematic.

Files changed (289)
  1. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/PKG-INFO +5 -5
  2. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/README.md +4 -4
  3. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/setup.py +1 -1
  4. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/spark_nlp.egg-info/PKG-INFO +5 -5
  5. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/spark_nlp.egg-info/SOURCES.txt +2 -0
  6. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/__init__.py +1 -1
  7. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/__init__.py +1 -0
  8. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/auto_gguf_model.py +3 -1
  9. spark_nlp-6.1.0/sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
  10. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/properties.py +100 -66
  11. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/internal/__init__.py +9 -0
  12. spark_nlp-6.1.0/sparknlp/reader/reader2doc.py +194 -0
  13. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/reader/sparknlp_reader.py +45 -0
  14. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/com/__init__.py +0 -0
  15. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/com/johnsnowlabs/__init__.py +0 -0
  16. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/com/johnsnowlabs/ml/__init__.py +0 -0
  17. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
  18. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/com/johnsnowlabs/nlp/__init__.py +0 -0
  19. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/setup.cfg +0 -0
  20. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/spark_nlp.egg-info/dependency_links.txt +0 -0
  21. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/spark_nlp.egg-info/top_level.txt +0 -0
  22. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotation.py +0 -0
  23. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotation_audio.py +0 -0
  24. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotation_image.py +0 -0
  25. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/__init__.py +0 -0
  26. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/audio/__init__.py +0 -0
  27. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
  28. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
  29. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
  30. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/chunk2_doc.py +0 -0
  31. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/chunker.py +0 -0
  32. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/__init__.py +0 -0
  33. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +0 -0
  34. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
  35. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
  36. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
  37. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +0 -0
  38. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
  39. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +0 -0
  40. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
  41. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
  42. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
  43. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
  44. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
  45. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
  46. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
  47. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +0 -0
  48. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
  49. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
  50. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
  51. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
  52. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
  53. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
  54. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
  55. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
  56. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
  57. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +0 -0
  58. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
  59. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
  60. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
  61. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
  62. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
  63. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
  64. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
  65. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +0 -0
  66. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
  67. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
  68. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
  69. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
  70. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
  71. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
  72. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +0 -0
  73. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
  74. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
  75. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
  76. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
  77. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
  78. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
  79. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cleaners/__init__.py +0 -0
  80. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cleaners/cleaner.py +0 -0
  81. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cleaners/extractor.py +0 -0
  82. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/coref/__init__.py +0 -0
  83. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
  84. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/__init__.py +0 -0
  85. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/blip_for_question_answering.py +0 -0
  86. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
  87. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
  88. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/florence2_transformer.py +0 -0
  89. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/gemma3_for_multimodal.py +0 -0
  90. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/internvl_for_multimodal.py +0 -0
  91. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/janus_for_multimodal.py +0 -0
  92. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/llava_for_multimodal.py +0 -0
  93. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/mllama_for_multimodal.py +0 -0
  94. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/paligemma_for_multimodal.py +0 -0
  95. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/phi3_vision_for_multimodal.py +0 -0
  96. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/qwen2vl_transformer.py +0 -0
  97. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/smolvlm_transformer.py +0 -0
  98. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
  99. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
  100. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
  101. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/dataframe_optimizer.py +0 -0
  102. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/date2_chunk.py +0 -0
  103. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/dependency/__init__.py +0 -0
  104. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
  105. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
  106. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/document_character_text_splitter.py +0 -0
  107. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/document_normalizer.py +0 -0
  108. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/document_token_splitter.py +0 -0
  109. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/document_token_splitter_test.py +0 -0
  110. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/__init__.py +0 -0
  111. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
  112. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/auto_gguf_embeddings.py +0 -0
  113. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
  114. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
  115. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -0
  116. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
  117. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
  118. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
  119. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
  120. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
  121. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
  122. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/e5v_embeddings.py +0 -0
  123. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
  124. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
  125. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
  126. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/minilm_embeddings.py +0 -0
  127. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
  128. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/mxbai_embeddings.py +0 -0
  129. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/nomic_embeddings.py +0 -0
  130. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
  131. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
  132. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
  133. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/snowflake_embeddings.py +0 -0
  134. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
  135. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
  136. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/word2vec.py +0 -0
  137. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
  138. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
  139. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
  140. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
  141. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/er/__init__.py +0 -0
  142. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/er/entity_ruler.py +0 -0
  143. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/graph_extraction.py +0 -0
  144. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
  145. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
  146. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/ld_dl/__init__.py +0 -0
  147. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
  148. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/lemmatizer.py +0 -0
  149. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/matcher/__init__.py +0 -0
  150. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
  151. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/matcher/date_matcher.py +0 -0
  152. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
  153. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
  154. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/matcher/text_matcher.py +0 -0
  155. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/n_gram_generator.py +0 -0
  156. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/ner/__init__.py +0 -0
  157. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/ner/ner_approach.py +0 -0
  158. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/ner/ner_converter.py +0 -0
  159. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/ner/ner_crf.py +0 -0
  160. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/ner/ner_dl.py +0 -0
  161. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
  162. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
  163. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/normalizer.py +0 -0
  164. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/openai/__init__.py +0 -0
  165. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/openai/openai_completion.py +0 -0
  166. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
  167. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/param/__init__.py +0 -0
  168. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/param/classifier_encoder.py +0 -0
  169. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
  170. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/pos/__init__.py +0 -0
  171. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/pos/perceptron.py +0 -0
  172. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/sentence/__init__.py +0 -0
  173. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
  174. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
  175. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/sentiment/__init__.py +0 -0
  176. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
  177. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
  178. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +0 -0
  179. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
  180. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/cohere_transformer.py +0 -0
  181. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/cpm_transformer.py +0 -0
  182. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
  183. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
  184. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/llama3_transformer.py +0 -0
  185. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
  186. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
  187. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
  188. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/nllb_transformer.py +0 -0
  189. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/olmo_transformer.py +0 -0
  190. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
  191. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/phi3_transformer.py +0 -0
  192. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/qwen_transformer.py +0 -0
  193. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/starcoder_transformer.py +0 -0
  194. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
  195. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/similarity/__init__.py +0 -0
  196. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
  197. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/spell_check/__init__.py +0 -0
  198. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
  199. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
  200. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
  201. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/stemmer.py +0 -0
  202. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/stop_words_cleaner.py +0 -0
  203. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
  204. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/token/__init__.py +0 -0
  205. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
  206. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
  207. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
  208. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/token/tokenizer.py +0 -0
  209. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/token2_chunk.py +0 -0
  210. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/ws/__init__.py +0 -0
  211. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/annotator/ws/word_segmenter.py +0 -0
  212. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/__init__.py +0 -0
  213. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/audio_assembler.py +0 -0
  214. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/doc2_chunk.py +0 -0
  215. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/document_assembler.py +0 -0
  216. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/embeddings_finisher.py +0 -0
  217. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/finisher.py +0 -0
  218. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/graph_finisher.py +0 -0
  219. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/has_recursive_fit.py +0 -0
  220. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/has_recursive_transform.py +0 -0
  221. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/image_assembler.py +0 -0
  222. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/light_pipeline.py +0 -0
  223. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/multi_document_assembler.py +0 -0
  224. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/prompt_assembler.py +0 -0
  225. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/recursive_pipeline.py +0 -0
  226. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/table_assembler.py +0 -0
  227. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/base/token_assembler.py +0 -0
  228. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/__init__.py +0 -0
  229. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/annotator_approach.py +0 -0
  230. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/annotator_model.py +0 -0
  231. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/annotator_properties.py +0 -0
  232. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/annotator_type.py +0 -0
  233. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/coverage_result.py +0 -0
  234. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/match_strategy.py +0 -0
  235. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/read_as.py +0 -0
  236. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/recursive_annotator_approach.py +0 -0
  237. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/storage.py +0 -0
  238. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/common/utils.py +0 -0
  239. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/functions.py +0 -0
  240. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/internal/annotator_java_ml.py +0 -0
  241. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/internal/annotator_transformer.py +0 -0
  242. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/internal/extended_java_wrapper.py +0 -0
  243. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/internal/params_getters_setters.py +0 -0
  244. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/internal/recursive.py +0 -0
  245. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/logging/__init__.py +0 -0
  246. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/logging/comet.py +0 -0
  247. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/partition/__init__.py +0 -0
  248. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/partition/partition.py +0 -0
  249. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/partition/partition_properties.py +0 -0
  250. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/partition/partition_transformer.py +0 -0
  251. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/pretrained/__init__.py +0 -0
  252. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
  253. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/pretrained/resource_downloader.py +0 -0
  254. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/pretrained/utils.py +0 -0
  255. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/reader/__init__.py +0 -0
  256. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/reader/enums.py +0 -0
  257. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/reader/pdf_to_text.py +0 -0
  258. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/__init__.py +0 -0
  259. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  260. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
  261. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  262. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
  263. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
  264. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
  265. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
  266. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
  267. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
  268. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
  269. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
  270. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
  271. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
  272. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
  273. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
  274. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  275. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
  276. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  277. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
  278. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
  279. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
  280. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
  281. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
  282. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/conll.py +0 -0
  283. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/conllu.py +0 -0
  284. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/pos.py +0 -0
  285. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/pub_tator.py +0 -0
  286. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/spacy_to_annotation.py +0 -0
  287. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/training/tfgraphs.py +0 -0
  288. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/upload_to_hub.py +0 -0
  289. {spark_nlp-6.0.5 → spark_nlp-6.1.0}/sparknlp/util.py +0 -0
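
Most of the hunks below are the mechanical 6.0.5 → 6.1.0 version bump applied across packaging metadata and docs; the functional additions are the new Phi4Transformer and Reader2Doc components and the AutoGGUFModel default changes. For an end user, the version bump reduces to re-pinning the package, exactly as the README hunks show:

$ pip install spark-nlp==6.1.0 pyspark==3.3.1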
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spark-nlp
-Version: 6.0.5
+Version: 6.1.0
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.0.5 pyspark==3.3.1
+$ pip install spark-nlp==6.1.0 pyspark==3.3.1
 ```

 In Python console or Jupyter `Python3` kernel:
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d

 ### Apache Spark Support

-Spark NLP *6.0.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.1.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x

 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http

 ### Databricks Support

-Spark NLP 6.0.5 has been tested and is compatible with the following runtimes:
+Spark NLP 6.1.0 has been tested and is compatible with the following runtimes:

 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -215,7 +215,7 @@ We are compatible with older runtimes. For a full list check databricks support

 ### EMR Support

-Spark NLP 6.0.5 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.1.0 has been tested and is compatible with the following EMR releases:

 | **EMR Release** |
 |--------------------|
README.md
@@ -63,7 +63,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.0.5 pyspark==3.3.1
+$ pip install spark-nlp==6.1.0 pyspark==3.3.1
 ```

 In Python console or Jupyter `Python3` kernel:
@@ -129,7 +129,7 @@ For a quick example of using pipelines and models take a look at our official [d

 ### Apache Spark Support

-Spark NLP *6.0.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.1.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x

 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -159,7 +159,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http

 ### Databricks Support

-Spark NLP 6.0.5 has been tested and is compatible with the following runtimes:
+Spark NLP 6.1.0 has been tested and is compatible with the following runtimes:

 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -176,7 +176,7 @@ We are compatible with older runtimes. For a full list check databricks support

 ### EMR Support

-Spark NLP 6.0.5 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.1.0 has been tested and is compatible with the following EMR releases:

 | **EMR Release** |
 |--------------------|
setup.py
@@ -41,7 +41,7 @@ setup(
     # project code, see
     # https://packaging.python.org/en/latest/single_source_version.html

-    version='6.0.5',  # Required
+    version='6.1.0',  # Required

     # This is a one-line description or tagline of what your project does. This
     # corresponds to the 'Summary' metadata field:
spark_nlp.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spark-nlp
-Version: 6.0.5
+Version: 6.1.0
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.0.5 pyspark==3.3.1
+$ pip install spark-nlp==6.1.0 pyspark==3.3.1
 ```

 In Python console or Jupyter `Python3` kernel:
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d

 ### Apache Spark Support

-Spark NLP *6.0.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.1.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x

 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http

 ### Databricks Support

-Spark NLP 6.0.5 has been tested and is compatible with the following runtimes:
+Spark NLP 6.1.0 has been tested and is compatible with the following runtimes:

 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -215,7 +215,7 @@ We are compatible with older runtimes. For a full list check databricks support

 ### EMR Support

-Spark NLP 6.0.5 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.1.0 has been tested and is compatible with the following EMR releases:

 | **EMR Release** |
 |--------------------|
spark_nlp.egg-info/SOURCES.txt
@@ -190,6 +190,7 @@ sparknlp/annotator/seq2seq/nllb_transformer.py
 sparknlp/annotator/seq2seq/olmo_transformer.py
 sparknlp/annotator/seq2seq/phi2_transformer.py
 sparknlp/annotator/seq2seq/phi3_transformer.py
+sparknlp/annotator/seq2seq/phi4_transformer.py
 sparknlp/annotator/seq2seq/qwen_transformer.py
 sparknlp/annotator/seq2seq/starcoder_transformer.py
 sparknlp/annotator/seq2seq/t5_transformer.py
@@ -253,6 +254,7 @@ sparknlp/pretrained/utils.py
 sparknlp/reader/__init__.py
 sparknlp/reader/enums.py
 sparknlp/reader/pdf_to_text.py
+sparknlp/reader/reader2doc.py
 sparknlp/reader/sparknlp_reader.py
 sparknlp/training/__init__.py
 sparknlp/training/conll.py
sparknlp/__init__.py
@@ -66,7 +66,7 @@ sys.modules['com.johnsnowlabs.ml.ai'] = annotator
 annotators = annotator
 embeddings = annotator

-__version__ = "6.0.5"
+__version__ = "6.1.0"


 def start(gpu=False,
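
With `__version__` bumped, a quick post-upgrade sanity check is to read the version back at runtime; a minimal sketch, assuming the standard `sparknlp.start()` and `sparknlp.version()` entry points:

import sparknlp

# start() returns a SparkSession with the matching Spark NLP jars attached
spark = sparknlp.start()

# version() reports the __version__ string changed in the hunk above
print(sparknlp.version())  # expected: 6.1.0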
sparknlp/annotator/seq2seq/__init__.py
@@ -31,3 +31,4 @@ from sparknlp.annotator.seq2seq.starcoder_transformer import *
 from sparknlp.annotator.seq2seq.llama3_transformer import *
 from sparknlp.annotator.seq2seq.cohere_transformer import *
 from sparknlp.annotator.seq2seq.olmo_transformer import *
+from sparknlp.annotator.seq2seq.phi4_transformer import *
sparknlp/annotator/seq2seq/auto_gguf_model.py
@@ -253,7 +253,9 @@ class AutoGGUFModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties):
             nCtx=4096,
             nBatch=512,
             embedding=False,
-            nPredict=100
+            nPredict=100,
+            nGpuLayers=99,
+            systemPrompt="You are a helpful assistant."
         )

     @staticmethod
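
The two new defaults make every model layer eligible for GPU offload (`nGpuLayers=99`) and inject a generic system prompt. A minimal sketch of overriding them, assuming the `setNGpuLayers`/`setSystemPrompt` setters from `HasLlamaCppProperties` and an available pretrained GGUF model:

from sparknlp.annotator import AutoGGUFModel

autoGGUF = AutoGGUFModel.pretrained() \
    .setInputCols(["document"]) \
    .setOutputCol("completions") \
    .setNGpuLayers(0) \
    .setSystemPrompt("You are a terse technical assistant.")
# setNGpuLayers(0) keeps inference on the CPU instead of the new all-layers-on-GPU default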
sparknlp/annotator/seq2seq/phi4_transformer.py (new file)
@@ -0,0 +1,387 @@
+# Copyright 2017-2024 John Snow Labs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains classes for the Phi4Transformer."""
+
+from sparknlp.common import *
+
+
+class Phi4Transformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
+    """Phi-4: state-of-the-art open model by Microsoft Research.
+
+    Phi-4 is a 14B-parameter, dense decoder-only Transformer model trained on
+    9.8T tokens, designed for advanced reasoning, code, and general NLP tasks.
+    For more details, see: https://huggingface.co/microsoft/phi-4
+
+    Model Overview
+    --------------
+    - 14B parameters, dense decoder-only Transformer
+    - 16K context length
+    - Trained on 9.8T tokens (synthetic, public domain, academic, Q&A, code)
+    - Focus on high-quality, advanced reasoning, math, code, and general NLP
+    - Multilingual data: ~8% (primarily English)
+    - Released under the MIT License
+
+    Intended Use
+    ------------
+    - General-purpose AI, research, and generative features
+    - Memory/compute-constrained and latency-bound environments
+    - Reasoning, logic, and code generation
+
+    Benchmarks
+    ----------
+    - MMLU: 84.8 | HumanEval: 82.6 | GPQA: 56.1 | DROP: 75.5 | MATH: 80.6
+    - Outperforms or matches other 14B/70B models on many tasks
+
+    Safety & Limitations
+    --------------------
+    - Safety alignment via SFT and DPO, red-teamed by Microsoft AIRT
+    - Not intended for high-risk or consequential domains without further
+      assessment
+    - Primarily English; other languages may have reduced performance
+    - May generate inaccurate, offensive, or biased content; use with care
+
+    Usage
+    -----
+    Pretrained models can be loaded with :meth:`.pretrained` of the companion
+    object:
+
+    >>> phi4 = Phi4Transformer.pretrained() \
+    ...     .setInputCols(["document"]) \
+    ...     .setOutputCol("generation")
+
+    The default model is ``"phi-4"``, if no name is provided. For available
+    pretrained models please see the
+    `Models Hub <https://huggingface.co/microsoft/phi-4>`__.
+
+    Note
+    ----
+    This is a resource-intensive module, especially with larger models and
+    sequences. The use of accelerators such as GPUs is strongly recommended.
+
+    References
+    ----------
+    - https://huggingface.co/microsoft/phi-4
+    - arXiv:2412.08905
+
+    ====================== ======================
+    Input Annotation types Output Annotation type
+    ====================== ======================
+    ``DOCUMENT``           ``DOCUMENT``
+    ====================== ======================
+
+    Parameters
+    ----------
+    configProtoBytes
+        ConfigProto from tensorflow, serialized into byte array.
+    minOutputLength
+        Minimum length of the sequence to be generated, by default 0
+    maxOutputLength
+        Maximum length of output text, by default 20
+    doSample
+        Whether or not to use sampling; use greedy decoding otherwise, by
+        default False
+    temperature
+        The value used to modulate the next token probabilities, by default 0.6
+    topK
+        The number of highest probability vocabulary tokens to keep for
+        top-k-filtering, by default -1 (disabled)
+    topP
+        Top cumulative probability for vocabulary tokens, by default 0.9
+
+        If set to float < 1, only the most probable tokens with probabilities
+        that add up to ``topP`` or higher are kept for generation.
+    repetitionPenalty
+        The parameter for repetition penalty; 1.0 means no penalty, by default
+        1.0
+    noRepeatNgramSize
+        If set to int > 0, all ngrams of that size can only occur once, by
+        default 3
+    ignoreTokenIds
+        A list of token ids which are ignored in the decoder's output, by
+        default []
+
+    Notes
+    -----
+    This is a very computationally expensive module, especially on larger
+    sequences. The use of an accelerator such as GPU is recommended.
+
+    Examples
+    --------
+    >>> import sparknlp
+    >>> from sparknlp.base import *
+    >>> from sparknlp.annotator import *
+    >>> from pyspark.ml import Pipeline
+    >>> documentAssembler = DocumentAssembler() \
+    ...     .setInputCol("text") \
+    ...     .setOutputCol("documents")
+    >>> phi4 = Phi4Transformer.pretrained("phi-4") \
+    ...     .setInputCols(["documents"]) \
+    ...     .setMaxOutputLength(60) \
+    ...     .setOutputCol("generation")
+    >>> pipeline = Pipeline().setStages([documentAssembler, phi4])
+    >>> data = spark.createDataFrame([
+    ...     (
+    ...         1,
+    ...         "<|start_header_id|>system<|end_header_id|> \\n" + \
+    ...         "You are a helpful assistant! \\n" + \
+    ...         "<|start_header_id|>user<|end_header_id|> \\n" + \
+    ...         "What is Phi-4? \\n" + \
+    ...         "<|start_header_id|>assistant<|end_header_id|> \\n"
+    ...     )
+    ... ]).toDF("id", "text")
+    >>> result = pipeline.fit(data).transform(data)
+    >>> result.select("generation.result").show(truncate=False)
+    +------------------------------------------------------------------------------------------------------------------------------------------------------+
+    |result                                                                                                                                                  |
+    +------------------------------------------------------------------------------------------------------------------------------------------------------+
+    |[Phi-4 is a 14B parameter, dense decoder-only Transformer model developed by Microsoft Research for advanced reasoning, code, and general NLP tasks.]  |
+    +------------------------------------------------------------------------------------------------------------------------------------------------------+
+    """
+
+    name = "Phi4Transformer"
+
+    inputAnnotatorTypes = [AnnotatorType.DOCUMENT]
+
+    outputAnnotatorType = AnnotatorType.DOCUMENT
+
+    configProtoBytes = Param(Params._dummy(),
+                             "configProtoBytes",
+                             "ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()",
+                             TypeConverters.toListInt)
+
+    minOutputLength = Param(Params._dummy(), "minOutputLength", "Minimum length of the sequence to be generated",
+                            typeConverter=TypeConverters.toInt)
+
+    maxOutputLength = Param(Params._dummy(), "maxOutputLength", "Maximum length of output text",
+                            typeConverter=TypeConverters.toInt)
+
+    doSample = Param(Params._dummy(), "doSample", "Whether or not to use sampling; use greedy decoding otherwise",
+                     typeConverter=TypeConverters.toBoolean)
+
+    temperature = Param(Params._dummy(), "temperature", "The value used to modulate the next token probabilities",
+                        typeConverter=TypeConverters.toFloat)
+
+    topK = Param(Params._dummy(), "topK",
+                 "The number of highest probability vocabulary tokens to keep for top-k-filtering",
+                 typeConverter=TypeConverters.toInt)
+
+    topP = Param(Params._dummy(), "topP",
+                 "If set to float < 1, only the most probable tokens with probabilities that add up to ``top_p`` or higher are kept for generation",
+                 typeConverter=TypeConverters.toFloat)
+
+    repetitionPenalty = Param(Params._dummy(), "repetitionPenalty",
+                              "The parameter for repetition penalty. 1.0 means no penalty. See `this paper <https://arxiv.org/pdf/1909.05858.pdf>`__ for more details",
+                              typeConverter=TypeConverters.toFloat)
+
+    noRepeatNgramSize = Param(Params._dummy(), "noRepeatNgramSize",
+                              "If set to int > 0, all ngrams of that size can only occur once",
+                              typeConverter=TypeConverters.toInt)
+
+    ignoreTokenIds = Param(Params._dummy(), "ignoreTokenIds",
+                           "A list of token ids which are ignored in the decoder's output",
+                           typeConverter=TypeConverters.toListInt)
+
+    beamSize = Param(Params._dummy(), "beamSize",
+                     "The number of beams to use for beam search",
+                     typeConverter=TypeConverters.toInt)
+
+    stopTokenIds = Param(Params._dummy(), "stopTokenIds",
+                         "A list of token ids which are considered as stop tokens in the decoder's output",
+                         typeConverter=TypeConverters.toListInt)
+
+    def setIgnoreTokenIds(self, value):
+        """Sets a list of token ids which are ignored in the decoder's output.
+
+        Parameters
+        ----------
+        value : List[int]
+            The words to be filtered out
+        """
+        return self._set(ignoreTokenIds=value)
+
+    def setConfigProtoBytes(self, b):
+        """Sets configProto from tensorflow, serialized into byte array.
+
+        Parameters
+        ----------
+        b : List[int]
+            ConfigProto from tensorflow, serialized into byte array
+        """
+        return self._set(configProtoBytes=b)
+
+    def setMinOutputLength(self, value):
+        """Sets minimum length of the sequence to be generated.
+
+        Parameters
+        ----------
+        value : int
+            Minimum length of the sequence to be generated
+        """
+        return self._set(minOutputLength=value)
+
+    def setMaxOutputLength(self, value):
+        """Sets maximum length of output text.
+
+        Parameters
+        ----------
+        value : int
+            Maximum length of output text
+        """
+        return self._set(maxOutputLength=value)
+
+    def setDoSample(self, value):
+        """Sets whether or not to use sampling; use greedy decoding otherwise.
+
+        Parameters
+        ----------
+        value : bool
+            Whether or not to use sampling; use greedy decoding otherwise
+        """
+        return self._set(doSample=value)
+
+    def setTemperature(self, value):
+        """Sets the value used to modulate the next token probabilities.
+
+        Parameters
+        ----------
+        value : float
+            The value used to modulate the next token probabilities
+        """
+        return self._set(temperature=value)
+
+    def setTopK(self, value):
+        """Sets the number of highest probability vocabulary tokens to keep for
+        top-k-filtering.
+
+        Parameters
+        ----------
+        value : int
+            Number of highest probability vocabulary tokens to keep
+        """
+        return self._set(topK=value)
+
+    def setTopP(self, value):
+        """Sets the top cumulative probability for vocabulary tokens.
+
+        If set to float < 1, only the most probable tokens with probabilities
+        that add up to ``topP`` or higher are kept for generation.
+
+        Parameters
+        ----------
+        value : float
+            Cumulative probability for vocabulary tokens
+        """
+        return self._set(topP=value)
+
+    def setRepetitionPenalty(self, value):
+        """Sets the parameter for repetition penalty. 1.0 means no penalty.
+
+        Parameters
+        ----------
+        value : float
+            The repetition penalty
+
+        References
+        ----------
+        See `Ctrl: A Conditional Transformer Language Model For Controllable
+        Generation <https://arxiv.org/pdf/1909.05858.pdf>`__ for more details.
+        """
+        return self._set(repetitionPenalty=value)
+
+    def setNoRepeatNgramSize(self, value):
+        """Sets size of n-grams that can only occur once.
+
+        If set to int > 0, all ngrams of that size can only occur once.
+
+        Parameters
+        ----------
+        value : int
+            N-gram size that can only occur once
+        """
+        return self._set(noRepeatNgramSize=value)
+
+    def setBeamSize(self, value):
+        """Sets the number of beams to use for beam search.
+
+        Parameters
+        ----------
+        value : int
+            The number of beams to use for beam search
+        """
+        return self._set(beamSize=value)
+
+    def setStopTokenIds(self, value):
+        """Sets a list of token ids which are considered as stop tokens in the
+        decoder's output.
+
+        Parameters
+        ----------
+        value : List[int]
+            The words to be considered as stop tokens
+        """
+        return self._set(stopTokenIds=value)
+
+    @keyword_only
+    def __init__(self, classname="com.johnsnowlabs.nlp.annotators.seq2seq.Phi4Transformer", java_model=None):
+        super(Phi4Transformer, self).__init__(
+            classname=classname,
+            java_model=java_model
+        )
+        self._setDefault(
+            minOutputLength=0,
+            maxOutputLength=20,
+            doSample=False,
+            temperature=0.6,
+            topK=-1,
+            topP=0.9,
+            repetitionPenalty=1.0,
+            noRepeatNgramSize=3,
+            ignoreTokenIds=[],
+            batchSize=1,
+            beamSize=1,
+            stopTokenIds=[128001]
+        )
+
+    @staticmethod
+    def loadSavedModel(folder, spark_session, use_openvino=False):
+        """Loads a locally saved model.
+
+        Parameters
+        ----------
+        folder : str
+            Folder of the saved model
+        spark_session : pyspark.sql.SparkSession
+            The current SparkSession
+        use_openvino : bool, optional
+            Whether to use the OpenVINO engine, by default False
+
+        Returns
+        -------
+        Phi4Transformer
+            The restored model
+        """
+        from sparknlp.internal import _Phi4Loader
+        jModel = _Phi4Loader(folder, spark_session._jsparkSession, use_openvino)._java_obj
+        return Phi4Transformer(java_model=jModel)
+
+    @staticmethod
+    def pretrained(name="phi-4", lang="en", remote_loc=None):
+        """Downloads and loads a pretrained model.
+
+        Parameters
+        ----------
+        name : str, optional
+            Name of the pretrained model, by default "phi-4"
+        lang : str, optional
+            Language of the pretrained model, by default "en"
+        remote_loc : str, optional
+            Optional remote address of the resource, by default None. Will use
+            Spark NLP's repositories otherwise.
+
+        Returns
+        -------
+        Phi4Transformer
+            The restored model
+        """
+        from sparknlp.pretrained import ResourceDownloader
+        return ResourceDownloader.downloadModel(Phi4Transformer, name, lang, remote_loc)
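
Since `sparknlp/annotator/seq2seq/__init__.py` now re-exports this module, the class is importable directly from `sparknlp.annotator`. A minimal sketch combining the setters defined above, assuming the default `phi-4` pretrained model is available:

from pyspark.ml import Pipeline
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Phi4Transformer

documentAssembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("documents")

# sampling-based decoding; the defaults from _setDefault are the starting point
phi4 = Phi4Transformer.pretrained() \
    .setInputCols(["documents"]) \
    .setOutputCol("generation") \
    .setDoSample(True) \
    .setTemperature(0.6) \
    .setTopP(0.9) \
    .setMaxOutputLength(128)

pipeline = Pipeline().setStages([documentAssembler, phi4])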