spark-nlp 5.5.3.tar.gz → 6.0.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/PKG-INFO +20 -11
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/README.md +19 -10
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/setup.py +1 -1
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/spark_nlp.egg-info/PKG-INFO +20 -11
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/spark_nlp.egg-info/SOURCES.txt +20 -1
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/__init__.py +2 -2
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/__init__.py +4 -0
- spark_nlp-6.0.1/sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +2 -2
- spark_nlp-6.0.1/sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
- spark_nlp-6.0.1/sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
- spark_nlp-6.0.1/sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
- spark_nlp-6.0.1/sparknlp/annotator/cleaners/__init__.py +15 -0
- spark_nlp-6.0.1/sparknlp/annotator/cleaners/cleaner.py +202 -0
- spark_nlp-6.0.1/sparknlp/annotator/cleaners/extractor.py +191 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/cv/__init__.py +9 -1
- spark_nlp-6.0.1/sparknlp/annotator/cv/gemma3_for_multimodal.py +351 -0
- spark_nlp-6.0.1/sparknlp/annotator/cv/janus_for_multimodal.py +356 -0
- spark_nlp-6.0.1/sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
- spark_nlp-6.0.1/sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
- spark_nlp-6.0.1/sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
- spark_nlp-6.0.1/sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
- spark_nlp-6.0.1/sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
- spark_nlp-6.0.1/sparknlp/annotator/cv/smolvlm_transformer.py +432 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/auto_gguf_embeddings.py +10 -6
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/__init__.py +3 -0
- spark_nlp-6.0.1/sparknlp/annotator/seq2seq/auto_gguf_model.py +299 -0
- spark_nlp-6.0.1/sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +333 -0
- spark_nlp-6.0.1/sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/llama3_transformer.py +4 -4
- spark_nlp-6.0.1/sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/image_assembler.py +58 -0
- spark-nlp-5.5.3/sparknlp/annotator/seq2seq/auto_gguf_model.py → spark_nlp-6.0.1/sparknlp/common/properties.py +755 -280
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/internal/__init__.py +127 -2
- spark_nlp-6.0.1/sparknlp/reader/enums.py +19 -0
- spark_nlp-6.0.1/sparknlp/reader/pdf_to_text.py +111 -0
- spark_nlp-6.0.1/sparknlp/reader/sparknlp_reader.py +321 -0
- spark-nlp-5.5.3/spark_nlp.egg-info/.uuid +0 -1
- spark-nlp-5.5.3/sparknlp/common/properties.py +0 -760
- spark-nlp-5.5.3/sparknlp/reader/sparknlp_reader.py +0 -113
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/com/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/com/johnsnowlabs/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/com/johnsnowlabs/ml/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/com/johnsnowlabs/nlp/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/setup.cfg +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/spark_nlp.egg-info/dependency_links.txt +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/spark_nlp.egg-info/top_level.txt +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotation.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotation_audio.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotation_image.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/audio/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/chunk2_doc.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/chunker.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/coref/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/cv/blip_for_question_answering.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/date2_chunk.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/dependency/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/document_character_text_splitter.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/document_normalizer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/document_token_splitter.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/document_token_splitter_test.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/mxbai_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/nomic_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/snowflake_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/word2vec.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/er/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/er/entity_ruler.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/graph_extraction.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/ld_dl/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/lemmatizer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/date_matcher.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/matcher/text_matcher.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/n_gram_generator.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/ner/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/ner/ner_approach.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/ner/ner_converter.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/ner/ner_crf.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/ner/ner_dl.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/normalizer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/openai/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/openai/openai_completion.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/param/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/param/classifier_encoder.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/pos/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/pos/perceptron.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/sentence/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/sentiment/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/cpm_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/nllb_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/phi3_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/qwen_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/starcoder_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/similarity/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/spell_check/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/stemmer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/stop_words_cleaner.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/token/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/token/tokenizer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/token2_chunk.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/ws/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/ws/word_segmenter.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/audio_assembler.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/doc2_chunk.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/document_assembler.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/embeddings_finisher.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/finisher.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/graph_finisher.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/has_recursive_fit.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/has_recursive_transform.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/light_pipeline.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/multi_document_assembler.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/prompt_assembler.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/recursive_pipeline.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/table_assembler.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/base/token_assembler.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/common/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/common/annotator_approach.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/common/annotator_model.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/common/annotator_properties.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/common/annotator_type.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/common/coverage_result.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/common/match_strategy.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/common/read_as.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/common/recursive_annotator_approach.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/common/storage.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/common/utils.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/functions.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/internal/annotator_java_ml.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/internal/annotator_transformer.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/internal/extended_java_wrapper.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/internal/params_getters_setters.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/internal/recursive.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/logging/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/logging/comet.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/pretrained/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/pretrained/resource_downloader.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/pretrained/utils.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/reader/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/conll.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/conllu.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/pos.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/pub_tator.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/spacy_to_annotation.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/training/tfgraphs.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/upload_to_hub.py +0 -0
- {spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/util.py +0 -0
{spark-nlp-5.5.3 → spark_nlp-6.0.1}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spark-nlp
-Version: 5.5.3
+Version: 6.0.1
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -87,7 +87,7 @@ documentation and examples

 ## Quick Start

-This is a quick example of how to use Spark NLP pre-trained pipeline in Python and PySpark:
+This is a quick example of how to use a Spark NLP pre-trained pipeline in Python and PySpark:

 ```sh
 $ java -version
@@ -95,7 +95,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==5.5.3 pyspark==3.3.1
+$ pip install spark-nlp==6.0.1 pyspark==3.3.1
 ```

 In Python console or Jupyter `Python3` kernel:
@@ -161,10 +161,11 @@ For a quick example of using pipelines and models take a look at our official [d

 ### Apache Spark Support

-Spark NLP *5.5.3* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.0.1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x

 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
+| 6.0.x | YES | YES | YES | YES | YES | YES | NO | NO |
 | 5.5.x | YES | YES | YES | YES | YES | YES | NO | NO |
 | 5.4.x | YES | YES | YES | YES | YES | YES | NO | NO |
 | 5.3.x | YES | YES | YES | YES | YES | YES | NO | NO |
@@ -178,6 +179,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github

 | Spark NLP | Python 3.6 | Python 3.7 | Python 3.8 | Python 3.9 | Python 3.10| Scala 2.11 | Scala 2.12 |
 |-----------|------------|------------|------------|------------|------------|------------|------------|
+| 6.0.x | NO | YES | YES | YES | YES | NO | YES |
 | 5.5.x | NO | YES | YES | YES | YES | NO | YES |
 | 5.4.x | NO | YES | YES | YES | YES | NO | YES |
 | 5.3.x | NO | YES | YES | YES | YES | NO | YES |
@@ -189,7 +191,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http

 ### Databricks Support

-Spark NLP 5.5.3 has been tested and is compatible with the following runtimes:
+Spark NLP 6.0.1 has been tested and is compatible with the following runtimes:

 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -206,7 +208,7 @@ We are compatible with older runtimes. For a full list check databricks support

 ### EMR Support

-Spark NLP 5.5.3 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.0.1 has been tested and is compatible with the following EMR releases:

 | **EMR Release** |
 |--------------------|
@@ -216,6 +218,13 @@ Spark NLP 5.5.3 has been tested and is compatible with the following EMR release
 | emr-7.0.0 |
 | emr-7.1.0 |
 | emr-7.2.0 |
+| emr-7.3.0 |
+| emr-7.4.0 |
+| emr-7.5.0 |
+| emr-7.6.0 |
+| emr-7.7.0 |
+| emr-7.8.0 |
+

 We are compatible with older EMR releases. For a full list check EMR support in our official [documentation](https://sparknlp.org/docs/en/install#emr-support)

@@ -237,7 +246,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
 from our official documentation.

 If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
-projects [Spark NLP
+projects [Spark NLP Starter](https://github.com/maziyarpanahi/spark-nlp-starter)

 ### Python

@@ -246,7 +255,7 @@ Check all available installations for Python in our official [documentation](htt

 ### Compiled JARs

-To compile the jars from source follow [these instructions](https://sparknlp.org/docs/en/compiled#jars) from our official
+To compile the jars from source follow [these instructions](https://sparknlp.org/docs/en/compiled#jars) from our official documentation

 ## Platform-Specific Instructions

@@ -266,7 +275,7 @@ For detailed instructions on how to use Spark NLP on supported platforms, please

 Spark NLP library and all the pre-trained models/pipelines can be used entirely offline with no access to the Internet.
 Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation
-to use Spark NLP offline
+to use Spark NLP offline.

 ## Advanced Settings

@@ -282,7 +291,7 @@ In Spark NLP we can define S3 locations to:

 Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation.

-##
+## Documentation

 ### Examples

@@ -315,7 +324,7 @@ the Spark NLP library:
 keywords = {Spark, Natural language processing, Deep learning, Tensorflow, Cluster},
 abstract = {Spark NLP is a Natural Language Processing (NLP) library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines that can scale easily in a distributed environment. Spark NLP comes with 1100+ pretrained pipelines and models in more than 192+ languages. It supports nearly all the NLP tasks and modules that can be used seamlessly in a cluster. Downloaded more than 2.7 million times and experiencing 9x growth since January 2020, Spark NLP is used by 54% of healthcare organizations as the world’s most widely used NLP library in the enterprise.}
 }
-}
+}
 ```

 ## Community support
{spark-nlp-5.5.3 → spark_nlp-6.0.1}/README.md
RENAMED
@@ -55,7 +55,7 @@ documentation and examples

 ## Quick Start

-This is a quick example of how to use Spark NLP pre-trained pipeline in Python and PySpark:
+This is a quick example of how to use a Spark NLP pre-trained pipeline in Python and PySpark:

 ```sh
 $ java -version
@@ -63,7 +63,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==5.5.3 pyspark==3.3.1
+$ pip install spark-nlp==6.0.1 pyspark==3.3.1
 ```

 In Python console or Jupyter `Python3` kernel:
@@ -129,10 +129,11 @@ For a quick example of using pipelines and models take a look at our official [d

 ### Apache Spark Support

-Spark NLP *5.5.3* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.0.1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x

 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
+| 6.0.x | YES | YES | YES | YES | YES | YES | NO | NO |
 | 5.5.x | YES | YES | YES | YES | YES | YES | NO | NO |
 | 5.4.x | YES | YES | YES | YES | YES | YES | NO | NO |
 | 5.3.x | YES | YES | YES | YES | YES | YES | NO | NO |
@@ -146,6 +147,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github

 | Spark NLP | Python 3.6 | Python 3.7 | Python 3.8 | Python 3.9 | Python 3.10| Scala 2.11 | Scala 2.12 |
 |-----------|------------|------------|------------|------------|------------|------------|------------|
+| 6.0.x | NO | YES | YES | YES | YES | NO | YES |
 | 5.5.x | NO | YES | YES | YES | YES | NO | YES |
 | 5.4.x | NO | YES | YES | YES | YES | NO | YES |
 | 5.3.x | NO | YES | YES | YES | YES | NO | YES |
@@ -157,7 +159,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http

 ### Databricks Support

-Spark NLP 5.5.3 has been tested and is compatible with the following runtimes:
+Spark NLP 6.0.1 has been tested and is compatible with the following runtimes:

 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -174,7 +176,7 @@ We are compatible with older runtimes. For a full list check databricks support

 ### EMR Support

-Spark NLP 5.5.3 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.0.1 has been tested and is compatible with the following EMR releases:

 | **EMR Release** |
 |--------------------|
@@ -184,6 +186,13 @@ Spark NLP 5.5.3 has been tested and is compatible with the following EMR release
 | emr-7.0.0 |
 | emr-7.1.0 |
 | emr-7.2.0 |
+| emr-7.3.0 |
+| emr-7.4.0 |
+| emr-7.5.0 |
+| emr-7.6.0 |
+| emr-7.7.0 |
+| emr-7.8.0 |
+

 We are compatible with older EMR releases. For a full list check EMR support in our official [documentation](https://sparknlp.org/docs/en/install#emr-support)

@@ -205,7 +214,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
 from our official documentation.

 If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
-projects [Spark NLP
+projects [Spark NLP Starter](https://github.com/maziyarpanahi/spark-nlp-starter)

 ### Python

@@ -214,7 +223,7 @@ Check all available installations for Python in our official [documentation](htt

 ### Compiled JARs

-To compile the jars from source follow [these instructions](https://sparknlp.org/docs/en/compiled#jars) from our official
+To compile the jars from source follow [these instructions](https://sparknlp.org/docs/en/compiled#jars) from our official documentation

 ## Platform-Specific Instructions

@@ -234,7 +243,7 @@ For detailed instructions on how to use Spark NLP on supported platforms, please

 Spark NLP library and all the pre-trained models/pipelines can be used entirely offline with no access to the Internet.
 Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation
-to use Spark NLP offline
+to use Spark NLP offline.

 ## Advanced Settings

@@ -250,7 +259,7 @@ In Spark NLP we can define S3 locations to:

 Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation.

-##
+## Documentation

 ### Examples

@@ -283,7 +292,7 @@ the Spark NLP library:
 keywords = {Spark, Natural language processing, Deep learning, Tensorflow, Cluster},
 abstract = {Spark NLP is a Natural Language Processing (NLP) library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines that can scale easily in a distributed environment. Spark NLP comes with 1100+ pretrained pipelines and models in more than 192+ languages. It supports nearly all the NLP tasks and modules that can be used seamlessly in a cluster. Downloaded more than 2.7 million times and experiencing 9x growth since January 2020, Spark NLP is used by 54% of healthcare organizations as the world’s most widely used NLP library in the enterprise.}
 }
-}
+}
 ```

 ## Community support
{spark-nlp-5.5.3 → spark_nlp-6.0.1}/setup.py
RENAMED
@@ -41,7 +41,7 @@ setup(
     # project code, see
     # https://packaging.python.org/en/latest/single_source_version.html

-    version='5.5.3',  # Required
+    version='6.0.1',  # Required

     # This is a one-line description or tagline of what your project does. This
     # corresponds to the 'Summary' metadata field:
{spark-nlp-5.5.3 → spark_nlp-6.0.1}/spark_nlp.egg-info/PKG-INFO
RENAMED
(Identical to the PKG-INFO diff above; the sdist ships the same metadata file in both locations.)
{spark-nlp-5.5.3 → spark_nlp-6.0.1}/spark_nlp.egg-info/SOURCES.txt
RENAMED
@@ -6,7 +6,6 @@ com/johnsnowlabs/__init__.py
 com/johnsnowlabs/ml/__init__.py
 com/johnsnowlabs/ml/ai/__init__.py
 com/johnsnowlabs/nlp/__init__.py
-spark_nlp.egg-info/.uuid
 spark_nlp.egg-info/PKG-INFO
 spark_nlp.egg-info/SOURCES.txt
 spark_nlp.egg-info/dependency_links.txt
@@ -39,6 +38,7 @@ sparknlp/annotator/audio/hubert_for_ctc.py
 sparknlp/annotator/audio/wav2vec2_for_ctc.py
 sparknlp/annotator/audio/whisper_for_ctc.py
 sparknlp/annotator/classifier_dl/__init__.py
+sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py
 sparknlp/annotator/classifier_dl/albert_for_question_answering.py
 sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py
 sparknlp/annotator/classifier_dl/albert_for_token_classification.py
@@ -62,6 +62,7 @@ sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py
 sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py
 sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py
 sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py
+sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py
 sparknlp/annotator/classifier_dl/longformer_for_question_answering.py
 sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py
 sparknlp/annotator/classifier_dl/longformer_for_token_classification.py
@@ -69,24 +70,37 @@ sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py
 sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py
 sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py
 sparknlp/annotator/classifier_dl/multi_classifier_dl.py
+sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py
 sparknlp/annotator/classifier_dl/roberta_for_question_answering.py
 sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py
 sparknlp/annotator/classifier_dl/roberta_for_token_classification.py
 sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py
 sparknlp/annotator/classifier_dl/sentiment_dl.py
 sparknlp/annotator/classifier_dl/tapas_for_question_answering.py
+sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py
 sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py
 sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py
 sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py
 sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py
 sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py
 sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py
+sparknlp/annotator/cleaners/__init__.py
+sparknlp/annotator/cleaners/cleaner.py
+sparknlp/annotator/cleaners/extractor.py
 sparknlp/annotator/coref/__init__.py
 sparknlp/annotator/coref/spanbert_coref.py
 sparknlp/annotator/cv/__init__.py
 sparknlp/annotator/cv/blip_for_question_answering.py
 sparknlp/annotator/cv/clip_for_zero_shot_classification.py
 sparknlp/annotator/cv/convnext_for_image_classification.py
+sparknlp/annotator/cv/gemma3_for_multimodal.py
+sparknlp/annotator/cv/janus_for_multimodal.py
+sparknlp/annotator/cv/llava_for_multimodal.py
+sparknlp/annotator/cv/mllama_for_multimodal.py
+sparknlp/annotator/cv/paligemma_for_multimodal.py
+sparknlp/annotator/cv/phi3_vision_for_multimodal.py
+sparknlp/annotator/cv/qwen2vl_transformer.py
+sparknlp/annotator/cv/smolvlm_transformer.py
 sparknlp/annotator/cv/swin_for_image_classification.py
 sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py
 sparknlp/annotator/cv/vit_for_image_classification.py
@@ -157,7 +171,9 @@ sparknlp/annotator/sentiment/sentiment_detector.py
 sparknlp/annotator/sentiment/vivekn_sentiment.py
 sparknlp/annotator/seq2seq/__init__.py
 sparknlp/annotator/seq2seq/auto_gguf_model.py
+sparknlp/annotator/seq2seq/auto_gguf_vision_model.py
 sparknlp/annotator/seq2seq/bart_transformer.py
+sparknlp/annotator/seq2seq/cohere_transformer.py
 sparknlp/annotator/seq2seq/cpm_transformer.py
 sparknlp/annotator/seq2seq/gpt2_transformer.py
 sparknlp/annotator/seq2seq/llama2_transformer.py
@@ -166,6 +182,7 @@ sparknlp/annotator/seq2seq/m2m100_transformer.py
 sparknlp/annotator/seq2seq/marian_transformer.py
 sparknlp/annotator/seq2seq/mistral_transformer.py
 sparknlp/annotator/seq2seq/nllb_transformer.py
+sparknlp/annotator/seq2seq/olmo_transformer.py
 sparknlp/annotator/seq2seq/phi2_transformer.py
 sparknlp/annotator/seq2seq/phi3_transformer.py
 sparknlp/annotator/seq2seq/qwen_transformer.py
@@ -225,6 +242,8 @@ sparknlp/pretrained/pretrained_pipeline.py
 sparknlp/pretrained/resource_downloader.py
 sparknlp/pretrained/utils.py
 sparknlp/reader/__init__.py
+sparknlp/reader/enums.py
+sparknlp/reader/pdf_to_text.py
 sparknlp/reader/sparknlp_reader.py
 sparknlp/training/__init__.py
 sparknlp/training/conll.py
{spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/__init__.py
RENAMED
@@ -132,7 +132,7 @@ def start(gpu=False,
         The initiated Spark session.

     """
-    current_version = "5.5.3"
+    current_version = "6.0.1"

     if params is None:
         params = {}
@@ -316,4 +316,4 @@ def version():
     str
         The current Spark NLP version.
     """
-    return '5.5.3'
+    return '6.0.1'
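The version bump touches both hard-coded strings in `sparknlp/__init__.py`, so `start()` and `version()` stay in sync with the package metadata. A minimal post-upgrade sanity check might look like this (a sketch, assuming Java and `pyspark` are installed alongside `spark-nlp==6.0.1`):

```python
import sparknlp

# version() simply returns the hard-coded string bumped in this diff.
assert sparknlp.version() == "6.0.1"

# start() builds (or reuses) a SparkSession; the current_version string
# above determines which spark-nlp jar it attaches to the session.
spark = sparknlp.start()
print(spark.version)  # the underlying Apache Spark version
```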
{spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/__init__.py
RENAMED
@@ -55,3 +55,7 @@ from sparknlp.annotator.classifier_dl.mpnet_for_token_classification import *
 from sparknlp.annotator.classifier_dl.albert_for_zero_shot_classification import *
 from sparknlp.annotator.classifier_dl.camembert_for_zero_shot_classification import *
 from sparknlp.annotator.classifier_dl.bert_for_multiple_choice import *
+from sparknlp.annotator.classifier_dl.xlm_roberta_for_multiple_choice import *
+from sparknlp.annotator.classifier_dl.roberta_for_multiple_choice import *
+from sparknlp.annotator.classifier_dl.distilbert_for_multiple_choice import *
+from sparknlp.annotator.classifier_dl.albert_for_multiple_choice import *
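With these wildcard re-exports in place, the new multiple-choice annotators become importable straight from `sparknlp.annotator` after upgrading. A quick check (a sketch; `AlbertForMultipleChoice` and its class attributes are confirmed by the new source file below):

```python
# No Spark session is needed just to import and inspect the class.
from sparknlp.annotator import AlbertForMultipleChoice

print(AlbertForMultipleChoice.name)                 # AlbertForMultipleChoice
print(AlbertForMultipleChoice.outputAnnotatorType)  # chunk
```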
spark_nlp-6.0.1/sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py
ADDED
@@ -0,0 +1,161 @@
+# Copyright 2017-2024 John Snow Labs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from sparknlp.common import *
+
+class AlbertForMultipleChoice(AnnotatorModel,
+                              HasCaseSensitiveProperties,
+                              HasBatchedAnnotate,
+                              HasEngine,
+                              HasMaxSentenceLengthLimit):
+    """AlbertForMultipleChoice can load ALBERT Models with a multiple choice classification head on top
+    (a linear layer on top of the pooled output and a softmax) e.g. for RocStories/SWAG tasks.
+
+    Pretrained models can be loaded with :meth:`.pretrained` of the companion
+    object:
+
+    >>> spanClassifier = AlbertForMultipleChoice.pretrained() \\
+    ...     .setInputCols(["document_question", "document_context"]) \\
+    ...     .setOutputCol("answer")
+
+    The default model is ``"albert_base_uncased_multiple_choice"``, if no name is
+    provided.
+
+    For available pretrained models please see the `Models Hub
+    <https://sparknlp.org/models?task=Multiple+Choice>`__.
+
+    To see which models are compatible and how to import them see
+    `Import Transformers into Spark NLP 🚀
+    <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
+
+    ====================== ======================
+    Input Annotation types Output Annotation type
+    ====================== ======================
+    ``DOCUMENT, DOCUMENT`` ``CHUNK``
+    ====================== ======================
+
+    Parameters
+    ----------
+    batchSize
+        Batch size. Large values allows faster processing but requires more
+        memory, by default 8
+    caseSensitive
+        Whether to ignore case in tokens for embeddings matching, by default
+        False
+    maxSentenceLength
+        Max sentence length to process, by default 512
+
+    Examples
+    --------
+    >>> import sparknlp
+    >>> from sparknlp.base import *
+    >>> from sparknlp.annotator import *
+    >>> from pyspark.ml import Pipeline
+    >>> documentAssembler = MultiDocumentAssembler() \\
+    ...     .setInputCols(["question", "context"]) \\
+    ...     .setOutputCols(["document_question", "document_context"])
+    >>> questionAnswering = AlbertForMultipleChoice.pretrained() \\
+    ...     .setInputCols(["document_question", "document_context"]) \\
+    ...     .setOutputCol("answer") \\
+    ...     .setCaseSensitive(False)
+    >>> pipeline = Pipeline().setStages([
+    ...     documentAssembler,
+    ...     questionAnswering
+    ... ])
+    >>> data = spark.createDataFrame([["The Eiffel Tower is located in which country??", "Germany, France, Italy"]]).toDF("question", "context")
+    >>> result = pipeline.fit(data).transform(data)
+    >>> result.select("answer.result").show(truncate=False)
+    +--------------------+
+    |result              |
+    +--------------------+
+    |[France]            |
+    +--------------------+
+    """
+    name = "AlbertForMultipleChoice"
+
+    inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.DOCUMENT]
+
+    outputAnnotatorType = AnnotatorType.CHUNK
+
+    choicesDelimiter = Param(Params._dummy(),
+                             "choicesDelimiter",
+                             "Delimiter character use to split the choices",
+                             TypeConverters.toString)
+
+    def setChoicesDelimiter(self, value):
+        """Sets delimiter character use to split the choices
+
+        Parameters
+        ----------
+        value : string
+            Delimiter character use to split the choices
+        """
+        return self._set(caseSensitive=value)
+
+    @keyword_only
+    def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.AlbertForMultipleChoice",
+                 java_model=None):
+        super(AlbertForMultipleChoice, self).__init__(
+            classname=classname,
+            java_model=java_model
+        )
+        self._setDefault(
+            batchSize=4,
+            maxSentenceLength=512,
+            caseSensitive=False,
+            choicesDelimiter = ","
+        )
+
+    @staticmethod
+    def loadSavedModel(folder, spark_session):
+        """Loads a locally saved model.
+
+        Parameters
+        ----------
+        folder : str
+            Folder of the saved model
+        spark_session : pyspark.sql.SparkSession
+            The current SparkSession
+
+        Returns
+        -------
+        BertForQuestionAnswering
+            The restored model
+        """
+        from sparknlp.internal import _AlbertMultipleChoiceLoader
+        jModel = _AlbertMultipleChoiceLoader(folder, spark_session._jsparkSession)._java_obj
+        return AlbertForMultipleChoice(java_model=jModel)
+
+    @staticmethod
+    def pretrained(name="albert_base_uncased_multiple_choice", lang="en", remote_loc=None):
+        """Downloads and loads a pretrained model.
+
+        Parameters
+        ----------
+        name : str, optional
+            Name of the pretrained model, by default
+            "bert_base_uncased_multiple_choice"
+        lang : str, optional
+            Language of the pretrained model, by default "en"
+        remote_loc : str, optional
+            Optional remote address of the resource, by default None. Will use
+            Spark NLPs repositories otherwise.
+
+        Returns
+        -------
+        BertForQuestionAnswering
+            The restored model
+        """
+        from sparknlp.pretrained import ResourceDownloader
+        return ResourceDownloader.downloadModel(AlbertForMultipleChoice, name, lang, remote_loc)
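The docstring above already demonstrates the full `Pipeline` flow. For low-latency, driver-local inference the fitted pipeline can also be wrapped in a `LightPipeline`. The sketch below assumes a running session from `sparknlp.start()` and network access to download the pretrained model, and it leaves the choices delimiter at its constructor default of `","` (note that, as released, `setChoicesDelimiter` assigns its value to `caseSensitive` rather than `choicesDelimiter`):

```python
import sparknlp
from sparknlp.base import MultiDocumentAssembler, LightPipeline
from sparknlp.annotator import AlbertForMultipleChoice
from pyspark.ml import Pipeline

spark = sparknlp.start()

documentAssembler = MultiDocumentAssembler() \
    .setInputCols(["question", "context"]) \
    .setOutputCols(["document_question", "document_context"])

multipleChoice = AlbertForMultipleChoice.pretrained() \
    .setInputCols(["document_question", "document_context"]) \
    .setOutputCol("answer")

# Nothing here is trainable, so fitting on an empty frame just wires the stages.
empty = spark.createDataFrame([["", ""]]).toDF("question", "context")
model = Pipeline(stages=[documentAssembler, multipleChoice]).fit(empty)

# LightPipeline trades Spark parallelism for fast single-machine inference;
# the second argument carries the comma-separated choices.
light = LightPipeline(model)
print(light.annotate(
    "The Eiffel Tower is located in which country?",
    "Germany, France, Italy",
))
```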
{spark-nlp-5.5.3 → spark_nlp-6.0.1}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py
RENAMED
@@ -130,7 +130,7 @@ class BertForMultipleChoice(AnnotatorModel,

         Returns
         -------
-
+        BertForMultipleChoice
             The restored model
         """
         from sparknlp.internal import _BertMultipleChoiceLoader
@@ -154,7 +154,7 @@ class BertForMultipleChoice(AnnotatorModel,

         Returns
         -------
-
+        BertForMultipleChoice
             The restored model
         """
         from sparknlp.pretrained import ResourceDownloader