spark-nlp 5.3.3__tar.gz → 5.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spark-nlp might be problematic. Click here for more details.
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/PKG-INFO +50 -60
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/README.md +49 -59
- spark-nlp-5.4.0/com/johnsnowlabs/ml/ai/__init__.py +10 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/setup.py +1 -1
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/spark_nlp.egg-info/PKG-INFO +50 -60
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/spark_nlp.egg-info/SOURCES.txt +5 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/__init__.py +3 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/__init__.py +1 -0
- spark-nlp-5.4.0/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +3 -3
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/bert_embeddings.py +4 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +4 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/bge_embeddings.py +2 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/e5_embeddings.py +6 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/mpnet_embeddings.py +2 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/roberta_embeddings.py +4 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +4 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/openai/openai_embeddings.py +43 -69
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/__init__.py +2 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/llama2_transformer.py +2 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/m2m100_transformer.py +2 -2
- spark-nlp-5.4.0/sparknlp/annotator/seq2seq/mistral_transformer.py +349 -0
- spark-nlp-5.4.0/sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/__init__.py +434 -148
- spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/com/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/com/johnsnowlabs/__init__.py +0 -0
- {spark-nlp-5.3.3/sparknlp/annotator/similarity → spark-nlp-5.4.0/com/johnsnowlabs/ml}/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/com/johnsnowlabs/nlp/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/setup.cfg +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/spark_nlp.egg-info/.uuid +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/spark_nlp.egg-info/dependency_links.txt +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/spark_nlp.egg-info/top_level.txt +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotation.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotation_audio.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotation_image.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/audio/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/chunk2_doc.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/chunker.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/coref/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/date2_chunk.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/dependency/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/document_character_text_splitter.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/document_normalizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/document_token_splitter.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/document_token_splitter_test.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/word2vec.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/er/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/er/entity_ruler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/graph_extraction.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ld_dl/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/lemmatizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/date_matcher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/matcher/text_matcher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/n_gram_generator.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/ner_approach.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/ner_converter.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/ner_crf.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/ner_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/normalizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/openai/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/openai/openai_completion.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/param/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/param/classifier_encoder.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/pos/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/pos/perceptron.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentence/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentiment/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
- {spark-nlp-5.3.3/sparknlp/training/_tf_graph_builders → spark-nlp-5.4.0/sparknlp/annotator/similarity}/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/spell_check/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/stemmer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/stop_words_cleaner.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token/tokenizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/token2_chunk.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ws/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/annotator/ws/word_segmenter.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/audio_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/doc2_chunk.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/document_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/embeddings_finisher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/finisher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/graph_finisher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/has_recursive_fit.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/has_recursive_transform.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/image_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/light_pipeline.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/multi_document_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/recursive_pipeline.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/table_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/base/token_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/annotator_approach.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/annotator_model.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/annotator_properties.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/annotator_type.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/coverage_result.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/match_strategy.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/properties.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/read_as.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/recursive_annotator_approach.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/storage.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/common/utils.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/functions.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/annotator_java_ml.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/annotator_transformer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/extended_java_wrapper.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/params_getters_setters.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/internal/recursive.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/logging/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/logging/comet.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/pretrained/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/pretrained/resource_downloader.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/pretrained/utils.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/__init__.py +0 -0
- {spark-nlp-5.3.3/sparknlp/training/_tf_graph_builders/ner_dl → spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders}/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
- {spark-nlp-5.3.3/sparknlp/training/_tf_graph_builders_1x → spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders/ner_dl}/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
- {spark-nlp-5.3.3/sparknlp/training/_tf_graph_builders_1x/ner_dl → spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders_1x}/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/conll.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/conllu.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/pos.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/pub_tator.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/spacy_to_annotation.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/training/tfgraphs.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/upload_to_hub.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0}/sparknlp/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: spark-nlp
|
|
3
|
-
Version: 5.3.3
|
|
3
|
+
Version: 5.4.0
|
|
4
4
|
Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
|
|
5
5
|
Home-page: https://github.com/JohnSnowLabs/spark-nlp
|
|
6
6
|
Author: John Snow Labs
|
|
@@ -146,6 +146,7 @@ documentation and examples
|
|
|
146
146
|
- INSTRUCTOR Embeddings (HuggingFace models)
|
|
147
147
|
- E5 Embeddings (HuggingFace models)
|
|
148
148
|
- MPNet Embeddings (HuggingFace models)
|
|
149
|
+
- UAE Embeddings (HuggingFace models)
|
|
149
150
|
- OpenAI Embeddings
|
|
150
151
|
- Sentence & Chunk Embeddings
|
|
151
152
|
- Unsupervised keywords extraction
|
|
@@ -170,7 +171,7 @@ documentation and examples
|
|
|
170
171
|
- Text-To-Text Transfer Transformer (Google T5)
|
|
171
172
|
- Generative Pre-trained Transformer 2 (OpenAI GPT2)
|
|
172
173
|
- Seq2Seq for NLG, Translation, and Comprehension (Facebook BART)
|
|
173
|
-
- Chat and Conversational LLMs (Facebook Llama-2)
|
|
174
|
+
- Chat and Conversational LLMs (Facebook Llama-2)
|
|
174
175
|
- Vision Transformer (Google ViT)
|
|
175
176
|
- Swin Image Classification (Microsoft Swin Transformer)
|
|
176
177
|
- ConvNext Image Classification (Facebook ConvNext)
|
|
@@ -180,10 +181,10 @@ documentation and examples
|
|
|
180
181
|
- Automatic Speech Recognition (HuBERT)
|
|
181
182
|
- Automatic Speech Recognition (OpenAI Whisper)
|
|
182
183
|
- Named entity recognition (Deep learning)
|
|
183
|
-
- Easy ONNX and TensorFlow integrations
|
|
184
|
+
- Easy ONNX, OpenVINO, and TensorFlow integrations
|
|
184
185
|
- GPU Support
|
|
185
186
|
- Full integration with Spark ML functions
|
|
186
|
-
- +
|
|
187
|
+
- +31000 pre-trained models in +200 languages!
|
|
187
188
|
- +6000 pre-trained pipelines in +200 languages!
|
|
188
189
|
- Multi-lingual NER models: Arabic, Bengali, Chinese, Danish, Dutch, English, Finnish, French, German, Hebrew, Italian,
|
|
189
190
|
Japanese, Korean, Norwegian, Persian, Polish, Portuguese, Russian, Spanish, Swedish, Urdu, and more.
|
|
@@ -197,7 +198,7 @@ To use Spark NLP you need the following requirements:
|
|
|
197
198
|
|
|
198
199
|
**GPU (optional):**
|
|
199
200
|
|
|
200
|
-
Spark NLP 5.3.3
|
|
201
|
+
Spark NLP 5.4.0 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
201
202
|
|
|
202
203
|
- NVIDIA® GPU drivers version 450.80.02 or higher
|
|
203
204
|
- CUDA® Toolkit 11.2
|
|
@@ -213,7 +214,7 @@ $ java -version
|
|
|
213
214
|
$ conda create -n sparknlp python=3.7 -y
|
|
214
215
|
$ conda activate sparknlp
|
|
215
216
|
# spark-nlp by default is based on pyspark 3.x
|
|
216
|
-
$ pip install spark-nlp==5.3.3 pyspark==3.3.1
|
|
217
|
+
$ pip install spark-nlp==5.4.0 pyspark==3.3.1
|
|
217
218
|
```
|
|
218
219
|
|
|
219
220
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -258,10 +259,11 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
|
|
|
258
259
|
|
|
259
260
|
## Apache Spark Support
|
|
260
261
|
|
|
261
|
-
Spark NLP *5.3.3* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
262
|
+
Spark NLP *5.4.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
262
263
|
|
|
263
264
|
| Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
|
|
264
265
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
266
|
+
| 5.4.x | YES | YES | YES | YES | YES | YES | NO | NO |
|
|
265
267
|
| 5.3.x | YES | YES | YES | YES | YES | YES | NO | NO |
|
|
266
268
|
| 5.2.x | YES | YES | YES | YES | YES | YES | NO | NO |
|
|
267
269
|
| 5.1.x | Partially | YES | YES | YES | YES | YES | NO | NO |
|
|
@@ -271,12 +273,6 @@ Spark NLP *5.3.3* has been built on top of Apache Spark 3.4 while fully supports
|
|
|
271
273
|
| 4.2.x | NO | NO | YES | YES | YES | YES | NO | NO |
|
|
272
274
|
| 4.1.x | NO | NO | YES | YES | YES | YES | NO | NO |
|
|
273
275
|
| 4.0.x | NO | NO | YES | YES | YES | YES | NO | NO |
|
|
274
|
-
| 3.4.x | NO | NO | N/A | Partially | YES | YES | YES | YES |
|
|
275
|
-
| 3.3.x | NO | NO | NO | NO | YES | YES | YES | YES |
|
|
276
|
-
| 3.2.x | NO | NO | NO | NO | YES | YES | YES | YES |
|
|
277
|
-
| 3.1.x | NO | NO | NO | NO | YES | YES | YES | YES |
|
|
278
|
-
| 3.0.x | NO | NO | NO | NO | YES | YES | YES | YES |
|
|
279
|
-
| 2.7.x | NO | NO | NO | NO | NO | NO | YES | YES |
|
|
280
276
|
|
|
281
277
|
Find out more about `Spark NLP` versions from our [release notes](https://github.com/JohnSnowLabs/spark-nlp/releases).
|
|
282
278
|
|
|
@@ -293,16 +289,10 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
|
|
|
293
289
|
| 4.2.x | YES | YES | YES | YES | YES | NO | YES |
|
|
294
290
|
| 4.1.x | YES | YES | YES | YES | NO | NO | YES |
|
|
295
291
|
| 4.0.x | YES | YES | YES | YES | NO | NO | YES |
|
|
296
|
-
| 3.4.x | YES | YES | YES | YES | NO | YES | YES |
|
|
297
|
-
| 3.3.x | YES | YES | YES | NO | NO | YES | YES |
|
|
298
|
-
| 3.2.x | YES | YES | YES | NO | NO | YES | YES |
|
|
299
|
-
| 3.1.x | YES | YES | YES | NO | NO | YES | YES |
|
|
300
|
-
| 3.0.x | YES | YES | YES | NO | NO | YES | YES |
|
|
301
|
-
| 2.7.x | YES | YES | NO | NO | NO | YES | NO |
|
|
302
292
|
|
|
303
293
|
## Databricks Support
|
|
304
294
|
|
|
305
|
-
Spark NLP 5.3.3 has been tested and is compatible with the following runtimes:
|
|
295
|
+
Spark NLP 5.4.0 has been tested and is compatible with the following runtimes:
|
|
306
296
|
|
|
307
297
|
**CPU:**
|
|
308
298
|
|
|
@@ -375,7 +365,7 @@ Spark NLP 5.3.3 has been tested and is compatible with the following runtimes:
|
|
|
375
365
|
|
|
376
366
|
## EMR Support
|
|
377
367
|
|
|
378
|
-
Spark NLP 5.3.3 has been tested and is compatible with the following EMR releases:
|
|
368
|
+
Spark NLP 5.4.0 has been tested and is compatible with the following EMR releases:
|
|
379
369
|
|
|
380
370
|
- emr-6.2.0
|
|
381
371
|
- emr-6.3.0
|
|
@@ -425,11 +415,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
|
|
|
425
415
|
```sh
|
|
426
416
|
# CPU
|
|
427
417
|
|
|
428
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
|
|
418
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
429
419
|
|
|
430
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
|
|
420
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
431
421
|
|
|
432
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
|
|
422
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
433
423
|
```
|
|
434
424
|
|
|
435
425
|
The `spark-nlp` has been published to
|
|
@@ -438,11 +428,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
438
428
|
```sh
|
|
439
429
|
# GPU
|
|
440
430
|
|
|
441
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.3
|
|
431
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
|
|
442
432
|
|
|
443
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.3
|
|
433
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
|
|
444
434
|
|
|
445
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.3
|
|
435
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
|
|
446
436
|
|
|
447
437
|
```
|
|
448
438
|
|
|
@@ -452,11 +442,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
452
442
|
```sh
|
|
453
443
|
# AArch64
|
|
454
444
|
|
|
455
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.3
|
|
445
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
|
|
456
446
|
|
|
457
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.3
|
|
447
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
|
|
458
448
|
|
|
459
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.3
|
|
449
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
|
|
460
450
|
|
|
461
451
|
```
|
|
462
452
|
|
|
@@ -466,11 +456,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
466
456
|
```sh
|
|
467
457
|
# M1/M2 (Apple Silicon)
|
|
468
458
|
|
|
469
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.3
|
|
459
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
|
|
470
460
|
|
|
471
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.3
|
|
461
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
|
|
472
462
|
|
|
473
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.3
|
|
463
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
|
|
474
464
|
|
|
475
465
|
```
|
|
476
466
|
|
|
@@ -484,7 +474,7 @@ set in your SparkSession:
|
|
|
484
474
|
spark-shell \
|
|
485
475
|
--driver-memory 16g \
|
|
486
476
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
487
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
|
|
477
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
488
478
|
```
|
|
489
479
|
|
|
490
480
|
## Scala
|
|
@@ -502,7 +492,7 @@ coordinates:
|
|
|
502
492
|
<dependency>
|
|
503
493
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
504
494
|
<artifactId>spark-nlp_2.12</artifactId>
|
|
505
|
-
<version>5.
|
|
495
|
+
<version>5.4.0</version>
|
|
506
496
|
</dependency>
|
|
507
497
|
```
|
|
508
498
|
|
|
@@ -513,7 +503,7 @@ coordinates:
|
|
|
513
503
|
<dependency>
|
|
514
504
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
515
505
|
<artifactId>spark-nlp-gpu_2.12</artifactId>
|
|
516
|
-
<version>5.
|
|
506
|
+
<version>5.4.0</version>
|
|
517
507
|
</dependency>
|
|
518
508
|
```
|
|
519
509
|
|
|
@@ -524,7 +514,7 @@ coordinates:
|
|
|
524
514
|
<dependency>
|
|
525
515
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
526
516
|
<artifactId>spark-nlp-aarch64_2.12</artifactId>
|
|
527
|
-
<version>5.
|
|
517
|
+
<version>5.4.0</version>
|
|
528
518
|
</dependency>
|
|
529
519
|
```
|
|
530
520
|
|
|
@@ -535,7 +525,7 @@ coordinates:
|
|
|
535
525
|
<dependency>
|
|
536
526
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
537
527
|
<artifactId>spark-nlp-silicon_2.12</artifactId>
|
|
538
|
-
<version>5.
|
|
528
|
+
<version>5.4.0</version>
|
|
539
529
|
</dependency>
|
|
540
530
|
```
|
|
541
531
|
|
|
@@ -545,28 +535,28 @@ coordinates:
|
|
|
545
535
|
|
|
546
536
|
```sbtshell
|
|
547
537
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
|
|
548
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.
|
|
538
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.0"
|
|
549
539
|
```
|
|
550
540
|
|
|
551
541
|
**spark-nlp-gpu:**
|
|
552
542
|
|
|
553
543
|
```sbtshell
|
|
554
544
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
|
|
555
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.
|
|
545
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.4.0"
|
|
556
546
|
```
|
|
557
547
|
|
|
558
548
|
**spark-nlp-aarch64:**
|
|
559
549
|
|
|
560
550
|
```sbtshell
|
|
561
551
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
|
|
562
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.
|
|
552
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.4.0"
|
|
563
553
|
```
|
|
564
554
|
|
|
565
555
|
**spark-nlp-silicon:**
|
|
566
556
|
|
|
567
557
|
```sbtshell
|
|
568
558
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
|
|
569
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.
|
|
559
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.4.0"
|
|
570
560
|
```
|
|
571
561
|
|
|
572
562
|
Maven
|
|
@@ -588,7 +578,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
|
|
|
588
578
|
Pip:
|
|
589
579
|
|
|
590
580
|
```bash
|
|
591
|
-
pip install spark-nlp==5.
|
|
581
|
+
pip install spark-nlp==5.4.0
|
|
592
582
|
```
|
|
593
583
|
|
|
594
584
|
Conda:
|
|
@@ -617,7 +607,7 @@ spark = SparkSession.builder
|
|
|
617
607
|
.config("spark.driver.memory", "16G")
|
|
618
608
|
.config("spark.driver.maxResultSize", "0")
|
|
619
609
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
620
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
610
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0")
|
|
621
611
|
.getOrCreate()
|
|
622
612
|
```
|
|
623
613
|
|
|
@@ -688,7 +678,7 @@ Use either one of the following options
|
|
|
688
678
|
- Add the following Maven Coordinates to the interpreter's library list
|
|
689
679
|
|
|
690
680
|
```bash
|
|
691
|
-
com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
681
|
+
com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
692
682
|
```
|
|
693
683
|
|
|
694
684
|
- Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
|
|
@@ -699,7 +689,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
|
|
|
699
689
|
Apart from the previous step, install the python module through pip
|
|
700
690
|
|
|
701
691
|
```bash
|
|
702
|
-
pip install spark-nlp==5.
|
|
692
|
+
pip install spark-nlp==5.4.0
|
|
703
693
|
```
|
|
704
694
|
|
|
705
695
|
Or you can install `spark-nlp` from inside Zeppelin by using Conda:
|
|
@@ -727,7 +717,7 @@ launch the Jupyter from the same Python environment:
|
|
|
727
717
|
$ conda create -n sparknlp python=3.8 -y
|
|
728
718
|
$ conda activate sparknlp
|
|
729
719
|
# spark-nlp by default is based on pyspark 3.x
|
|
730
|
-
$ pip install spark-nlp==5.
|
|
720
|
+
$ pip install spark-nlp==5.4.0 pyspark==3.3.1 jupyter
|
|
731
721
|
$ jupyter notebook
|
|
732
722
|
```
|
|
733
723
|
|
|
@@ -744,7 +734,7 @@ export PYSPARK_PYTHON=python3
|
|
|
744
734
|
export PYSPARK_DRIVER_PYTHON=jupyter
|
|
745
735
|
export PYSPARK_DRIVER_PYTHON_OPTS=notebook
|
|
746
736
|
|
|
747
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
737
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
748
738
|
```
|
|
749
739
|
|
|
750
740
|
Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
|
|
@@ -771,7 +761,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
771
761
|
# -s is for spark-nlp
|
|
772
762
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
|
|
773
763
|
# by default they are set to the latest
|
|
774
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
764
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0
|
|
775
765
|
```
|
|
776
766
|
|
|
777
767
|
[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
|
|
@@ -794,7 +784,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
794
784
|
# -s is for spark-nlp
|
|
795
785
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
|
|
796
786
|
# by default they are set to the latest
|
|
797
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
787
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0
|
|
798
788
|
```
|
|
799
789
|
|
|
800
790
|
[Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
|
|
@@ -813,9 +803,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
|
|
|
813
803
|
|
|
814
804
|
3. In `Libraries` tab inside your cluster you need to follow these steps:
|
|
815
805
|
|
|
816
|
-
3.1. Install New -> PyPI -> `spark-nlp==5.
|
|
806
|
+
3.1. Install New -> PyPI -> `spark-nlp==5.4.0` -> Install
|
|
817
807
|
|
|
818
|
-
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
808
|
+
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0` -> Install
|
|
819
809
|
|
|
820
810
|
4. Now you can attach your notebook to the cluster and use Spark NLP!
|
|
821
811
|
|
|
@@ -866,7 +856,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
|
|
|
866
856
|
"spark.kryoserializer.buffer.max": "2000M",
|
|
867
857
|
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
|
|
868
858
|
"spark.driver.maxResultSize": "0",
|
|
869
|
-
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
859
|
+
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0"
|
|
870
860
|
}
|
|
871
861
|
}]
|
|
872
862
|
```
|
|
@@ -875,7 +865,7 @@ A sample of AWS CLI to launch EMR cluster:
|
|
|
875
865
|
|
|
876
866
|
```.sh
|
|
877
867
|
aws emr create-cluster \
|
|
878
|
-
--name "Spark NLP 5.
|
|
868
|
+
--name "Spark NLP 5.4.0" \
|
|
879
869
|
--release-label emr-6.2.0 \
|
|
880
870
|
--applications Name=Hadoop Name=Spark Name=Hive \
|
|
881
871
|
--instance-type m4.4xlarge \
|
|
@@ -939,7 +929,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
|
|
|
939
929
|
--enable-component-gateway \
|
|
940
930
|
--metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
|
|
941
931
|
--initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
|
|
942
|
-
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
932
|
+
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
943
933
|
```
|
|
944
934
|
|
|
945
935
|
2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
|
|
@@ -982,7 +972,7 @@ spark = SparkSession.builder
|
|
|
982
972
|
.config("spark.kryoserializer.buffer.max", "2000m")
|
|
983
973
|
.config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
|
|
984
974
|
.config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
|
|
985
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
975
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0")
|
|
986
976
|
.getOrCreate()
|
|
987
977
|
```
|
|
988
978
|
|
|
@@ -996,7 +986,7 @@ spark-shell \
|
|
|
996
986
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
997
987
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
998
988
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
999
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
989
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
1000
990
|
```
|
|
1001
991
|
|
|
1002
992
|
**pyspark:**
|
|
@@ -1009,7 +999,7 @@ pyspark \
|
|
|
1009
999
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
1010
1000
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
1011
1001
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
1012
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
1002
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
1013
1003
|
```
|
|
1014
1004
|
|
|
1015
1005
|
**Databricks:**
|
|
@@ -1281,7 +1271,7 @@ spark = SparkSession.builder
|
|
|
1281
1271
|
.config("spark.driver.memory", "16G")
|
|
1282
1272
|
.config("spark.driver.maxResultSize", "0")
|
|
1283
1273
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
1284
|
-
.config("spark.jars", "/tmp/spark-nlp-assembly-5.
|
|
1274
|
+
.config("spark.jars", "/tmp/spark-nlp-assembly-5.4.0.jar")
|
|
1285
1275
|
.getOrCreate()
|
|
1286
1276
|
```
|
|
1287
1277
|
|
|
@@ -1290,7 +1280,7 @@ spark = SparkSession.builder
|
|
|
1290
1280
|
version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
|
|
1291
1281
|
- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
|
|
1292
1282
|
to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
|
|
1293
|
-
i.e., `hdfs:///tmp/spark-nlp-assembly-5.
|
|
1283
|
+
i.e., `hdfs:///tmp/spark-nlp-assembly-5.4.0.jar`)
|
|
1294
1284
|
|
|
1295
1285
|
Example of using pretrained Models and Pipelines in offline:
|
|
1296
1286
|
|