spark-nlp 5.3.3__tar.gz → 5.4.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spark-nlp might be problematic. Click here for more details.
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/PKG-INFO +45 -45
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/README.md +44 -44
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/setup.py +1 -1
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/PKG-INFO +45 -45
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/__init__.py +2 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/bert_embeddings.py +4 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +4 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/e5_embeddings.py +4 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/roberta_embeddings.py +4 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +4 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/llama2_transformer.py +2 -2
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/internal/__init__.py +12 -12
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/com/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/com/johnsnowlabs/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/com/johnsnowlabs/nlp/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/setup.cfg +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/.uuid +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/SOURCES.txt +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/dependency_links.txt +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/top_level.txt +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotation.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotation_audio.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotation_image.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/chunk2_doc.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/chunker.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/coref/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/date2_chunk.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/dependency/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_character_text_splitter.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_normalizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_token_splitter.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_token_splitter_test.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/word2vec.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/er/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/er/entity_ruler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/graph_extraction.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ld_dl/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/lemmatizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/date_matcher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/text_matcher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/n_gram_generator.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_approach.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_converter.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_crf.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/normalizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/openai/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/openai/openai_completion.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/param/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/param/classifier_encoder.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/pos/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/pos/perceptron.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentence/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentiment/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/similarity/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/stemmer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/stop_words_cleaner.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/tokenizer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token2_chunk.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ws/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ws/word_segmenter.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/audio_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/doc2_chunk.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/document_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/embeddings_finisher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/finisher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/graph_finisher.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/has_recursive_fit.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/has_recursive_transform.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/image_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/light_pipeline.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/multi_document_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/recursive_pipeline.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/table_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/base/token_assembler.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_approach.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_model.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_properties.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_type.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/coverage_result.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/match_strategy.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/properties.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/read_as.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/recursive_annotator_approach.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/storage.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/common/utils.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/functions.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/internal/annotator_java_ml.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/internal/annotator_transformer.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/internal/extended_java_wrapper.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/internal/params_getters_setters.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/internal/recursive.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/logging/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/logging/comet.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/resource_downloader.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/utils.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/conll.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/conllu.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/pos.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/pub_tator.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/spacy_to_annotation.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/training/tfgraphs.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/upload_to_hub.py +0 -0
- {spark-nlp-5.3.3 → spark-nlp-5.4.0rc1}/sparknlp/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: spark-nlp
|
|
3
|
-
Version: 5.3.3
|
|
3
|
+
Version: 5.4.0rc1
|
|
4
4
|
Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
|
|
5
5
|
Home-page: https://github.com/JohnSnowLabs/spark-nlp
|
|
6
6
|
Author: John Snow Labs
|
|
@@ -197,7 +197,7 @@ To use Spark NLP you need the following requirements:
|
|
|
197
197
|
|
|
198
198
|
**GPU (optional):**
|
|
199
199
|
|
|
200
|
-
Spark NLP 5.3.3 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
200
|
+
Spark NLP 5.4.0-rc1 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
201
201
|
|
|
202
202
|
- NVIDIA® GPU drivers version 450.80.02 or higher
|
|
203
203
|
- CUDA® Toolkit 11.2
|
|
@@ -213,7 +213,7 @@ $ java -version
|
|
|
213
213
|
$ conda create -n sparknlp python=3.7 -y
|
|
214
214
|
$ conda activate sparknlp
|
|
215
215
|
# spark-nlp by default is based on pyspark 3.x
|
|
216
|
-
$ pip install spark-nlp==5.3.3 pyspark==3.3.1
|
|
216
|
+
$ pip install spark-nlp==5.4.0-rc1 pyspark==3.3.1
|
|
217
217
|
```
|
|
218
218
|
|
|
219
219
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -258,7 +258,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
|
|
|
258
258
|
|
|
259
259
|
## Apache Spark Support
|
|
260
260
|
|
|
261
|
-
Spark NLP *5.3.3* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
261
|
+
Spark NLP *5.4.0-rc1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
262
262
|
|
|
263
263
|
| Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
|
|
264
264
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
@@ -302,7 +302,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
|
|
|
302
302
|
|
|
303
303
|
## Databricks Support
|
|
304
304
|
|
|
305
|
-
Spark NLP 5.3.3 has been tested and is compatible with the following runtimes:
|
|
305
|
+
Spark NLP 5.4.0-rc1 has been tested and is compatible with the following runtimes:
|
|
306
306
|
|
|
307
307
|
**CPU:**
|
|
308
308
|
|
|
@@ -375,7 +375,7 @@ Spark NLP 5.3.3 has been tested and is compatible with the following runtimes:
|
|
|
375
375
|
|
|
376
376
|
## EMR Support
|
|
377
377
|
|
|
378
|
-
Spark NLP 5.3.3 has been tested and is compatible with the following EMR releases:
|
|
378
|
+
Spark NLP 5.4.0-rc1 has been tested and is compatible with the following EMR releases:
|
|
379
379
|
|
|
380
380
|
- emr-6.2.0
|
|
381
381
|
- emr-6.3.0
|
|
@@ -425,11 +425,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
|
|
|
425
425
|
```sh
|
|
426
426
|
# CPU
|
|
427
427
|
|
|
428
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
|
|
428
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
429
429
|
|
|
430
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
|
|
430
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
431
431
|
|
|
432
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
|
|
432
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
433
433
|
```
|
|
434
434
|
|
|
435
435
|
The `spark-nlp` has been published to
|
|
@@ -438,11 +438,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
438
438
|
```sh
|
|
439
439
|
# GPU
|
|
440
440
|
|
|
441
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.3
|
|
441
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
|
|
442
442
|
|
|
443
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.3
|
|
443
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
|
|
444
444
|
|
|
445
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.3
|
|
445
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
|
|
446
446
|
|
|
447
447
|
```
|
|
448
448
|
|
|
@@ -452,11 +452,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
452
452
|
```sh
|
|
453
453
|
# AArch64
|
|
454
454
|
|
|
455
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.3
|
|
455
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
|
|
456
456
|
|
|
457
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.3
|
|
457
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
|
|
458
458
|
|
|
459
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.3
|
|
459
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
|
|
460
460
|
|
|
461
461
|
```
|
|
462
462
|
|
|
@@ -466,11 +466,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
466
466
|
```sh
|
|
467
467
|
# M1/M2 (Apple Silicon)
|
|
468
468
|
|
|
469
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.3
|
|
469
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
|
|
470
470
|
|
|
471
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.3
|
|
471
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
|
|
472
472
|
|
|
473
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.3
|
|
473
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
|
|
474
474
|
|
|
475
475
|
```
|
|
476
476
|
|
|
@@ -484,7 +484,7 @@ set in your SparkSession:
|
|
|
484
484
|
spark-shell \
|
|
485
485
|
--driver-memory 16g \
|
|
486
486
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
487
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
|
|
487
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
488
488
|
```
|
|
489
489
|
|
|
490
490
|
## Scala
|
|
@@ -502,7 +502,7 @@ coordinates:
|
|
|
502
502
|
<dependency>
|
|
503
503
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
504
504
|
<artifactId>spark-nlp_2.12</artifactId>
|
|
505
|
-
<version>5.3.3</version>
|
|
505
|
+
<version>5.4.0-rc1</version>
|
|
506
506
|
</dependency>
|
|
507
507
|
```
|
|
508
508
|
|
|
@@ -513,7 +513,7 @@ coordinates:
|
|
|
513
513
|
<dependency>
|
|
514
514
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
515
515
|
<artifactId>spark-nlp-gpu_2.12</artifactId>
|
|
516
|
-
<version>5.3.3</version>
|
|
516
|
+
<version>5.4.0-rc1</version>
|
|
517
517
|
</dependency>
|
|
518
518
|
```
|
|
519
519
|
|
|
@@ -524,7 +524,7 @@ coordinates:
|
|
|
524
524
|
<dependency>
|
|
525
525
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
526
526
|
<artifactId>spark-nlp-aarch64_2.12</artifactId>
|
|
527
|
-
<version>5.3.3</version>
|
|
527
|
+
<version>5.4.0-rc1</version>
|
|
528
528
|
</dependency>
|
|
529
529
|
```
|
|
530
530
|
|
|
@@ -535,7 +535,7 @@ coordinates:
|
|
|
535
535
|
<dependency>
|
|
536
536
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
537
537
|
<artifactId>spark-nlp-silicon_2.12</artifactId>
|
|
538
|
-
<version>5.3.3</version>
|
|
538
|
+
<version>5.4.0-rc1</version>
|
|
539
539
|
</dependency>
|
|
540
540
|
```
|
|
541
541
|
|
|
@@ -545,28 +545,28 @@ coordinates:
|
|
|
545
545
|
|
|
546
546
|
```sbtshell
|
|
547
547
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
|
|
548
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.3.3"
|
|
548
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.0-rc1"
|
|
549
549
|
```
|
|
550
550
|
|
|
551
551
|
**spark-nlp-gpu:**
|
|
552
552
|
|
|
553
553
|
```sbtshell
|
|
554
554
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
|
|
555
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.3.3"
|
|
555
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.4.0-rc1"
|
|
556
556
|
```
|
|
557
557
|
|
|
558
558
|
**spark-nlp-aarch64:**
|
|
559
559
|
|
|
560
560
|
```sbtshell
|
|
561
561
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
|
|
562
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.3.3"
|
|
562
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.4.0-rc1"
|
|
563
563
|
```
|
|
564
564
|
|
|
565
565
|
**spark-nlp-silicon:**
|
|
566
566
|
|
|
567
567
|
```sbtshell
|
|
568
568
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
|
|
569
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.3.3"
|
|
569
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.4.0-rc1"
|
|
570
570
|
```
|
|
571
571
|
|
|
572
572
|
Maven
|
|
@@ -588,7 +588,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
|
|
|
588
588
|
Pip:
|
|
589
589
|
|
|
590
590
|
```bash
|
|
591
|
-
pip install spark-nlp==5.3.3
|
|
591
|
+
pip install spark-nlp==5.4.0-rc1
|
|
592
592
|
```
|
|
593
593
|
|
|
594
594
|
Conda:
|
|
@@ -617,7 +617,7 @@ spark = SparkSession.builder
|
|
|
617
617
|
.config("spark.driver.memory", "16G")
|
|
618
618
|
.config("spark.driver.maxResultSize", "0")
|
|
619
619
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
620
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3")
|
|
620
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1")
|
|
621
621
|
.getOrCreate()
|
|
622
622
|
```
|
|
623
623
|
|
|
@@ -688,7 +688,7 @@ Use either one of the following options
|
|
|
688
688
|
- Add the following Maven Coordinates to the interpreter's library list
|
|
689
689
|
|
|
690
690
|
```bash
|
|
691
|
-
com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
691
|
+
com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
692
692
|
```
|
|
693
693
|
|
|
694
694
|
- Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
|
|
@@ -699,7 +699,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
|
|
|
699
699
|
Apart from the previous step, install the python module through pip
|
|
700
700
|
|
|
701
701
|
```bash
|
|
702
|
-
pip install spark-nlp==5.
|
|
702
|
+
pip install spark-nlp==5.4.0-rc1
|
|
703
703
|
```
|
|
704
704
|
|
|
705
705
|
Or you can install `spark-nlp` from inside Zeppelin by using Conda:
|
|
@@ -727,7 +727,7 @@ launch the Jupyter from the same Python environment:
|
|
|
727
727
|
$ conda create -n sparknlp python=3.8 -y
|
|
728
728
|
$ conda activate sparknlp
|
|
729
729
|
# spark-nlp by default is based on pyspark 3.x
|
|
730
|
-
$ pip install spark-nlp==5.
|
|
730
|
+
$ pip install spark-nlp==5.4.0-rc1 pyspark==3.3.1 jupyter
|
|
731
731
|
$ jupyter notebook
|
|
732
732
|
```
|
|
733
733
|
|
|
@@ -744,7 +744,7 @@ export PYSPARK_PYTHON=python3
|
|
|
744
744
|
export PYSPARK_DRIVER_PYTHON=jupyter
|
|
745
745
|
export PYSPARK_DRIVER_PYTHON_OPTS=notebook
|
|
746
746
|
|
|
747
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
747
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
748
748
|
```
|
|
749
749
|
|
|
750
750
|
Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
|
|
@@ -771,7 +771,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
771
771
|
# -s is for spark-nlp
|
|
772
772
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
|
|
773
773
|
# by default they are set to the latest
|
|
774
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
774
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0-rc1
|
|
775
775
|
```
|
|
776
776
|
|
|
777
777
|
[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
|
|
@@ -794,7 +794,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
794
794
|
# -s is for spark-nlp
|
|
795
795
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
|
|
796
796
|
# by default they are set to the latest
|
|
797
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
797
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0-rc1
|
|
798
798
|
```
|
|
799
799
|
|
|
800
800
|
[Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
|
|
@@ -813,9 +813,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
|
|
|
813
813
|
|
|
814
814
|
3. In `Libraries` tab inside your cluster you need to follow these steps:
|
|
815
815
|
|
|
816
|
-
3.1. Install New -> PyPI -> `spark-nlp==5.
|
|
816
|
+
3.1. Install New -> PyPI -> `spark-nlp==5.4.0-rc1` -> Install
|
|
817
817
|
|
|
818
|
-
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
818
|
+
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1` -> Install
|
|
819
819
|
|
|
820
820
|
4. Now you can attach your notebook to the cluster and use Spark NLP!
|
|
821
821
|
|
|
@@ -866,7 +866,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
|
|
|
866
866
|
"spark.kryoserializer.buffer.max": "2000M",
|
|
867
867
|
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
|
|
868
868
|
"spark.driver.maxResultSize": "0",
|
|
869
|
-
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
869
|
+
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1"
|
|
870
870
|
}
|
|
871
871
|
}]
|
|
872
872
|
```
|
|
@@ -875,7 +875,7 @@ A sample of AWS CLI to launch EMR cluster:
|
|
|
875
875
|
|
|
876
876
|
```.sh
|
|
877
877
|
aws emr create-cluster \
|
|
878
|
-
--name "Spark NLP 5.
|
|
878
|
+
--name "Spark NLP 5.4.0-rc1" \
|
|
879
879
|
--release-label emr-6.2.0 \
|
|
880
880
|
--applications Name=Hadoop Name=Spark Name=Hive \
|
|
881
881
|
--instance-type m4.4xlarge \
|
|
@@ -939,7 +939,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
|
|
|
939
939
|
--enable-component-gateway \
|
|
940
940
|
--metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
|
|
941
941
|
--initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
|
|
942
|
-
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
942
|
+
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
943
943
|
```
|
|
944
944
|
|
|
945
945
|
2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
|
|
@@ -982,7 +982,7 @@ spark = SparkSession.builder
|
|
|
982
982
|
.config("spark.kryoserializer.buffer.max", "2000m")
|
|
983
983
|
.config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
|
|
984
984
|
.config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
|
|
985
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
985
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1")
|
|
986
986
|
.getOrCreate()
|
|
987
987
|
```
|
|
988
988
|
|
|
@@ -996,7 +996,7 @@ spark-shell \
|
|
|
996
996
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
997
997
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
998
998
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
999
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
999
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
1000
1000
|
```
|
|
1001
1001
|
|
|
1002
1002
|
**pyspark:**
|
|
@@ -1009,7 +1009,7 @@ pyspark \
|
|
|
1009
1009
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
1010
1010
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
1011
1011
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
1012
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
1012
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
1013
1013
|
```
|
|
1014
1014
|
|
|
1015
1015
|
**Databricks:**
|
|
@@ -1281,7 +1281,7 @@ spark = SparkSession.builder
|
|
|
1281
1281
|
.config("spark.driver.memory", "16G")
|
|
1282
1282
|
.config("spark.driver.maxResultSize", "0")
|
|
1283
1283
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
1284
|
-
.config("spark.jars", "/tmp/spark-nlp-assembly-5.
|
|
1284
|
+
.config("spark.jars", "/tmp/spark-nlp-assembly-5.4.0-rc1.jar")
|
|
1285
1285
|
.getOrCreate()
|
|
1286
1286
|
```
|
|
1287
1287
|
|
|
@@ -1290,7 +1290,7 @@ spark = SparkSession.builder
|
|
|
1290
1290
|
version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
|
|
1291
1291
|
- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
|
|
1292
1292
|
to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
|
|
1293
|
-
i.e., `hdfs:///tmp/spark-nlp-assembly-5.
|
|
1293
|
+
i.e., `hdfs:///tmp/spark-nlp-assembly-5.4.0-rc1.jar`)
|
|
1294
1294
|
|
|
1295
1295
|
Example of using pretrained Models and Pipelines in offline:
|
|
1296
1296
|
|
|
@@ -165,7 +165,7 @@ To use Spark NLP you need the following requirements:
|
|
|
165
165
|
|
|
166
166
|
**GPU (optional):**
|
|
167
167
|
|
|
168
|
-
Spark NLP 5.
|
|
168
|
+
Spark NLP 5.4.0-rc1 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
169
169
|
|
|
170
170
|
- NVIDIA® GPU drivers version 450.80.02 or higher
|
|
171
171
|
- CUDA® Toolkit 11.2
|
|
@@ -181,7 +181,7 @@ $ java -version
|
|
|
181
181
|
$ conda create -n sparknlp python=3.7 -y
|
|
182
182
|
$ conda activate sparknlp
|
|
183
183
|
# spark-nlp by default is based on pyspark 3.x
|
|
184
|
-
$ pip install spark-nlp==5.
|
|
184
|
+
$ pip install spark-nlp==5.4.0-rc1 pyspark==3.3.1
|
|
185
185
|
```
|
|
186
186
|
|
|
187
187
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -226,7 +226,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
|
|
|
226
226
|
|
|
227
227
|
## Apache Spark Support
|
|
228
228
|
|
|
229
|
-
Spark NLP *5.
|
|
229
|
+
Spark NLP *5.4.0-rc1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
230
230
|
|
|
231
231
|
| Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
|
|
232
232
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
@@ -270,7 +270,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
|
|
|
270
270
|
|
|
271
271
|
## Databricks Support
|
|
272
272
|
|
|
273
|
-
Spark NLP 5.
|
|
273
|
+
Spark NLP 5.4.0-rc1 has been tested and is compatible with the following runtimes:
|
|
274
274
|
|
|
275
275
|
**CPU:**
|
|
276
276
|
|
|
@@ -343,7 +343,7 @@ Spark NLP 5.3.3 has been tested and is compatible with the following runtimes:
|
|
|
343
343
|
|
|
344
344
|
## EMR Support
|
|
345
345
|
|
|
346
|
-
Spark NLP 5.
|
|
346
|
+
Spark NLP 5.4.0-rc1 has been tested and is compatible with the following EMR releases:
|
|
347
347
|
|
|
348
348
|
- emr-6.2.0
|
|
349
349
|
- emr-6.3.0
|
|
@@ -393,11 +393,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
|
|
|
393
393
|
```sh
|
|
394
394
|
# CPU
|
|
395
395
|
|
|
396
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
396
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
397
397
|
|
|
398
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
398
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
399
399
|
|
|
400
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
400
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
401
401
|
```
|
|
402
402
|
|
|
403
403
|
The `spark-nlp` has been published to
|
|
@@ -406,11 +406,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
406
406
|
```sh
|
|
407
407
|
# GPU
|
|
408
408
|
|
|
409
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.
|
|
409
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
|
|
410
410
|
|
|
411
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.
|
|
411
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
|
|
412
412
|
|
|
413
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.
|
|
413
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
|
|
414
414
|
|
|
415
415
|
```
|
|
416
416
|
|
|
@@ -420,11 +420,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
420
420
|
```sh
|
|
421
421
|
# AArch64
|
|
422
422
|
|
|
423
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.
|
|
423
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
|
|
424
424
|
|
|
425
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.
|
|
425
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
|
|
426
426
|
|
|
427
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.
|
|
427
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
|
|
428
428
|
|
|
429
429
|
```
|
|
430
430
|
|
|
@@ -434,11 +434,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
434
434
|
```sh
|
|
435
435
|
# M1/M2 (Apple Silicon)
|
|
436
436
|
|
|
437
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.
|
|
437
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
|
|
438
438
|
|
|
439
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.
|
|
439
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
|
|
440
440
|
|
|
441
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.
|
|
441
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
|
|
442
442
|
|
|
443
443
|
```
|
|
444
444
|
|
|
@@ -452,7 +452,7 @@ set in your SparkSession:
|
|
|
452
452
|
spark-shell \
|
|
453
453
|
--driver-memory 16g \
|
|
454
454
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
455
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
455
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
456
456
|
```
|
|
457
457
|
|
|
458
458
|
## Scala
|
|
@@ -470,7 +470,7 @@ coordinates:
|
|
|
470
470
|
<dependency>
|
|
471
471
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
472
472
|
<artifactId>spark-nlp_2.12</artifactId>
|
|
473
|
-
<version>5.
|
|
473
|
+
<version>5.4.0-rc1</version>
|
|
474
474
|
</dependency>
|
|
475
475
|
```
|
|
476
476
|
|
|
@@ -481,7 +481,7 @@ coordinates:
|
|
|
481
481
|
<dependency>
|
|
482
482
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
483
483
|
<artifactId>spark-nlp-gpu_2.12</artifactId>
|
|
484
|
-
<version>5.
|
|
484
|
+
<version>5.4.0-rc1</version>
|
|
485
485
|
</dependency>
|
|
486
486
|
```
|
|
487
487
|
|
|
@@ -492,7 +492,7 @@ coordinates:
|
|
|
492
492
|
<dependency>
|
|
493
493
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
494
494
|
<artifactId>spark-nlp-aarch64_2.12</artifactId>
|
|
495
|
-
<version>5.
|
|
495
|
+
<version>5.4.0-rc1</version>
|
|
496
496
|
</dependency>
|
|
497
497
|
```
|
|
498
498
|
|
|
@@ -503,7 +503,7 @@ coordinates:
|
|
|
503
503
|
<dependency>
|
|
504
504
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
505
505
|
<artifactId>spark-nlp-silicon_2.12</artifactId>
|
|
506
|
-
<version>5.
|
|
506
|
+
<version>5.4.0-rc1</version>
|
|
507
507
|
</dependency>
|
|
508
508
|
```
|
|
509
509
|
|
|
@@ -513,28 +513,28 @@ coordinates:
|
|
|
513
513
|
|
|
514
514
|
```sbtshell
|
|
515
515
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
|
|
516
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.
|
|
516
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.0-rc1"
|
|
517
517
|
```
|
|
518
518
|
|
|
519
519
|
**spark-nlp-gpu:**
|
|
520
520
|
|
|
521
521
|
```sbtshell
|
|
522
522
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
|
|
523
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.
|
|
523
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.4.0-rc1"
|
|
524
524
|
```
|
|
525
525
|
|
|
526
526
|
**spark-nlp-aarch64:**
|
|
527
527
|
|
|
528
528
|
```sbtshell
|
|
529
529
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
|
|
530
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.
|
|
530
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.4.0-rc1"
|
|
531
531
|
```
|
|
532
532
|
|
|
533
533
|
**spark-nlp-silicon:**
|
|
534
534
|
|
|
535
535
|
```sbtshell
|
|
536
536
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
|
|
537
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.
|
|
537
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.4.0-rc1"
|
|
538
538
|
```
|
|
539
539
|
|
|
540
540
|
Maven
|
|
@@ -556,7 +556,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
|
|
|
556
556
|
Pip:
|
|
557
557
|
|
|
558
558
|
```bash
|
|
559
|
-
pip install spark-nlp==5.
|
|
559
|
+
pip install spark-nlp==5.4.0-rc1
|
|
560
560
|
```
|
|
561
561
|
|
|
562
562
|
Conda:
|
|
@@ -585,7 +585,7 @@ spark = SparkSession.builder
|
|
|
585
585
|
.config("spark.driver.memory", "16G")
|
|
586
586
|
.config("spark.driver.maxResultSize", "0")
|
|
587
587
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
588
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
588
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1")
|
|
589
589
|
.getOrCreate()
|
|
590
590
|
```
|
|
591
591
|
|
|
@@ -656,7 +656,7 @@ Use either one of the following options
|
|
|
656
656
|
- Add the following Maven Coordinates to the interpreter's library list
|
|
657
657
|
|
|
658
658
|
```bash
|
|
659
|
-
com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
659
|
+
com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
660
660
|
```
|
|
661
661
|
|
|
662
662
|
- Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
|
|
@@ -667,7 +667,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.3
|
|
|
667
667
|
Apart from the previous step, install the python module through pip
|
|
668
668
|
|
|
669
669
|
```bash
|
|
670
|
-
pip install spark-nlp==5.
|
|
670
|
+
pip install spark-nlp==5.4.0-rc1
|
|
671
671
|
```
|
|
672
672
|
|
|
673
673
|
Or you can install `spark-nlp` from inside Zeppelin by using Conda:
|
|
@@ -695,7 +695,7 @@ launch the Jupyter from the same Python environment:
|
|
|
695
695
|
$ conda create -n sparknlp python=3.8 -y
|
|
696
696
|
$ conda activate sparknlp
|
|
697
697
|
# spark-nlp by default is based on pyspark 3.x
|
|
698
|
-
$ pip install spark-nlp==5.
|
|
698
|
+
$ pip install spark-nlp==5.4.0-rc1 pyspark==3.3.1 jupyter
|
|
699
699
|
$ jupyter notebook
|
|
700
700
|
```
|
|
701
701
|
|
|
@@ -712,7 +712,7 @@ export PYSPARK_PYTHON=python3
|
|
|
712
712
|
export PYSPARK_DRIVER_PYTHON=jupyter
|
|
713
713
|
export PYSPARK_DRIVER_PYTHON_OPTS=notebook
|
|
714
714
|
|
|
715
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
715
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
716
716
|
```
|
|
717
717
|
|
|
718
718
|
Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
|
|
@@ -739,7 +739,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
739
739
|
# -s is for spark-nlp
|
|
740
740
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
|
|
741
741
|
# by default they are set to the latest
|
|
742
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
742
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0-rc1
|
|
743
743
|
```
|
|
744
744
|
|
|
745
745
|
[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
|
|
@@ -762,7 +762,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
762
762
|
# -s is for spark-nlp
|
|
763
763
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
|
|
764
764
|
# by default they are set to the latest
|
|
765
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
765
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0-rc1
|
|
766
766
|
```
|
|
767
767
|
|
|
768
768
|
[Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
|
|
@@ -781,9 +781,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
|
|
|
781
781
|
|
|
782
782
|
3. In `Libraries` tab inside your cluster you need to follow these steps:
|
|
783
783
|
|
|
784
|
-
3.1. Install New -> PyPI -> `spark-nlp==5.
|
|
784
|
+
3.1. Install New -> PyPI -> `spark-nlp==5.4.0-rc1` -> Install
|
|
785
785
|
|
|
786
|
-
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
786
|
+
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1` -> Install
|
|
787
787
|
|
|
788
788
|
4. Now you can attach your notebook to the cluster and use Spark NLP!
|
|
789
789
|
|
|
@@ -834,7 +834,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
|
|
|
834
834
|
"spark.kryoserializer.buffer.max": "2000M",
|
|
835
835
|
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
|
|
836
836
|
"spark.driver.maxResultSize": "0",
|
|
837
|
-
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
837
|
+
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1"
|
|
838
838
|
}
|
|
839
839
|
}]
|
|
840
840
|
```
|
|
@@ -843,7 +843,7 @@ A sample of AWS CLI to launch EMR cluster:
|
|
|
843
843
|
|
|
844
844
|
```.sh
|
|
845
845
|
aws emr create-cluster \
|
|
846
|
-
--name "Spark NLP 5.
|
|
846
|
+
--name "Spark NLP 5.4.0-rc1" \
|
|
847
847
|
--release-label emr-6.2.0 \
|
|
848
848
|
--applications Name=Hadoop Name=Spark Name=Hive \
|
|
849
849
|
--instance-type m4.4xlarge \
|
|
@@ -907,7 +907,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
|
|
|
907
907
|
--enable-component-gateway \
|
|
908
908
|
--metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
|
|
909
909
|
--initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
|
|
910
|
-
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
910
|
+
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
911
911
|
```
|
|
912
912
|
|
|
913
913
|
2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
|
|
@@ -950,7 +950,7 @@ spark = SparkSession.builder
|
|
|
950
950
|
.config("spark.kryoserializer.buffer.max", "2000m")
|
|
951
951
|
.config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
|
|
952
952
|
.config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
|
|
953
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
953
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1")
|
|
954
954
|
.getOrCreate()
|
|
955
955
|
```
|
|
956
956
|
|
|
@@ -964,7 +964,7 @@ spark-shell \
|
|
|
964
964
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
965
965
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
966
966
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
967
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
967
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
968
968
|
```
|
|
969
969
|
|
|
970
970
|
**pyspark:**
|
|
@@ -977,7 +977,7 @@ pyspark \
|
|
|
977
977
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
978
978
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
979
979
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
980
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
980
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
981
981
|
```
|
|
982
982
|
|
|
983
983
|
**Databricks:**
|
|
@@ -1249,7 +1249,7 @@ spark = SparkSession.builder
|
|
|
1249
1249
|
.config("spark.driver.memory", "16G")
|
|
1250
1250
|
.config("spark.driver.maxResultSize", "0")
|
|
1251
1251
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
1252
|
-
.config("spark.jars", "/tmp/spark-nlp-assembly-5.
|
|
1252
|
+
.config("spark.jars", "/tmp/spark-nlp-assembly-5.4.0-rc1.jar")
|
|
1253
1253
|
.getOrCreate()
|
|
1254
1254
|
```
|
|
1255
1255
|
|
|
@@ -1258,7 +1258,7 @@ spark = SparkSession.builder
|
|
|
1258
1258
|
version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
|
|
1259
1259
|
- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
|
|
1260
1260
|
to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
|
|
1261
|
-
i.e., `hdfs:///tmp/spark-nlp-assembly-5.
|
|
1261
|
+
i.e., `hdfs:///tmp/spark-nlp-assembly-5.4.0-rc1.jar`)
|
|
1262
1262
|
|
|
1263
1263
|
Example of using pretrained Models and Pipelines in offline:
|
|
1264
1264
|
|