spark-nlp 5.4.2__tar.gz → 5.5.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spark-nlp might be problematic. See the registry's advisory page for more details.
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/PKG-INFO +45 -45
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/README.md +44 -44
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/setup.py +1 -1
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/spark_nlp.egg-info/PKG-INFO +45 -45
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/spark_nlp.egg-info/SOURCES.txt +12 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/__init__.py +2 -2
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/__init__.py +3 -1
- spark-nlp-5.5.0rc1/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
- spark-nlp-5.5.0rc1/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +2 -15
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/__init__.py +3 -0
- spark-nlp-5.5.0rc1/sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
- spark-nlp-5.5.0rc1/sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
- spark-nlp-5.5.0rc1/sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/__init__.py +7 -0
- spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/auto_gguf_model.py +804 -0
- spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
- spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
- spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
- spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
- spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/qwen_transformer.py +339 -0
- spark-nlp-5.5.0rc1/sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/__init__.py +89 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/com/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/com/johnsnowlabs/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/com/johnsnowlabs/ml/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/com/johnsnowlabs/nlp/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/setup.cfg +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/spark_nlp.egg-info/.uuid +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/spark_nlp.egg-info/dependency_links.txt +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/spark_nlp.egg-info/top_level.txt +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotation.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotation_audio.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotation_image.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/audio/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/chunk2_doc.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/chunker.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/coref/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/date2_chunk.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/dependency/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/document_character_text_splitter.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/document_normalizer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/document_token_splitter.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/document_token_splitter_test.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/word2vec.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/er/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/er/entity_ruler.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/graph_extraction.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ld_dl/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/lemmatizer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/date_matcher.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/matcher/text_matcher.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/n_gram_generator.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/ner_approach.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/ner_converter.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/ner_crf.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/ner_dl.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/normalizer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/openai/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/openai/openai_completion.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/param/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/param/classifier_encoder.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/pos/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/pos/perceptron.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentence/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentiment/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/similarity/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/spell_check/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/stemmer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/stop_words_cleaner.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token/tokenizer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/token2_chunk.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ws/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/annotator/ws/word_segmenter.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/audio_assembler.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/doc2_chunk.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/document_assembler.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/embeddings_finisher.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/finisher.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/graph_finisher.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/has_recursive_fit.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/has_recursive_transform.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/image_assembler.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/light_pipeline.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/multi_document_assembler.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/recursive_pipeline.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/table_assembler.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/base/token_assembler.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/annotator_approach.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/annotator_model.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/annotator_properties.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/annotator_type.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/coverage_result.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/match_strategy.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/properties.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/read_as.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/recursive_annotator_approach.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/storage.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/common/utils.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/functions.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/annotator_java_ml.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/annotator_transformer.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/extended_java_wrapper.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/params_getters_setters.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/internal/recursive.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/logging/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/logging/comet.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/pretrained/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/pretrained/resource_downloader.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/pretrained/utils.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/conll.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/conllu.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/pos.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/pub_tator.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/spacy_to_annotation.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/training/tfgraphs.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/upload_to_hub.py +0 -0
- {spark-nlp-5.4.2 → spark-nlp-5.5.0rc1}/sparknlp/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: spark-nlp
|
|
3
|
-
Version: 5.4.2
|
|
3
|
+
Version: 5.5.0rc1
|
|
4
4
|
Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
|
|
5
5
|
Home-page: https://github.com/JohnSnowLabs/spark-nlp
|
|
6
6
|
Author: John Snow Labs
|
|
@@ -198,7 +198,7 @@ To use Spark NLP you need the following requirements:
|
|
|
198
198
|
|
|
199
199
|
**GPU (optional):**
|
|
200
200
|
|
|
201
|
-
Spark NLP 5.4.2
|
|
201
|
+
Spark NLP 5.5.0-rc1 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
202
202
|
|
|
203
203
|
- NVIDIA® GPU drivers version 450.80.02 or higher
|
|
204
204
|
- CUDA® Toolkit 11.2
|
|
@@ -214,7 +214,7 @@ $ java -version
|
|
|
214
214
|
$ conda create -n sparknlp python=3.7 -y
|
|
215
215
|
$ conda activate sparknlp
|
|
216
216
|
# spark-nlp by default is based on pyspark 3.x
|
|
217
|
-
$ pip install spark-nlp==5.4.2 pyspark==3.3.1
|
|
217
|
+
$ pip install spark-nlp==5.5.0-rc1 pyspark==3.3.1
|
|
218
218
|
```
|
|
219
219
|
|
|
220
220
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -259,7 +259,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
|
|
|
259
259
|
|
|
260
260
|
## Apache Spark Support
|
|
261
261
|
|
|
262
|
-
Spark NLP *5.4.2*
|
|
262
|
+
Spark NLP *5.5.0-rc1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
263
263
|
|
|
264
264
|
| Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
|
|
265
265
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
@@ -292,7 +292,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
|
|
|
292
292
|
|
|
293
293
|
## Databricks Support
|
|
294
294
|
|
|
295
|
-
Spark NLP 5.4.2 has been tested and is compatible with the following runtimes:
|
|
295
|
+
Spark NLP 5.5.0-rc1 has been tested and is compatible with the following runtimes:
|
|
296
296
|
|
|
297
297
|
**CPU:**
|
|
298
298
|
|
|
@@ -365,7 +365,7 @@ Spark NLP 5.4.2 has been tested and is compatible with the following runtimes:
|
|
|
365
365
|
|
|
366
366
|
## EMR Support
|
|
367
367
|
|
|
368
|
-
Spark NLP 5.4.2
|
|
368
|
+
Spark NLP 5.5.0-rc1 has been tested and is compatible with the following EMR releases:
|
|
369
369
|
|
|
370
370
|
- emr-6.2.0
|
|
371
371
|
- emr-6.3.0
|
|
@@ -415,11 +415,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
|
|
|
415
415
|
```sh
|
|
416
416
|
# CPU
|
|
417
417
|
|
|
418
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
|
|
418
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
419
419
|
|
|
420
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
|
|
420
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
421
421
|
|
|
422
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
|
|
422
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
423
423
|
```
|
|
424
424
|
|
|
425
425
|
The `spark-nlp` has been published to
|
|
@@ -428,11 +428,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
428
428
|
```sh
|
|
429
429
|
# GPU
|
|
430
430
|
|
|
431
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.2
|
|
431
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
|
|
432
432
|
|
|
433
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.2
|
|
433
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
|
|
434
434
|
|
|
435
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.2
|
|
435
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
|
|
436
436
|
|
|
437
437
|
```
|
|
438
438
|
|
|
@@ -442,11 +442,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
442
442
|
```sh
|
|
443
443
|
# AArch64
|
|
444
444
|
|
|
445
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.2
|
|
445
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
|
|
446
446
|
|
|
447
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.2
|
|
447
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
|
|
448
448
|
|
|
449
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.2
|
|
449
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
|
|
450
450
|
|
|
451
451
|
```
|
|
452
452
|
|
|
@@ -456,11 +456,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
456
456
|
```sh
|
|
457
457
|
# M1/M2 (Apple Silicon)
|
|
458
458
|
|
|
459
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.2
|
|
459
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
|
|
460
460
|
|
|
461
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.2
|
|
461
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
|
|
462
462
|
|
|
463
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.2
|
|
463
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
|
|
464
464
|
|
|
465
465
|
```
|
|
466
466
|
|
|
@@ -474,7 +474,7 @@ set in your SparkSession:
|
|
|
474
474
|
spark-shell \
|
|
475
475
|
--driver-memory 16g \
|
|
476
476
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
477
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
|
|
477
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
478
478
|
```
|
|
479
479
|
|
|
480
480
|
## Scala
|
|
@@ -492,7 +492,7 @@ coordinates:
|
|
|
492
492
|
<dependency>
|
|
493
493
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
494
494
|
<artifactId>spark-nlp_2.12</artifactId>
|
|
495
|
-
<version>5.4.2</version>
|
|
495
|
+
<version>5.5.0-rc1</version>
|
|
496
496
|
</dependency>
|
|
497
497
|
```
|
|
498
498
|
|
|
@@ -503,7 +503,7 @@ coordinates:
|
|
|
503
503
|
<dependency>
|
|
504
504
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
505
505
|
<artifactId>spark-nlp-gpu_2.12</artifactId>
|
|
506
|
-
<version>5.4.2</version>
|
|
506
|
+
<version>5.5.0-rc1</version>
|
|
507
507
|
</dependency>
|
|
508
508
|
```
|
|
509
509
|
|
|
@@ -514,7 +514,7 @@ coordinates:
|
|
|
514
514
|
<dependency>
|
|
515
515
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
516
516
|
<artifactId>spark-nlp-aarch64_2.12</artifactId>
|
|
517
|
-
<version>5.4.2</version>
|
|
517
|
+
<version>5.5.0-rc1</version>
|
|
518
518
|
</dependency>
|
|
519
519
|
```
|
|
520
520
|
|
|
@@ -525,7 +525,7 @@ coordinates:
|
|
|
525
525
|
<dependency>
|
|
526
526
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
527
527
|
<artifactId>spark-nlp-silicon_2.12</artifactId>
|
|
528
|
-
<version>5.4.2</version>
|
|
528
|
+
<version>5.5.0-rc1</version>
|
|
529
529
|
</dependency>
|
|
530
530
|
```
|
|
531
531
|
|
|
@@ -535,28 +535,28 @@ coordinates:
|
|
|
535
535
|
|
|
536
536
|
```sbtshell
|
|
537
537
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
|
|
538
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.2"
|
|
538
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.5.0-rc1"
|
|
539
539
|
```
|
|
540
540
|
|
|
541
541
|
**spark-nlp-gpu:**
|
|
542
542
|
|
|
543
543
|
```sbtshell
|
|
544
544
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
|
|
545
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.
|
|
545
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.5.0-rc1"
|
|
546
546
|
```
|
|
547
547
|
|
|
548
548
|
**spark-nlp-aarch64:**
|
|
549
549
|
|
|
550
550
|
```sbtshell
|
|
551
551
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
|
|
552
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.
|
|
552
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.5.0-rc1"
|
|
553
553
|
```
|
|
554
554
|
|
|
555
555
|
**spark-nlp-silicon:**
|
|
556
556
|
|
|
557
557
|
```sbtshell
|
|
558
558
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
|
|
559
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.
|
|
559
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.5.0-rc1"
|
|
560
560
|
```
|
|
561
561
|
|
|
562
562
|
Maven
|
|
@@ -578,7 +578,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
|
|
|
578
578
|
Pip:
|
|
579
579
|
|
|
580
580
|
```bash
|
|
581
|
-
pip install spark-nlp==5.
|
|
581
|
+
pip install spark-nlp==5.5.0-rc1
|
|
582
582
|
```
|
|
583
583
|
|
|
584
584
|
Conda:
|
|
@@ -607,7 +607,7 @@ spark = SparkSession.builder
|
|
|
607
607
|
.config("spark.driver.memory", "16G")
|
|
608
608
|
.config("spark.driver.maxResultSize", "0")
|
|
609
609
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
610
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
610
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1")
|
|
611
611
|
.getOrCreate()
|
|
612
612
|
```
|
|
613
613
|
|
|
@@ -678,7 +678,7 @@ Use either one of the following options
|
|
|
678
678
|
- Add the following Maven Coordinates to the interpreter's library list
|
|
679
679
|
|
|
680
680
|
```bash
|
|
681
|
-
com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
681
|
+
com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
682
682
|
```
|
|
683
683
|
|
|
684
684
|
- Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
|
|
@@ -689,7 +689,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
|
|
|
689
689
|
Apart from the previous step, install the python module through pip
|
|
690
690
|
|
|
691
691
|
```bash
|
|
692
|
-
pip install spark-nlp==5.
|
|
692
|
+
pip install spark-nlp==5.5.0-rc1
|
|
693
693
|
```
|
|
694
694
|
|
|
695
695
|
Or you can install `spark-nlp` from inside Zeppelin by using Conda:
|
|
@@ -717,7 +717,7 @@ launch the Jupyter from the same Python environment:
|
|
|
717
717
|
$ conda create -n sparknlp python=3.8 -y
|
|
718
718
|
$ conda activate sparknlp
|
|
719
719
|
# spark-nlp by default is based on pyspark 3.x
|
|
720
|
-
$ pip install spark-nlp==5.
|
|
720
|
+
$ pip install spark-nlp==5.5.0-rc1 pyspark==3.3.1 jupyter
|
|
721
721
|
$ jupyter notebook
|
|
722
722
|
```
|
|
723
723
|
|
|
@@ -734,7 +734,7 @@ export PYSPARK_PYTHON=python3
|
|
|
734
734
|
export PYSPARK_DRIVER_PYTHON=jupyter
|
|
735
735
|
export PYSPARK_DRIVER_PYTHON_OPTS=notebook
|
|
736
736
|
|
|
737
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
737
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
738
738
|
```
|
|
739
739
|
|
|
740
740
|
Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
|
|
@@ -761,7 +761,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
761
761
|
# -s is for spark-nlp
|
|
762
762
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
|
|
763
763
|
# by default they are set to the latest
|
|
764
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
764
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.5.0-rc1
|
|
765
765
|
```
|
|
766
766
|
|
|
767
767
|
[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
|
|
@@ -784,7 +784,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
784
784
|
# -s is for spark-nlp
|
|
785
785
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
|
|
786
786
|
# by default they are set to the latest
|
|
787
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
787
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.5.0-rc1
|
|
788
788
|
```
|
|
789
789
|
|
|
790
790
|
[Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
|
|
@@ -803,9 +803,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
|
|
|
803
803
|
|
|
804
804
|
3. In `Libraries` tab inside your cluster you need to follow these steps:
|
|
805
805
|
|
|
806
|
-
3.1. Install New -> PyPI -> `spark-nlp==5.
|
|
806
|
+
3.1. Install New -> PyPI -> `spark-nlp==5.5.0-rc1` -> Install
|
|
807
807
|
|
|
808
|
-
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
808
|
+
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1` -> Install
|
|
809
809
|
|
|
810
810
|
4. Now you can attach your notebook to the cluster and use Spark NLP!
|
|
811
811
|
|
|
@@ -856,7 +856,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
|
|
|
856
856
|
"spark.kryoserializer.buffer.max": "2000M",
|
|
857
857
|
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
|
|
858
858
|
"spark.driver.maxResultSize": "0",
|
|
859
|
-
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
859
|
+
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1"
|
|
860
860
|
}
|
|
861
861
|
}]
|
|
862
862
|
```
|
|
@@ -865,7 +865,7 @@ A sample of AWS CLI to launch EMR cluster:
|
|
|
865
865
|
|
|
866
866
|
```.sh
|
|
867
867
|
aws emr create-cluster \
|
|
868
|
-
--name "Spark NLP 5.
|
|
868
|
+
--name "Spark NLP 5.5.0-rc1" \
|
|
869
869
|
--release-label emr-6.2.0 \
|
|
870
870
|
--applications Name=Hadoop Name=Spark Name=Hive \
|
|
871
871
|
--instance-type m4.4xlarge \
|
|
@@ -929,7 +929,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
|
|
|
929
929
|
--enable-component-gateway \
|
|
930
930
|
--metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
|
|
931
931
|
--initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
|
|
932
|
-
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
932
|
+
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
933
933
|
```
|
|
934
934
|
|
|
935
935
|
2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
|
|
@@ -972,7 +972,7 @@ spark = SparkSession.builder
|
|
|
972
972
|
.config("spark.kryoserializer.buffer.max", "2000m")
|
|
973
973
|
.config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
|
|
974
974
|
.config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
|
|
975
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
975
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1")
|
|
976
976
|
.getOrCreate()
|
|
977
977
|
```
|
|
978
978
|
|
|
@@ -986,7 +986,7 @@ spark-shell \
|
|
|
986
986
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
987
987
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
988
988
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
989
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
989
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
990
990
|
```
|
|
991
991
|
|
|
992
992
|
**pyspark:**
|
|
@@ -999,7 +999,7 @@ pyspark \
|
|
|
999
999
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
1000
1000
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
1001
1001
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
1002
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
1002
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
1003
1003
|
```
|
|
1004
1004
|
|
|
1005
1005
|
**Databricks:**
|
|
@@ -1271,7 +1271,7 @@ spark = SparkSession.builder
|
|
|
1271
1271
|
.config("spark.driver.memory", "16G")
|
|
1272
1272
|
.config("spark.driver.maxResultSize", "0")
|
|
1273
1273
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
1274
|
-
.config("spark.jars", "/tmp/spark-nlp-assembly-5.
|
|
1274
|
+
.config("spark.jars", "/tmp/spark-nlp-assembly-5.5.0-rc1.jar")
|
|
1275
1275
|
.getOrCreate()
|
|
1276
1276
|
```
|
|
1277
1277
|
|
|
@@ -1280,7 +1280,7 @@ spark = SparkSession.builder
|
|
|
1280
1280
|
version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
|
|
1281
1281
|
- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
|
|
1282
1282
|
to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
|
|
1283
|
-
i.e., `hdfs:///tmp/spark-nlp-assembly-5.
|
|
1283
|
+
i.e., `hdfs:///tmp/spark-nlp-assembly-5.5.0-rc1.jar`)
|
|
1284
1284
|
|
|
1285
1285
|
Example of using pretrained Models and Pipelines in offline:
|
|
1286
1286
|
|
|
@@ -166,7 +166,7 @@ To use Spark NLP you need the following requirements:
|
|
|
166
166
|
|
|
167
167
|
**GPU (optional):**
|
|
168
168
|
|
|
169
|
-
Spark NLP 5.
|
|
169
|
+
Spark NLP 5.5.0-rc1 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
170
170
|
|
|
171
171
|
- NVIDIA® GPU drivers version 450.80.02 or higher
|
|
172
172
|
- CUDA® Toolkit 11.2
|
|
@@ -182,7 +182,7 @@ $ java -version
|
|
|
182
182
|
$ conda create -n sparknlp python=3.7 -y
|
|
183
183
|
$ conda activate sparknlp
|
|
184
184
|
# spark-nlp by default is based on pyspark 3.x
|
|
185
|
-
$ pip install spark-nlp==5.
|
|
185
|
+
$ pip install spark-nlp==5.5.0-rc1 pyspark==3.3.1
|
|
186
186
|
```
|
|
187
187
|
|
|
188
188
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -227,7 +227,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
|
|
|
227
227
|
|
|
228
228
|
## Apache Spark Support
|
|
229
229
|
|
|
230
|
-
Spark NLP *5.
|
|
230
|
+
Spark NLP *5.5.0-rc1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
231
231
|
|
|
232
232
|
| Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
|
|
233
233
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
@@ -260,7 +260,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
|
|
|
260
260
|
|
|
261
261
|
## Databricks Support
|
|
262
262
|
|
|
263
|
-
Spark NLP 5.
|
|
263
|
+
Spark NLP 5.5.0-rc1 has been tested and is compatible with the following runtimes:
|
|
264
264
|
|
|
265
265
|
**CPU:**
|
|
266
266
|
|
|
@@ -333,7 +333,7 @@ Spark NLP 5.4.2 has been tested and is compatible with the following runtimes:
|
|
|
333
333
|
|
|
334
334
|
## EMR Support
|
|
335
335
|
|
|
336
|
-
Spark NLP 5.
|
|
336
|
+
Spark NLP 5.5.0-rc1 has been tested and is compatible with the following EMR releases:
|
|
337
337
|
|
|
338
338
|
- emr-6.2.0
|
|
339
339
|
- emr-6.3.0
|
|
@@ -383,11 +383,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
|
|
|
383
383
|
```sh
|
|
384
384
|
# CPU
|
|
385
385
|
|
|
386
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
386
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
387
387
|
|
|
388
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
388
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
389
389
|
|
|
390
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
390
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
391
391
|
```
|
|
392
392
|
|
|
393
393
|
The `spark-nlp` has been published to
|
|
@@ -396,11 +396,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
396
396
|
```sh
|
|
397
397
|
# GPU
|
|
398
398
|
|
|
399
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.
|
|
399
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
|
|
400
400
|
|
|
401
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.
|
|
401
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
|
|
402
402
|
|
|
403
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.
|
|
403
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
|
|
404
404
|
|
|
405
405
|
```
|
|
406
406
|
|
|
@@ -410,11 +410,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
410
410
|
```sh
|
|
411
411
|
# AArch64
|
|
412
412
|
|
|
413
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.
|
|
413
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
|
|
414
414
|
|
|
415
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.
|
|
415
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
|
|
416
416
|
|
|
417
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.
|
|
417
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
|
|
418
418
|
|
|
419
419
|
```
|
|
420
420
|
|
|
@@ -424,11 +424,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
424
424
|
```sh
|
|
425
425
|
# M1/M2 (Apple Silicon)
|
|
426
426
|
|
|
427
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.
|
|
427
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
|
|
428
428
|
|
|
429
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.
|
|
429
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
|
|
430
430
|
|
|
431
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.
|
|
431
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
|
|
432
432
|
|
|
433
433
|
```
|
|
434
434
|
|
|
@@ -442,7 +442,7 @@ set in your SparkSession:
|
|
|
442
442
|
spark-shell \
|
|
443
443
|
--driver-memory 16g \
|
|
444
444
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
445
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
445
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
446
446
|
```
|
|
447
447
|
|
|
448
448
|
## Scala
|
|
@@ -460,7 +460,7 @@ coordinates:
|
|
|
460
460
|
<dependency>
|
|
461
461
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
462
462
|
<artifactId>spark-nlp_2.12</artifactId>
|
|
463
|
-
<version>5.
|
|
463
|
+
<version>5.5.0-rc1</version>
|
|
464
464
|
</dependency>
|
|
465
465
|
```
|
|
466
466
|
|
|
@@ -471,7 +471,7 @@ coordinates:
|
|
|
471
471
|
<dependency>
|
|
472
472
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
473
473
|
<artifactId>spark-nlp-gpu_2.12</artifactId>
|
|
474
|
-
<version>5.
|
|
474
|
+
<version>5.5.0-rc1</version>
|
|
475
475
|
</dependency>
|
|
476
476
|
```
|
|
477
477
|
|
|
@@ -482,7 +482,7 @@ coordinates:
|
|
|
482
482
|
<dependency>
|
|
483
483
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
484
484
|
<artifactId>spark-nlp-aarch64_2.12</artifactId>
|
|
485
|
-
<version>5.
|
|
485
|
+
<version>5.5.0-rc1</version>
|
|
486
486
|
</dependency>
|
|
487
487
|
```
|
|
488
488
|
|
|
@@ -493,7 +493,7 @@ coordinates:
|
|
|
493
493
|
<dependency>
|
|
494
494
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
495
495
|
<artifactId>spark-nlp-silicon_2.12</artifactId>
|
|
496
|
-
<version>5.
|
|
496
|
+
<version>5.5.0-rc1</version>
|
|
497
497
|
</dependency>
|
|
498
498
|
```
|
|
499
499
|
|
|
@@ -503,28 +503,28 @@ coordinates:
|
|
|
503
503
|
|
|
504
504
|
```sbtshell
|
|
505
505
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
|
|
506
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.
|
|
506
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.5.0-rc1"
|
|
507
507
|
```
|
|
508
508
|
|
|
509
509
|
**spark-nlp-gpu:**
|
|
510
510
|
|
|
511
511
|
```sbtshell
|
|
512
512
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
|
|
513
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.
|
|
513
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.5.0-rc1"
|
|
514
514
|
```
|
|
515
515
|
|
|
516
516
|
**spark-nlp-aarch64:**
|
|
517
517
|
|
|
518
518
|
```sbtshell
|
|
519
519
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
|
|
520
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.
|
|
520
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.5.0-rc1"
|
|
521
521
|
```
|
|
522
522
|
|
|
523
523
|
**spark-nlp-silicon:**
|
|
524
524
|
|
|
525
525
|
```sbtshell
|
|
526
526
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
|
|
527
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.
|
|
527
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.5.0-rc1"
|
|
528
528
|
```
|
|
529
529
|
|
|
530
530
|
Maven
|
|
@@ -546,7 +546,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
|
|
|
546
546
|
Pip:
|
|
547
547
|
|
|
548
548
|
```bash
|
|
549
|
-
pip install spark-nlp==5.
|
|
549
|
+
pip install spark-nlp==5.5.0-rc1
|
|
550
550
|
```
|
|
551
551
|
|
|
552
552
|
Conda:
|
|
@@ -575,7 +575,7 @@ spark = SparkSession.builder
|
|
|
575
575
|
.config("spark.driver.memory", "16G")
|
|
576
576
|
.config("spark.driver.maxResultSize", "0")
|
|
577
577
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
578
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
578
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1")
|
|
579
579
|
.getOrCreate()
|
|
580
580
|
```
|
|
581
581
|
|
|
@@ -646,7 +646,7 @@ Use either one of the following options
|
|
|
646
646
|
- Add the following Maven Coordinates to the interpreter's library list
|
|
647
647
|
|
|
648
648
|
```bash
|
|
649
|
-
com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
649
|
+
com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
650
650
|
```
|
|
651
651
|
|
|
652
652
|
- Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
|
|
@@ -657,7 +657,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
|
|
|
657
657
|
Apart from the previous step, install the python module through pip
|
|
658
658
|
|
|
659
659
|
```bash
|
|
660
|
-
pip install spark-nlp==5.
|
|
660
|
+
pip install spark-nlp==5.5.0-rc1
|
|
661
661
|
```
|
|
662
662
|
|
|
663
663
|
Or you can install `spark-nlp` from inside Zeppelin by using Conda:
|
|
@@ -685,7 +685,7 @@ launch the Jupyter from the same Python environment:
|
|
|
685
685
|
$ conda create -n sparknlp python=3.8 -y
|
|
686
686
|
$ conda activate sparknlp
|
|
687
687
|
# spark-nlp by default is based on pyspark 3.x
|
|
688
|
-
$ pip install spark-nlp==5.
|
|
688
|
+
$ pip install spark-nlp==5.5.0-rc1 pyspark==3.3.1 jupyter
|
|
689
689
|
$ jupyter notebook
|
|
690
690
|
```
|
|
691
691
|
|
|
@@ -702,7 +702,7 @@ export PYSPARK_PYTHON=python3
|
|
|
702
702
|
export PYSPARK_DRIVER_PYTHON=jupyter
|
|
703
703
|
export PYSPARK_DRIVER_PYTHON_OPTS=notebook
|
|
704
704
|
|
|
705
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
705
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
706
706
|
```
|
|
707
707
|
|
|
708
708
|
Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
|
|
@@ -729,7 +729,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
729
729
|
# -s is for spark-nlp
|
|
730
730
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
|
|
731
731
|
# by default they are set to the latest
|
|
732
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
732
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.5.0-rc1
|
|
733
733
|
```
|
|
734
734
|
|
|
735
735
|
[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
|
|
@@ -752,7 +752,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
752
752
|
# -s is for spark-nlp
|
|
753
753
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
|
|
754
754
|
# by default they are set to the latest
|
|
755
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
755
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.5.0-rc1
|
|
756
756
|
```
|
|
757
757
|
|
|
758
758
|
[Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
|
|
@@ -771,9 +771,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
|
|
|
771
771
|
|
|
772
772
|
3. In `Libraries` tab inside your cluster you need to follow these steps:
|
|
773
773
|
|
|
774
|
-
3.1. Install New -> PyPI -> `spark-nlp==5.
|
|
774
|
+
3.1. Install New -> PyPI -> `spark-nlp==5.5.0-rc1` -> Install
|
|
775
775
|
|
|
776
|
-
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
776
|
+
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1` -> Install
|
|
777
777
|
|
|
778
778
|
4. Now you can attach your notebook to the cluster and use Spark NLP!
|
|
779
779
|
|
|
@@ -824,7 +824,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
|
|
|
824
824
|
"spark.kryoserializer.buffer.max": "2000M",
|
|
825
825
|
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
|
|
826
826
|
"spark.driver.maxResultSize": "0",
|
|
827
|
-
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
827
|
+
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1"
|
|
828
828
|
}
|
|
829
829
|
}]
|
|
830
830
|
```
|
|
@@ -833,7 +833,7 @@ A sample of AWS CLI to launch EMR cluster:
|
|
|
833
833
|
|
|
834
834
|
```.sh
|
|
835
835
|
aws emr create-cluster \
|
|
836
|
-
--name "Spark NLP 5.
|
|
836
|
+
--name "Spark NLP 5.5.0-rc1" \
|
|
837
837
|
--release-label emr-6.2.0 \
|
|
838
838
|
--applications Name=Hadoop Name=Spark Name=Hive \
|
|
839
839
|
--instance-type m4.4xlarge \
|
|
@@ -897,7 +897,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
|
|
|
897
897
|
--enable-component-gateway \
|
|
898
898
|
--metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
|
|
899
899
|
--initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
|
|
900
|
-
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
900
|
+
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
901
901
|
```
|
|
902
902
|
|
|
903
903
|
2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
|
|
@@ -940,7 +940,7 @@ spark = SparkSession.builder
|
|
|
940
940
|
.config("spark.kryoserializer.buffer.max", "2000m")
|
|
941
941
|
.config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
|
|
942
942
|
.config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
|
|
943
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
943
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1")
|
|
944
944
|
.getOrCreate()
|
|
945
945
|
```
|
|
946
946
|
|
|
@@ -954,7 +954,7 @@ spark-shell \
|
|
|
954
954
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
955
955
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
956
956
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
957
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
957
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
958
958
|
```
|
|
959
959
|
|
|
960
960
|
**pyspark:**
|
|
@@ -967,7 +967,7 @@ pyspark \
|
|
|
967
967
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
968
968
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
969
969
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
970
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
970
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
971
971
|
```
|
|
972
972
|
|
|
973
973
|
**Databricks:**
|
|
@@ -1239,7 +1239,7 @@ spark = SparkSession.builder
|
|
|
1239
1239
|
.config("spark.driver.memory", "16G")
|
|
1240
1240
|
.config("spark.driver.maxResultSize", "0")
|
|
1241
1241
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
1242
|
-
.config("spark.jars", "/tmp/spark-nlp-assembly-5.
|
|
1242
|
+
.config("spark.jars", "/tmp/spark-nlp-assembly-5.5.0-rc1.jar")
|
|
1243
1243
|
.getOrCreate()
|
|
1244
1244
|
```
|
|
1245
1245
|
|
|
@@ -1248,7 +1248,7 @@ spark = SparkSession.builder
|
|
|
1248
1248
|
version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
|
|
1249
1249
|
- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
|
|
1250
1250
|
to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
|
|
1251
|
-
i.e., `hdfs:///tmp/spark-nlp-assembly-5.
|
|
1251
|
+
i.e., `hdfs:///tmp/spark-nlp-assembly-5.5.0-rc1.jar`)
|
|
1252
1252
|
|
|
1253
1253
|
Example of using pretrained Models and Pipelines in offline:
|
|
1254
1254
|
|