spark-nlp 5.1.0__tar.gz → 5.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spark-nlp might be problematic. Click here for more details.
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/PKG-INFO +45 -45
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/README.md +44 -44
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/setup.py +1 -1
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/spark_nlp.egg-info/PKG-INFO +45 -45
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/__init__.py +2 -2
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/doc2vec.py +6 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/word2vec.py +6 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/com/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/com/johnsnowlabs/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/com/johnsnowlabs/nlp/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/setup.cfg +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/spark_nlp.egg-info/SOURCES.txt +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/spark_nlp.egg-info/dependency_links.txt +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/spark_nlp.egg-info/top_level.txt +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotation.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotation_audio.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotation_image.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/audio/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/chunk2_doc.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/chunker.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/coref/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/cv/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/date2_chunk.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/dependency/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/document_normalizer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/er/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/er/entity_ruler.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/graph_extraction.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/ld_dl/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/lemmatizer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/date_matcher.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/text_matcher.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/n_gram_generator.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/ner/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_approach.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_converter.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_crf.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_dl.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/normalizer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/openai/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/openai/openai_completion.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/param/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/param/classifier_encoder.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/pos/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/pos/perceptron.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/sentence/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/sentiment/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/similarity/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/stemmer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/stop_words_cleaner.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/token/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/token/tokenizer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/ws/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/annotator/ws/word_segmenter.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/audio_assembler.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/doc2_chunk.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/document_assembler.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/embeddings_finisher.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/finisher.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/graph_finisher.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/has_recursive_fit.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/has_recursive_transform.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/image_assembler.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/light_pipeline.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/multi_document_assembler.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/recursive_pipeline.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/table_assembler.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/token2_chunk.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/base/token_assembler.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/annotator_approach.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/annotator_model.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/annotator_properties.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/annotator_type.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/coverage_result.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/match_strategy.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/properties.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/read_as.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/recursive_annotator_approach.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/storage.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/common/utils.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/functions.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/internal/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/internal/annotator_java_ml.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/internal/annotator_transformer.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/internal/extended_java_wrapper.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/internal/params_getters_setters.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/internal/recursive.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/logging/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/logging/comet.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/pretrained/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/pretrained/resource_downloader.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/pretrained/utils.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/conll.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/conllu.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/pos.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/pub_tator.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/spacy_to_annotation.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/training/tfgraphs.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/upload_to_hub.py +0 -0
- {spark-nlp-5.1.0 → spark-nlp-5.1.1}/sparknlp/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: spark-nlp
|
|
3
|
-
Version: 5.1.0
|
|
3
|
+
Version: 5.1.1
|
|
4
4
|
Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
|
|
5
5
|
Home-page: https://github.com/JohnSnowLabs/spark-nlp
|
|
6
6
|
Author: John Snow Labs
|
|
@@ -202,7 +202,7 @@ To use Spark NLP you need the following requirements:
|
|
|
202
202
|
|
|
203
203
|
**GPU (optional):**
|
|
204
204
|
|
|
205
|
-
Spark NLP 5.1.0 is built with ONNX 1.15.1 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
205
|
+
Spark NLP 5.1.1 is built with ONNX 1.15.1 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
206
206
|
|
|
207
207
|
- NVIDIA® GPU drivers version 450.80.02 or higher
|
|
208
208
|
- CUDA® Toolkit 11.2
|
|
@@ -218,7 +218,7 @@ $ java -version
|
|
|
218
218
|
$ conda create -n sparknlp python=3.7 -y
|
|
219
219
|
$ conda activate sparknlp
|
|
220
220
|
# spark-nlp by default is based on pyspark 3.x
|
|
221
|
-
$ pip install spark-nlp==5.1.0 pyspark==3.3.1
|
|
221
|
+
$ pip install spark-nlp==5.1.1 pyspark==3.3.1
|
|
222
222
|
```
|
|
223
223
|
|
|
224
224
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -263,7 +263,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
|
|
|
263
263
|
|
|
264
264
|
## Apache Spark Support
|
|
265
265
|
|
|
266
|
-
Spark NLP *5.1.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x
|
|
266
|
+
Spark NLP *5.1.1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x
|
|
267
267
|
|
|
268
268
|
| Spark NLP | Apache Spark 2.3.x | Apache Spark 2.4.x | Apache Spark 3.0.x | Apache Spark 3.1.x | Apache Spark 3.2.x | Apache Spark 3.3.x | Apache Spark 3.4.x |
|
|
269
269
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
@@ -302,7 +302,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
|
|
|
302
302
|
|
|
303
303
|
## Databricks Support
|
|
304
304
|
|
|
305
|
-
Spark NLP 5.1.0 has been tested and is compatible with the following runtimes:
|
|
305
|
+
Spark NLP 5.1.1 has been tested and is compatible with the following runtimes:
|
|
306
306
|
|
|
307
307
|
**CPU:**
|
|
308
308
|
|
|
@@ -363,7 +363,7 @@ Spark NLP 5.1.0 has been tested and is compatible with the following runtimes:
|
|
|
363
363
|
|
|
364
364
|
## EMR Support
|
|
365
365
|
|
|
366
|
-
Spark NLP 5.1.0 has been tested and is compatible with the following EMR releases:
|
|
366
|
+
Spark NLP 5.1.1 has been tested and is compatible with the following EMR releases:
|
|
367
367
|
|
|
368
368
|
- emr-6.2.0
|
|
369
369
|
- emr-6.3.0
|
|
@@ -408,11 +408,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
|
|
|
408
408
|
```sh
|
|
409
409
|
# CPU
|
|
410
410
|
|
|
411
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
411
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
412
412
|
|
|
413
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
413
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
414
414
|
|
|
415
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
415
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
416
416
|
```
|
|
417
417
|
|
|
418
418
|
The `spark-nlp` has been published to
|
|
@@ -421,11 +421,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
421
421
|
```sh
|
|
422
422
|
# GPU
|
|
423
423
|
|
|
424
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.0
|
|
424
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
|
|
425
425
|
|
|
426
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.0
|
|
426
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
|
|
427
427
|
|
|
428
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.0
|
|
428
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
|
|
429
429
|
|
|
430
430
|
```
|
|
431
431
|
|
|
@@ -435,11 +435,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
435
435
|
```sh
|
|
436
436
|
# AArch64
|
|
437
437
|
|
|
438
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.0
|
|
438
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
|
|
439
439
|
|
|
440
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.0
|
|
440
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
|
|
441
441
|
|
|
442
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.0
|
|
442
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
|
|
443
443
|
|
|
444
444
|
```
|
|
445
445
|
|
|
@@ -449,11 +449,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
449
449
|
```sh
|
|
450
450
|
# M1/M2 (Apple Silicon)
|
|
451
451
|
|
|
452
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.0
|
|
452
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
|
|
453
453
|
|
|
454
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.0
|
|
454
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
|
|
455
455
|
|
|
456
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.0
|
|
456
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
|
|
457
457
|
|
|
458
458
|
```
|
|
459
459
|
|
|
@@ -467,7 +467,7 @@ set in your SparkSession:
|
|
|
467
467
|
spark-shell \
|
|
468
468
|
--driver-memory 16g \
|
|
469
469
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
470
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
470
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
471
471
|
```
|
|
472
472
|
|
|
473
473
|
## Scala
|
|
@@ -485,7 +485,7 @@ coordinates:
|
|
|
485
485
|
<dependency>
|
|
486
486
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
487
487
|
<artifactId>spark-nlp_2.12</artifactId>
|
|
488
|
-
<version>5.1.0</version>
|
|
488
|
+
<version>5.1.1</version>
|
|
489
489
|
</dependency>
|
|
490
490
|
```
|
|
491
491
|
|
|
@@ -496,7 +496,7 @@ coordinates:
|
|
|
496
496
|
<dependency>
|
|
497
497
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
498
498
|
<artifactId>spark-nlp-gpu_2.12</artifactId>
|
|
499
|
-
<version>5.1.0</version>
|
|
499
|
+
<version>5.1.1</version>
|
|
500
500
|
</dependency>
|
|
501
501
|
```
|
|
502
502
|
|
|
@@ -507,7 +507,7 @@ coordinates:
|
|
|
507
507
|
<dependency>
|
|
508
508
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
509
509
|
<artifactId>spark-nlp-aarch64_2.12</artifactId>
|
|
510
|
-
<version>5.1.0</version>
|
|
510
|
+
<version>5.1.1</version>
|
|
511
511
|
</dependency>
|
|
512
512
|
```
|
|
513
513
|
|
|
@@ -518,7 +518,7 @@ coordinates:
|
|
|
518
518
|
<dependency>
|
|
519
519
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
520
520
|
<artifactId>spark-nlp-silicon_2.12</artifactId>
|
|
521
|
-
<version>5.1.0</version>
|
|
521
|
+
<version>5.1.1</version>
|
|
522
522
|
</dependency>
|
|
523
523
|
```
|
|
524
524
|
|
|
@@ -528,28 +528,28 @@ coordinates:
|
|
|
528
528
|
|
|
529
529
|
```sbtshell
|
|
530
530
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
|
|
531
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.1.0"
|
|
531
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.1.1"
|
|
532
532
|
```
|
|
533
533
|
|
|
534
534
|
**spark-nlp-gpu:**
|
|
535
535
|
|
|
536
536
|
```sbtshell
|
|
537
537
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
|
|
538
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.1.0"
|
|
538
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.1.1"
|
|
539
539
|
```
|
|
540
540
|
|
|
541
541
|
**spark-nlp-aarch64:**
|
|
542
542
|
|
|
543
543
|
```sbtshell
|
|
544
544
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
|
|
545
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.1.0"
|
|
545
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.1.1"
|
|
546
546
|
```
|
|
547
547
|
|
|
548
548
|
**spark-nlp-silicon:**
|
|
549
549
|
|
|
550
550
|
```sbtshell
|
|
551
551
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
|
|
552
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.0"
|
|
552
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.1"
|
|
553
553
|
```
|
|
554
554
|
|
|
555
555
|
Maven
|
|
@@ -571,7 +571,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
|
|
|
571
571
|
Pip:
|
|
572
572
|
|
|
573
573
|
```bash
|
|
574
|
-
pip install spark-nlp==5.1.0
|
|
574
|
+
pip install spark-nlp==5.1.1
|
|
575
575
|
```
|
|
576
576
|
|
|
577
577
|
Conda:
|
|
@@ -600,7 +600,7 @@ spark = SparkSession.builder
|
|
|
600
600
|
.config("spark.driver.memory", "16G")
|
|
601
601
|
.config("spark.driver.maxResultSize", "0")
|
|
602
602
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
603
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0")
|
|
603
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1")
|
|
604
604
|
.getOrCreate()
|
|
605
605
|
```
|
|
606
606
|
|
|
@@ -671,7 +671,7 @@ Use either one of the following options
|
|
|
671
671
|
- Add the following Maven Coordinates to the interpreter's library list
|
|
672
672
|
|
|
673
673
|
```bash
|
|
674
|
-
com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
674
|
+
com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
675
675
|
```
|
|
676
676
|
|
|
677
677
|
- Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
|
|
@@ -682,7 +682,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
|
682
682
|
Apart from the previous step, install the python module through pip
|
|
683
683
|
|
|
684
684
|
```bash
|
|
685
|
-
pip install spark-nlp==5.1.0
|
|
685
|
+
pip install spark-nlp==5.1.1
|
|
686
686
|
```
|
|
687
687
|
|
|
688
688
|
Or you can install `spark-nlp` from inside Zeppelin by using Conda:
|
|
@@ -710,7 +710,7 @@ launch the Jupyter from the same Python environment:
|
|
|
710
710
|
$ conda create -n sparknlp python=3.8 -y
|
|
711
711
|
$ conda activate sparknlp
|
|
712
712
|
# spark-nlp by default is based on pyspark 3.x
|
|
713
|
-
$ pip install spark-nlp==5.1.0 pyspark==3.3.1 jupyter
|
|
713
|
+
$ pip install spark-nlp==5.1.1 pyspark==3.3.1 jupyter
|
|
714
714
|
$ jupyter notebook
|
|
715
715
|
```
|
|
716
716
|
|
|
@@ -727,7 +727,7 @@ export PYSPARK_PYTHON=python3
|
|
|
727
727
|
export PYSPARK_DRIVER_PYTHON=jupyter
|
|
728
728
|
export PYSPARK_DRIVER_PYTHON_OPTS=notebook
|
|
729
729
|
|
|
730
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
730
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
731
731
|
```
|
|
732
732
|
|
|
733
733
|
Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
|
|
@@ -754,7 +754,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
754
754
|
# -s is for spark-nlp
|
|
755
755
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
|
|
756
756
|
# by default they are set to the latest
|
|
757
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.0
|
|
757
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.1
|
|
758
758
|
```
|
|
759
759
|
|
|
760
760
|
[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
|
|
@@ -777,7 +777,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
777
777
|
# -s is for spark-nlp
|
|
778
778
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
|
|
779
779
|
# by default they are set to the latest
|
|
780
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.0
|
|
780
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.1
|
|
781
781
|
```
|
|
782
782
|
|
|
783
783
|
[Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
|
|
@@ -796,9 +796,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
|
|
|
796
796
|
|
|
797
797
|
3. In `Libraries` tab inside your cluster you need to follow these steps:
|
|
798
798
|
|
|
799
|
-
3.1. Install New -> PyPI -> `spark-nlp==5.1.0` -> Install
|
|
799
|
+
3.1. Install New -> PyPI -> `spark-nlp==5.1.1` -> Install
|
|
800
800
|
|
|
801
|
-
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0` -> Install
|
|
801
|
+
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1` -> Install
|
|
802
802
|
|
|
803
803
|
4. Now you can attach your notebook to the cluster and use Spark NLP!
|
|
804
804
|
|
|
@@ -849,7 +849,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
|
|
|
849
849
|
"spark.kryoserializer.buffer.max": "2000M",
|
|
850
850
|
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
|
|
851
851
|
"spark.driver.maxResultSize": "0",
|
|
852
|
-
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0"
|
|
852
|
+
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1"
|
|
853
853
|
}
|
|
854
854
|
}]
|
|
855
855
|
```
|
|
@@ -858,7 +858,7 @@ A sample of AWS CLI to launch EMR cluster:
|
|
|
858
858
|
|
|
859
859
|
```.sh
|
|
860
860
|
aws emr create-cluster \
|
|
861
|
-
--name "Spark NLP 5.1.0" \
|
|
861
|
+
--name "Spark NLP 5.1.1" \
|
|
862
862
|
--release-label emr-6.2.0 \
|
|
863
863
|
--applications Name=Hadoop Name=Spark Name=Hive \
|
|
864
864
|
--instance-type m4.4xlarge \
|
|
@@ -922,7 +922,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
|
|
|
922
922
|
--enable-component-gateway \
|
|
923
923
|
--metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
|
|
924
924
|
--initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
|
|
925
|
-
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
925
|
+
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
926
926
|
```
|
|
927
927
|
|
|
928
928
|
2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
|
|
@@ -961,7 +961,7 @@ spark = SparkSession.builder
|
|
|
961
961
|
.config("spark.kryoserializer.buffer.max", "2000m")
|
|
962
962
|
.config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
|
|
963
963
|
.config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
|
|
964
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0")
|
|
964
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1")
|
|
965
965
|
.getOrCreate()
|
|
966
966
|
```
|
|
967
967
|
|
|
@@ -975,7 +975,7 @@ spark-shell \
|
|
|
975
975
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
976
976
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
977
977
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
978
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
978
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
979
979
|
```
|
|
980
980
|
|
|
981
981
|
**pyspark:**
|
|
@@ -988,7 +988,7 @@ pyspark \
|
|
|
988
988
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
989
989
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
990
990
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
991
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
991
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
992
992
|
```
|
|
993
993
|
|
|
994
994
|
**Databricks:**
|
|
@@ -1260,7 +1260,7 @@ spark = SparkSession.builder
|
|
|
1260
1260
|
.config("spark.driver.memory", "16G")
|
|
1261
1261
|
.config("spark.driver.maxResultSize", "0")
|
|
1262
1262
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
1263
|
-
.config("spark.jars", "/tmp/spark-nlp-assembly-5.1.0.jar")
|
|
1263
|
+
.config("spark.jars", "/tmp/spark-nlp-assembly-5.1.1.jar")
|
|
1264
1264
|
.getOrCreate()
|
|
1265
1265
|
```
|
|
1266
1266
|
|
|
@@ -1269,7 +1269,7 @@ spark = SparkSession.builder
|
|
|
1269
1269
|
version (3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x)
|
|
1270
1270
|
- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
|
|
1271
1271
|
to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
|
|
1272
|
-
i.e., `hdfs:///tmp/spark-nlp-assembly-5.1.0.jar`)
|
|
1272
|
+
i.e., `hdfs:///tmp/spark-nlp-assembly-5.1.1.jar`)
|
|
1273
1273
|
|
|
1274
1274
|
Example of using pretrained Models and Pipelines in offline:
|
|
1275
1275
|
|
|
@@ -170,7 +170,7 @@ To use Spark NLP you need the following requirements:
|
|
|
170
170
|
|
|
171
171
|
**GPU (optional):**
|
|
172
172
|
|
|
173
|
-
Spark NLP 5.1.0 is built with ONNX 1.15.1 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
173
|
+
Spark NLP 5.1.1 is built with ONNX 1.15.1 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
174
174
|
|
|
175
175
|
- NVIDIA® GPU drivers version 450.80.02 or higher
|
|
176
176
|
- CUDA® Toolkit 11.2
|
|
@@ -186,7 +186,7 @@ $ java -version
|
|
|
186
186
|
$ conda create -n sparknlp python=3.7 -y
|
|
187
187
|
$ conda activate sparknlp
|
|
188
188
|
# spark-nlp by default is based on pyspark 3.x
|
|
189
|
-
$ pip install spark-nlp==5.1.0 pyspark==3.3.1
|
|
189
|
+
$ pip install spark-nlp==5.1.1 pyspark==3.3.1
|
|
190
190
|
```
|
|
191
191
|
|
|
192
192
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -231,7 +231,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
|
|
|
231
231
|
|
|
232
232
|
## Apache Spark Support
|
|
233
233
|
|
|
234
|
-
Spark NLP *5.1.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x
|
|
234
|
+
Spark NLP *5.1.1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x
|
|
235
235
|
|
|
236
236
|
| Spark NLP | Apache Spark 2.3.x | Apache Spark 2.4.x | Apache Spark 3.0.x | Apache Spark 3.1.x | Apache Spark 3.2.x | Apache Spark 3.3.x | Apache Spark 3.4.x |
|
|
237
237
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
@@ -270,7 +270,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
|
|
|
270
270
|
|
|
271
271
|
## Databricks Support
|
|
272
272
|
|
|
273
|
-
Spark NLP 5.1.0 has been tested and is compatible with the following runtimes:
|
|
273
|
+
Spark NLP 5.1.1 has been tested and is compatible with the following runtimes:
|
|
274
274
|
|
|
275
275
|
**CPU:**
|
|
276
276
|
|
|
@@ -331,7 +331,7 @@ Spark NLP 5.1.0 has been tested and is compatible with the following runtimes:
|
|
|
331
331
|
|
|
332
332
|
## EMR Support
|
|
333
333
|
|
|
334
|
-
Spark NLP 5.1.0 has been tested and is compatible with the following EMR releases:
|
|
334
|
+
Spark NLP 5.1.1 has been tested and is compatible with the following EMR releases:
|
|
335
335
|
|
|
336
336
|
- emr-6.2.0
|
|
337
337
|
- emr-6.3.0
|
|
@@ -376,11 +376,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
|
|
|
376
376
|
```sh
|
|
377
377
|
# CPU
|
|
378
378
|
|
|
379
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
379
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
380
380
|
|
|
381
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
381
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
382
382
|
|
|
383
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
383
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
384
384
|
```
|
|
385
385
|
|
|
386
386
|
The `spark-nlp` has been published to
|
|
@@ -389,11 +389,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
389
389
|
```sh
|
|
390
390
|
# GPU
|
|
391
391
|
|
|
392
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.0
|
|
392
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
|
|
393
393
|
|
|
394
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.0
|
|
394
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
|
|
395
395
|
|
|
396
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.0
|
|
396
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
|
|
397
397
|
|
|
398
398
|
```
|
|
399
399
|
|
|
@@ -403,11 +403,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
403
403
|
```sh
|
|
404
404
|
# AArch64
|
|
405
405
|
|
|
406
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.0
|
|
406
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
|
|
407
407
|
|
|
408
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.0
|
|
408
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
|
|
409
409
|
|
|
410
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.0
|
|
410
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
|
|
411
411
|
|
|
412
412
|
```
|
|
413
413
|
|
|
@@ -417,11 +417,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
417
417
|
```sh
|
|
418
418
|
# M1/M2 (Apple Silicon)
|
|
419
419
|
|
|
420
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.0
|
|
420
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
|
|
421
421
|
|
|
422
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.0
|
|
422
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
|
|
423
423
|
|
|
424
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.0
|
|
424
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
|
|
425
425
|
|
|
426
426
|
```
|
|
427
427
|
|
|
@@ -435,7 +435,7 @@ set in your SparkSession:
|
|
|
435
435
|
spark-shell \
|
|
436
436
|
--driver-memory 16g \
|
|
437
437
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
438
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
438
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
439
439
|
```
|
|
440
440
|
|
|
441
441
|
## Scala
|
|
@@ -453,7 +453,7 @@ coordinates:
|
|
|
453
453
|
<dependency>
|
|
454
454
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
455
455
|
<artifactId>spark-nlp_2.12</artifactId>
|
|
456
|
-
<version>5.1.0</version>
|
|
456
|
+
<version>5.1.1</version>
|
|
457
457
|
</dependency>
|
|
458
458
|
```
|
|
459
459
|
|
|
@@ -464,7 +464,7 @@ coordinates:
|
|
|
464
464
|
<dependency>
|
|
465
465
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
466
466
|
<artifactId>spark-nlp-gpu_2.12</artifactId>
|
|
467
|
-
<version>5.1.0</version>
|
|
467
|
+
<version>5.1.1</version>
|
|
468
468
|
</dependency>
|
|
469
469
|
```
|
|
470
470
|
|
|
@@ -475,7 +475,7 @@ coordinates:
|
|
|
475
475
|
<dependency>
|
|
476
476
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
477
477
|
<artifactId>spark-nlp-aarch64_2.12</artifactId>
|
|
478
|
-
<version>5.1.0</version>
|
|
478
|
+
<version>5.1.1</version>
|
|
479
479
|
</dependency>
|
|
480
480
|
```
|
|
481
481
|
|
|
@@ -486,7 +486,7 @@ coordinates:
|
|
|
486
486
|
<dependency>
|
|
487
487
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
488
488
|
<artifactId>spark-nlp-silicon_2.12</artifactId>
|
|
489
|
-
<version>5.1.0</version>
|
|
489
|
+
<version>5.1.1</version>
|
|
490
490
|
</dependency>
|
|
491
491
|
```
|
|
492
492
|
|
|
@@ -496,28 +496,28 @@ coordinates:
|
|
|
496
496
|
|
|
497
497
|
```sbtshell
|
|
498
498
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
|
|
499
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.1.0"
|
|
499
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.1.1"
|
|
500
500
|
```
|
|
501
501
|
|
|
502
502
|
**spark-nlp-gpu:**
|
|
503
503
|
|
|
504
504
|
```sbtshell
|
|
505
505
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
|
|
506
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.1.0"
|
|
506
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.1.1"
|
|
507
507
|
```
|
|
508
508
|
|
|
509
509
|
**spark-nlp-aarch64:**
|
|
510
510
|
|
|
511
511
|
```sbtshell
|
|
512
512
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
|
|
513
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.1.0"
|
|
513
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.1.1"
|
|
514
514
|
```
|
|
515
515
|
|
|
516
516
|
**spark-nlp-silicon:**
|
|
517
517
|
|
|
518
518
|
```sbtshell
|
|
519
519
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
|
|
520
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.0"
|
|
520
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.1"
|
|
521
521
|
```
|
|
522
522
|
|
|
523
523
|
Maven
|
|
@@ -539,7 +539,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
|
|
|
539
539
|
Pip:
|
|
540
540
|
|
|
541
541
|
```bash
|
|
542
|
-
pip install spark-nlp==5.1.0
|
|
542
|
+
pip install spark-nlp==5.1.1
|
|
543
543
|
```
|
|
544
544
|
|
|
545
545
|
Conda:
|
|
@@ -568,7 +568,7 @@ spark = SparkSession.builder
|
|
|
568
568
|
.config("spark.driver.memory", "16G")
|
|
569
569
|
.config("spark.driver.maxResultSize", "0")
|
|
570
570
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
571
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0")
|
|
571
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1")
|
|
572
572
|
.getOrCreate()
|
|
573
573
|
```
|
|
574
574
|
|
|
@@ -639,7 +639,7 @@ Use either one of the following options
|
|
|
639
639
|
- Add the following Maven Coordinates to the interpreter's library list
|
|
640
640
|
|
|
641
641
|
```bash
|
|
642
|
-
com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
642
|
+
com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
643
643
|
```
|
|
644
644
|
|
|
645
645
|
- Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
|
|
@@ -650,7 +650,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
|
650
650
|
Apart from the previous step, install the python module through pip
|
|
651
651
|
|
|
652
652
|
```bash
|
|
653
|
-
pip install spark-nlp==5.1.0
|
|
653
|
+
pip install spark-nlp==5.1.1
|
|
654
654
|
```
|
|
655
655
|
|
|
656
656
|
Or you can install `spark-nlp` from inside Zeppelin by using Conda:
|
|
@@ -678,7 +678,7 @@ launch the Jupyter from the same Python environment:
|
|
|
678
678
|
$ conda create -n sparknlp python=3.8 -y
|
|
679
679
|
$ conda activate sparknlp
|
|
680
680
|
# spark-nlp by default is based on pyspark 3.x
|
|
681
|
-
$ pip install spark-nlp==5.1.0 pyspark==3.3.1 jupyter
|
|
681
|
+
$ pip install spark-nlp==5.1.1 pyspark==3.3.1 jupyter
|
|
682
682
|
$ jupyter notebook
|
|
683
683
|
```
|
|
684
684
|
|
|
@@ -695,7 +695,7 @@ export PYSPARK_PYTHON=python3
|
|
|
695
695
|
export PYSPARK_DRIVER_PYTHON=jupyter
|
|
696
696
|
export PYSPARK_DRIVER_PYTHON_OPTS=notebook
|
|
697
697
|
|
|
698
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
698
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
699
699
|
```
|
|
700
700
|
|
|
701
701
|
Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
|
|
@@ -722,7 +722,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
722
722
|
# -s is for spark-nlp
|
|
723
723
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
|
|
724
724
|
# by default they are set to the latest
|
|
725
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.0
|
|
725
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.1
|
|
726
726
|
```
|
|
727
727
|
|
|
728
728
|
[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
|
|
@@ -745,7 +745,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
745
745
|
# -s is for spark-nlp
|
|
746
746
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
|
|
747
747
|
# by default they are set to the latest
|
|
748
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.0
|
|
748
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.1
|
|
749
749
|
```
|
|
750
750
|
|
|
751
751
|
[Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
|
|
@@ -764,9 +764,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
|
|
|
764
764
|
|
|
765
765
|
3. In `Libraries` tab inside your cluster you need to follow these steps:
|
|
766
766
|
|
|
767
|
-
3.1. Install New -> PyPI -> `spark-nlp==5.1.0` -> Install
|
|
767
|
+
3.1. Install New -> PyPI -> `spark-nlp==5.1.1` -> Install
|
|
768
768
|
|
|
769
|
-
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0` -> Install
|
|
769
|
+
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1` -> Install
|
|
770
770
|
|
|
771
771
|
4. Now you can attach your notebook to the cluster and use Spark NLP!
|
|
772
772
|
|
|
@@ -817,7 +817,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
|
|
|
817
817
|
"spark.kryoserializer.buffer.max": "2000M",
|
|
818
818
|
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
|
|
819
819
|
"spark.driver.maxResultSize": "0",
|
|
820
|
-
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0"
|
|
820
|
+
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1"
|
|
821
821
|
}
|
|
822
822
|
}]
|
|
823
823
|
```
|
|
@@ -826,7 +826,7 @@ A sample of AWS CLI to launch EMR cluster:
|
|
|
826
826
|
|
|
827
827
|
```.sh
|
|
828
828
|
aws emr create-cluster \
|
|
829
|
-
--name "Spark NLP 5.1.0" \
|
|
829
|
+
--name "Spark NLP 5.1.1" \
|
|
830
830
|
--release-label emr-6.2.0 \
|
|
831
831
|
--applications Name=Hadoop Name=Spark Name=Hive \
|
|
832
832
|
--instance-type m4.4xlarge \
|
|
@@ -890,7 +890,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
|
|
|
890
890
|
--enable-component-gateway \
|
|
891
891
|
--metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
|
|
892
892
|
--initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
|
|
893
|
-
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
893
|
+
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
894
894
|
```
|
|
895
895
|
|
|
896
896
|
2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
|
|
@@ -929,7 +929,7 @@ spark = SparkSession.builder
|
|
|
929
929
|
.config("spark.kryoserializer.buffer.max", "2000m")
|
|
930
930
|
.config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
|
|
931
931
|
.config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
|
|
932
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0")
|
|
932
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1")
|
|
933
933
|
.getOrCreate()
|
|
934
934
|
```
|
|
935
935
|
|
|
@@ -943,7 +943,7 @@ spark-shell \
|
|
|
943
943
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
944
944
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
945
945
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
946
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
946
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
947
947
|
```
|
|
948
948
|
|
|
949
949
|
**pyspark:**
|
|
@@ -956,7 +956,7 @@ pyspark \
|
|
|
956
956
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
957
957
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
958
958
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
959
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
|
|
959
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
960
960
|
```
|
|
961
961
|
|
|
962
962
|
**Databricks:**
|
|
@@ -1228,7 +1228,7 @@ spark = SparkSession.builder
|
|
|
1228
1228
|
.config("spark.driver.memory", "16G")
|
|
1229
1229
|
.config("spark.driver.maxResultSize", "0")
|
|
1230
1230
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
1231
|
-
.config("spark.jars", "/tmp/spark-nlp-assembly-5.1.0.jar")
|
|
1231
|
+
.config("spark.jars", "/tmp/spark-nlp-assembly-5.1.1.jar")
|
|
1232
1232
|
.getOrCreate()
|
|
1233
1233
|
```
|
|
1234
1234
|
|
|
@@ -1237,7 +1237,7 @@ spark = SparkSession.builder
|
|
|
1237
1237
|
version (3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x)
|
|
1238
1238
|
- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
|
|
1239
1239
|
to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
|
|
1240
|
-
i.e., `hdfs:///tmp/spark-nlp-assembly-5.1.0.jar`)
|
|
1240
|
+
i.e., `hdfs:///tmp/spark-nlp-assembly-5.1.1.jar`)
|
|
1241
1241
|
|
|
1242
1242
|
Example of using pretrained Models and Pipelines in offline:
|
|
1243
1243
|
|
|
@@ -41,7 +41,7 @@ setup(
|
|
|
41
41
|
# project code, see
|
|
42
42
|
# https://packaging.python.org/en/latest/single_source_version.html
|
|
43
43
|
|
|
44
|
-
version='5.1.0', # Required
|
|
44
|
+
version='5.1.1', # Required
|
|
45
45
|
|
|
46
46
|
# This is a one-line description or tagline of what your project does. This
|
|
47
47
|
# corresponds to the 'Summary' metadata field:
|