spark-nlp 5.0.2.tar.gz → 5.1.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spark-nlp might be problematic. Click here for more details.
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/PKG-INFO +52 -45
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/README.md +51 -44
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/setup.py +1 -1
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/spark_nlp.egg-info/PKG-INFO +52 -45
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/spark_nlp.egg-info/SOURCES.txt +7 -1
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/__init__.py +2 -2
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/__init__.py +1 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/audio/__init__.py +1 -0
- spark-nlp-5.1.1/sparknlp/annotator/audio/whisper_for_ctc.py +250 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/__init__.py +3 -2
- spark-nlp-5.1.1/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/__init__.py +1 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/doc2vec.py +6 -0
- spark-nlp-5.1.1/sparknlp/annotator/embeddings/mpnet_embeddings.py +190 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/word2vec.py +6 -0
- spark-nlp-5.1.1/sparknlp/annotator/openai/__init__.py +16 -0
- spark-nlp-5.1.1/sparknlp/annotator/openai/openai_completion.py +352 -0
- spark-nlp-5.1.1/sparknlp/annotator/openai/openai_embeddings.py +132 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/properties.py +173 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/__init__.py +19 -1
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/com/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/com/johnsnowlabs/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/com/johnsnowlabs/nlp/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/setup.cfg +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/spark_nlp.egg-info/dependency_links.txt +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/spark_nlp.egg-info/top_level.txt +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotation.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotation_audio.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotation_image.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/chunk2_doc.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/chunker.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
- /spark-nlp-5.0.2/sparknlp/annotator/classifier_dl/roberta_bert_for_zero_shot_classification.py → /spark-nlp-5.1.1/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/coref/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/cv/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/date2_chunk.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/dependency/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/document_normalizer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/er/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/er/entity_ruler.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/graph_extraction.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ld_dl/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/lemmatizer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/date_matcher.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/matcher/text_matcher.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/n_gram_generator.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_approach.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_converter.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_crf.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_dl.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/normalizer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/param/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/param/classifier_encoder.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/pos/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/pos/perceptron.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentence/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentiment/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/similarity/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/stemmer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/stop_words_cleaner.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/token/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/token/tokenizer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ws/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/annotator/ws/word_segmenter.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/audio_assembler.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/doc2_chunk.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/document_assembler.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/embeddings_finisher.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/finisher.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/graph_finisher.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/has_recursive_fit.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/has_recursive_transform.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/image_assembler.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/light_pipeline.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/multi_document_assembler.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/recursive_pipeline.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/table_assembler.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/token2_chunk.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/base/token_assembler.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/annotator_approach.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/annotator_model.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/annotator_properties.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/annotator_type.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/coverage_result.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/match_strategy.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/read_as.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/recursive_annotator_approach.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/storage.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/common/utils.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/functions.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/annotator_java_ml.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/annotator_transformer.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/extended_java_wrapper.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/params_getters_setters.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/internal/recursive.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/logging/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/logging/comet.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/pretrained/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/pretrained/resource_downloader.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/pretrained/utils.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/conll.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/conllu.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/pos.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/pub_tator.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/spacy_to_annotation.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/training/tfgraphs.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/upload_to_hub.py +0 -0
- {spark-nlp-5.0.2 → spark-nlp-5.1.1}/sparknlp/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: spark-nlp
|
|
3
|
-
Version: 5.0.2
|
|
3
|
+
Version: 5.1.1
|
|
4
4
|
Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
|
|
5
5
|
Home-page: https://github.com/JohnSnowLabs/spark-nlp
|
|
6
6
|
Author: John Snow Labs
|
|
@@ -144,6 +144,8 @@ documentation and examples
|
|
|
144
144
|
- XLM-RoBerta Sentence Embeddings (HuggingFace models)
|
|
145
145
|
- Instructor Embeddings (HuggingFace models)
|
|
146
146
|
- E5 Embeddings (HuggingFace models)
|
|
147
|
+
- MPNet Embeddings (HuggingFace models)
|
|
148
|
+
- OpenAI Embeddings
|
|
147
149
|
- Sentence Embeddings
|
|
148
150
|
- Chunk Embeddings
|
|
149
151
|
- Unsupervised keywords extraction
|
|
@@ -181,6 +183,7 @@ documentation and examples
|
|
|
181
183
|
- ConvNext Image Classification (Facebook ConvNext)
|
|
182
184
|
- Automatic Speech Recognition (Wav2Vec2)
|
|
183
185
|
- Automatic Speech Recognition (HuBERT)
|
|
186
|
+
- Automatic Speech Recognition (OpenAI Whisper)
|
|
184
187
|
- Named entity recognition (Deep learning)
|
|
185
188
|
- Easy ONNX and TensorFlow integrations
|
|
186
189
|
- GPU Support
|
|
@@ -199,7 +202,7 @@ To use Spark NLP you need the following requirements:
|
|
|
199
202
|
|
|
200
203
|
**GPU (optional):**
|
|
201
204
|
|
|
202
|
-
Spark NLP 5.0.2
|
|
205
|
+
Spark NLP 5.1.1 is built with ONNX 1.15.1 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
203
206
|
|
|
204
207
|
- NVIDIA® GPU drivers version 450.80.02 or higher
|
|
205
208
|
- CUDA® Toolkit 11.2
|
|
@@ -215,7 +218,7 @@ $ java -version
|
|
|
215
218
|
$ conda create -n sparknlp python=3.7 -y
|
|
216
219
|
$ conda activate sparknlp
|
|
217
220
|
# spark-nlp by default is based on pyspark 3.x
|
|
218
|
-
$ pip install spark-nlp==5.0.2 pyspark==3.3.1
|
|
221
|
+
$ pip install spark-nlp==5.1.1 pyspark==3.3.1
|
|
219
222
|
```
|
|
220
223
|
|
|
221
224
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -260,7 +263,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
|
|
|
260
263
|
|
|
261
264
|
## Apache Spark Support
|
|
262
265
|
|
|
263
|
-
Spark NLP *5.0.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x
|
|
266
|
+
Spark NLP *5.1.1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x
|
|
264
267
|
|
|
265
268
|
| Spark NLP | Apache Spark 2.3.x | Apache Spark 2.4.x | Apache Spark 3.0.x | Apache Spark 3.1.x | Apache Spark 3.2.x | Apache Spark 3.3.x | Apache Spark 3.4.x |
|
|
266
269
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
@@ -299,7 +302,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
|
|
|
299
302
|
|
|
300
303
|
## Databricks Support
|
|
301
304
|
|
|
302
|
-
Spark NLP 5.0.2 has been tested and is compatible with the following runtimes:
|
|
305
|
+
Spark NLP 5.1.1 has been tested and is compatible with the following runtimes:
|
|
303
306
|
|
|
304
307
|
**CPU:**
|
|
305
308
|
|
|
@@ -335,6 +338,8 @@ Spark NLP 5.0.2 has been tested and is compatible with the following runtimes:
|
|
|
335
338
|
- 13.1 ML
|
|
336
339
|
- 13.2
|
|
337
340
|
- 13.2 ML
|
|
341
|
+
- 13.3
|
|
342
|
+
- 13.3 ML
|
|
338
343
|
|
|
339
344
|
**GPU:**
|
|
340
345
|
|
|
@@ -354,10 +359,11 @@ Spark NLP 5.0.2 has been tested and is compatible with the following runtimes:
|
|
|
354
359
|
- 13.0 ML & GPU
|
|
355
360
|
- 13.1 ML & GPU
|
|
356
361
|
- 13.2 ML & GPU
|
|
362
|
+
- 13.3 ML & GPU
|
|
357
363
|
|
|
358
364
|
## EMR Support
|
|
359
365
|
|
|
360
|
-
Spark NLP 5.0.2 has been tested and is compatible with the following EMR releases:
|
|
366
|
+
Spark NLP 5.1.1 has been tested and is compatible with the following EMR releases:
|
|
361
367
|
|
|
362
368
|
- emr-6.2.0
|
|
363
369
|
- emr-6.3.0
|
|
@@ -370,6 +376,7 @@ Spark NLP 5.0.2 has been tested and is compatible with the following EMR release
|
|
|
370
376
|
- emr-6.9.0
|
|
371
377
|
- emr-6.10.0
|
|
372
378
|
- emr-6.11.0
|
|
379
|
+
- emr-6.12.0
|
|
373
380
|
|
|
374
381
|
Full list of [Amazon EMR 6.x releases](https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-release-6x.html)
|
|
375
382
|
|
|
@@ -401,11 +408,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
|
|
|
401
408
|
```sh
|
|
402
409
|
# CPU
|
|
403
410
|
|
|
404
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
|
|
411
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
405
412
|
|
|
406
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
|
|
413
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
407
414
|
|
|
408
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
|
|
415
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
409
416
|
```
|
|
410
417
|
|
|
411
418
|
The `spark-nlp` has been published to
|
|
@@ -414,11 +421,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
414
421
|
```sh
|
|
415
422
|
# GPU
|
|
416
423
|
|
|
417
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.0.2
|
|
424
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
|
|
418
425
|
|
|
419
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.0.2
|
|
426
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
|
|
420
427
|
|
|
421
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.0.2
|
|
428
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
|
|
422
429
|
|
|
423
430
|
```
|
|
424
431
|
|
|
@@ -428,11 +435,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
428
435
|
```sh
|
|
429
436
|
# AArch64
|
|
430
437
|
|
|
431
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.0.2
|
|
438
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
|
|
432
439
|
|
|
433
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.0.2
|
|
440
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
|
|
434
441
|
|
|
435
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.0.2
|
|
442
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
|
|
436
443
|
|
|
437
444
|
```
|
|
438
445
|
|
|
@@ -442,11 +449,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
442
449
|
```sh
|
|
443
450
|
# M1/M2 (Apple Silicon)
|
|
444
451
|
|
|
445
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.0.2
|
|
452
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
|
|
446
453
|
|
|
447
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.0.2
|
|
454
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
|
|
448
455
|
|
|
449
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.0.2
|
|
456
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
|
|
450
457
|
|
|
451
458
|
```
|
|
452
459
|
|
|
@@ -460,7 +467,7 @@ set in your SparkSession:
|
|
|
460
467
|
spark-shell \
|
|
461
468
|
--driver-memory 16g \
|
|
462
469
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
463
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
|
|
470
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
464
471
|
```
|
|
465
472
|
|
|
466
473
|
## Scala
|
|
@@ -478,7 +485,7 @@ coordinates:
|
|
|
478
485
|
<dependency>
|
|
479
486
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
480
487
|
<artifactId>spark-nlp_2.12</artifactId>
|
|
481
|
-
<version>5.0.2</version>
|
|
488
|
+
<version>5.1.1</version>
|
|
482
489
|
</dependency>
|
|
483
490
|
```
|
|
484
491
|
|
|
@@ -489,7 +496,7 @@ coordinates:
|
|
|
489
496
|
<dependency>
|
|
490
497
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
491
498
|
<artifactId>spark-nlp-gpu_2.12</artifactId>
|
|
492
|
-
<version>5.0.2</version>
|
|
499
|
+
<version>5.1.1</version>
|
|
493
500
|
</dependency>
|
|
494
501
|
```
|
|
495
502
|
|
|
@@ -500,7 +507,7 @@ coordinates:
|
|
|
500
507
|
<dependency>
|
|
501
508
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
502
509
|
<artifactId>spark-nlp-aarch64_2.12</artifactId>
|
|
503
|
-
<version>5.0.2</version>
|
|
510
|
+
<version>5.1.1</version>
|
|
504
511
|
</dependency>
|
|
505
512
|
```
|
|
506
513
|
|
|
@@ -511,7 +518,7 @@ coordinates:
|
|
|
511
518
|
<dependency>
|
|
512
519
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
513
520
|
<artifactId>spark-nlp-silicon_2.12</artifactId>
|
|
514
|
-
<version>5.0.2</version>
|
|
521
|
+
<version>5.1.1</version>
|
|
515
522
|
</dependency>
|
|
516
523
|
```
|
|
517
524
|
|
|
@@ -521,28 +528,28 @@ coordinates:
|
|
|
521
528
|
|
|
522
529
|
```sbtshell
|
|
523
530
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
|
|
524
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.0.2"
|
|
531
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.1.1"
|
|
525
532
|
```
|
|
526
533
|
|
|
527
534
|
**spark-nlp-gpu:**
|
|
528
535
|
|
|
529
536
|
```sbtshell
|
|
530
537
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
|
|
531
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.0.2"
|
|
538
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.1.1"
|
|
532
539
|
```
|
|
533
540
|
|
|
534
541
|
**spark-nlp-aarch64:**
|
|
535
542
|
|
|
536
543
|
```sbtshell
|
|
537
544
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
|
|
538
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.0.2"
|
|
545
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.1.1"
|
|
539
546
|
```
|
|
540
547
|
|
|
541
548
|
**spark-nlp-silicon:**
|
|
542
549
|
|
|
543
550
|
```sbtshell
|
|
544
551
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
|
|
545
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.0.2"
|
|
552
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.1"
|
|
546
553
|
```
|
|
547
554
|
|
|
548
555
|
Maven
|
|
@@ -564,7 +571,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
|
|
|
564
571
|
Pip:
|
|
565
572
|
|
|
566
573
|
```bash
|
|
567
|
-
pip install spark-nlp==5.0.2
|
|
574
|
+
pip install spark-nlp==5.1.1
|
|
568
575
|
```
|
|
569
576
|
|
|
570
577
|
Conda:
|
|
@@ -593,7 +600,7 @@ spark = SparkSession.builder
|
|
|
593
600
|
.config("spark.driver.memory", "16G")
|
|
594
601
|
.config("spark.driver.maxResultSize", "0")
|
|
595
602
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
596
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2")
|
|
603
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1")
|
|
597
604
|
.getOrCreate()
|
|
598
605
|
```
|
|
599
606
|
|
|
@@ -664,7 +671,7 @@ Use either one of the following options
|
|
|
664
671
|
- Add the following Maven Coordinates to the interpreter's library list
|
|
665
672
|
|
|
666
673
|
```bash
|
|
667
|
-
com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
|
|
674
|
+
com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
668
675
|
```
|
|
669
676
|
|
|
670
677
|
- Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
|
|
@@ -675,7 +682,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
|
|
|
675
682
|
Apart from the previous step, install the python module through pip
|
|
676
683
|
|
|
677
684
|
```bash
|
|
678
|
-
pip install spark-nlp==5.0.2
|
|
685
|
+
pip install spark-nlp==5.1.1
|
|
679
686
|
```
|
|
680
687
|
|
|
681
688
|
Or you can install `spark-nlp` from inside Zeppelin by using Conda:
|
|
@@ -703,7 +710,7 @@ launch the Jupyter from the same Python environment:
|
|
|
703
710
|
$ conda create -n sparknlp python=3.8 -y
|
|
704
711
|
$ conda activate sparknlp
|
|
705
712
|
# spark-nlp by default is based on pyspark 3.x
|
|
706
|
-
$ pip install spark-nlp==5.0.2 pyspark==3.3.1 jupyter
|
|
713
|
+
$ pip install spark-nlp==5.1.1 pyspark==3.3.1 jupyter
|
|
707
714
|
$ jupyter notebook
|
|
708
715
|
```
|
|
709
716
|
|
|
@@ -720,7 +727,7 @@ export PYSPARK_PYTHON=python3
|
|
|
720
727
|
export PYSPARK_DRIVER_PYTHON=jupyter
|
|
721
728
|
export PYSPARK_DRIVER_PYTHON_OPTS=notebook
|
|
722
729
|
|
|
723
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
|
|
730
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
724
731
|
```
|
|
725
732
|
|
|
726
733
|
Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
|
|
@@ -747,7 +754,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
747
754
|
# -s is for spark-nlp
|
|
748
755
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
|
|
749
756
|
# by default they are set to the latest
|
|
750
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.0.2
|
|
757
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.1
|
|
751
758
|
```
|
|
752
759
|
|
|
753
760
|
[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
|
|
@@ -770,7 +777,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
770
777
|
# -s is for spark-nlp
|
|
771
778
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
|
|
772
779
|
# by default they are set to the latest
|
|
773
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.0.2
|
|
780
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.1
|
|
774
781
|
```
|
|
775
782
|
|
|
776
783
|
[Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
|
|
@@ -789,9 +796,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
|
|
|
789
796
|
|
|
790
797
|
3. In `Libraries` tab inside your cluster you need to follow these steps:
|
|
791
798
|
|
|
792
|
-
3.1. Install New -> PyPI -> `spark-nlp==5.0.2` -> Install
|
|
799
|
+
3.1. Install New -> PyPI -> `spark-nlp==5.1.1` -> Install
|
|
793
800
|
|
|
794
|
-
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2` -> Install
|
|
801
|
+
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1` -> Install
|
|
795
802
|
|
|
796
803
|
4. Now you can attach your notebook to the cluster and use Spark NLP!
|
|
797
804
|
|
|
@@ -842,7 +849,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
|
|
|
842
849
|
"spark.kryoserializer.buffer.max": "2000M",
|
|
843
850
|
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
|
|
844
851
|
"spark.driver.maxResultSize": "0",
|
|
845
|
-
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2"
|
|
852
|
+
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1"
|
|
846
853
|
}
|
|
847
854
|
}]
|
|
848
855
|
```
|
|
@@ -851,7 +858,7 @@ A sample of AWS CLI to launch EMR cluster:
|
|
|
851
858
|
|
|
852
859
|
```.sh
|
|
853
860
|
aws emr create-cluster \
|
|
854
|
-
--name "Spark NLP 5.0.2" \
|
|
861
|
+
--name "Spark NLP 5.1.1" \
|
|
855
862
|
--release-label emr-6.2.0 \
|
|
856
863
|
--applications Name=Hadoop Name=Spark Name=Hive \
|
|
857
864
|
--instance-type m4.4xlarge \
|
|
@@ -915,7 +922,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
|
|
|
915
922
|
--enable-component-gateway \
|
|
916
923
|
--metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
|
|
917
924
|
--initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
|
|
918
|
-
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
|
|
925
|
+
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
919
926
|
```
|
|
920
927
|
|
|
921
928
|
2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
|
|
@@ -954,7 +961,7 @@ spark = SparkSession.builder
|
|
|
954
961
|
.config("spark.kryoserializer.buffer.max", "2000m")
|
|
955
962
|
.config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
|
|
956
963
|
.config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
|
|
957
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2")
|
|
964
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1")
|
|
958
965
|
.getOrCreate()
|
|
959
966
|
```
|
|
960
967
|
|
|
@@ -968,7 +975,7 @@ spark-shell \
|
|
|
968
975
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
969
976
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
970
977
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
971
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
|
|
978
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
972
979
|
```
|
|
973
980
|
|
|
974
981
|
**pyspark:**
|
|
@@ -981,7 +988,7 @@ pyspark \
|
|
|
981
988
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
982
989
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
983
990
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
984
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.0.2
|
|
991
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
|
|
985
992
|
```
|
|
986
993
|
|
|
987
994
|
**Databricks:**
|
|
@@ -1253,7 +1260,7 @@ spark = SparkSession.builder
|
|
|
1253
1260
|
.config("spark.driver.memory", "16G")
|
|
1254
1261
|
.config("spark.driver.maxResultSize", "0")
|
|
1255
1262
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
1256
|
-
.config("spark.jars", "/tmp/spark-nlp-assembly-5.0.2.jar")
|
|
1263
|
+
.config("spark.jars", "/tmp/spark-nlp-assembly-5.1.1.jar")
|
|
1257
1264
|
.getOrCreate()
|
|
1258
1265
|
```
|
|
1259
1266
|
|
|
@@ -1262,7 +1269,7 @@ spark = SparkSession.builder
|
|
|
1262
1269
|
version (3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x)
|
|
1263
1270
|
- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
|
|
1264
1271
|
to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
|
|
1265
|
-
i.e., `hdfs:///tmp/spark-nlp-assembly-5.0.2.jar`)
|
|
1272
|
+
i.e., `hdfs:///tmp/spark-nlp-assembly-5.1.1.jar`)
|
|
1266
1273
|
|
|
1267
1274
|
Example of using pretrained Models and Pipelines in offline:
|
|
1268
1275
|
|