spark-nlp 5.4.0__tar.gz → 5.4.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spark-nlp might be problematic. Click here for more details.
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/PKG-INFO +60 -50
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/README.md +59 -49
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/setup.py +1 -1
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/PKG-INFO +60 -50
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/SOURCES.txt +0 -5
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/__init__.py +2 -3
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/__init__.py +0 -1
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +3 -3
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -2
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -2
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -2
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/openai/openai_embeddings.py +69 -43
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/__init__.py +0 -2
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/m2m100_transformer.py +2 -2
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/__init__.py +142 -428
- spark-nlp-5.4.0/com/johnsnowlabs/ml/ai/__init__.py +0 -10
- spark-nlp-5.4.0/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -173
- spark-nlp-5.4.0/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -349
- spark-nlp-5.4.0/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -326
- spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/com/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/com/johnsnowlabs/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/com/johnsnowlabs/nlp/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/setup.cfg +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/.uuid +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/dependency_links.txt +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/spark_nlp.egg-info/top_level.txt +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotation.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotation_audio.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotation_image.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/chunk2_doc.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/chunker.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/coref/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/date2_chunk.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/dependency/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_character_text_splitter.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_normalizer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_token_splitter.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/document_token_splitter_test.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/word2vec.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/er/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/er/entity_ruler.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/graph_extraction.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ld_dl/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/lemmatizer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/date_matcher.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/matcher/text_matcher.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/n_gram_generator.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_approach.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_converter.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_crf.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_dl.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/normalizer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/openai/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/openai/openai_completion.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/param/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/param/classifier_encoder.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/pos/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/pos/perceptron.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentence/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentiment/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
- {spark-nlp-5.4.0/com/johnsnowlabs/ml → spark-nlp-5.4.0rc1/sparknlp/annotator/similarity}/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/stemmer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/stop_words_cleaner.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token/tokenizer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/token2_chunk.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ws/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/annotator/ws/word_segmenter.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/audio_assembler.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/doc2_chunk.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/document_assembler.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/embeddings_finisher.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/finisher.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/graph_finisher.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/has_recursive_fit.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/has_recursive_transform.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/image_assembler.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/light_pipeline.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/multi_document_assembler.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/recursive_pipeline.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/table_assembler.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/base/token_assembler.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_approach.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_model.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_properties.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/annotator_type.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/coverage_result.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/match_strategy.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/properties.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/read_as.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/recursive_annotator_approach.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/storage.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/common/utils.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/functions.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/annotator_java_ml.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/annotator_transformer.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/extended_java_wrapper.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/params_getters_setters.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/internal/recursive.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/logging/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/logging/comet.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/resource_downloader.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/pretrained/utils.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/__init__.py +0 -0
- {spark-nlp-5.4.0/sparknlp/annotator/similarity → spark-nlp-5.4.0rc1/sparknlp/training/_tf_graph_builders}/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
- {spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders → spark-nlp-5.4.0rc1/sparknlp/training/_tf_graph_builders/ner_dl}/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
- {spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders/ner_dl → spark-nlp-5.4.0rc1/sparknlp/training/_tf_graph_builders_1x}/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
- {spark-nlp-5.4.0/sparknlp/training/_tf_graph_builders_1x → spark-nlp-5.4.0rc1/sparknlp/training/_tf_graph_builders_1x/ner_dl}/__init__.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/conll.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/conllu.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/pos.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/pub_tator.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/spacy_to_annotation.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/training/tfgraphs.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/upload_to_hub.py +0 -0
- {spark-nlp-5.4.0 → spark-nlp-5.4.0rc1}/sparknlp/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: spark-nlp
|
|
3
|
-
Version: 5.4.0
|
|
3
|
+
Version: 5.4.0rc1
|
|
4
4
|
Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
|
|
5
5
|
Home-page: https://github.com/JohnSnowLabs/spark-nlp
|
|
6
6
|
Author: John Snow Labs
|
|
@@ -146,7 +146,6 @@ documentation and examples
|
|
|
146
146
|
- INSTRUCTOR Embeddings (HuggingFace models)
|
|
147
147
|
- E5 Embeddings (HuggingFace models)
|
|
148
148
|
- MPNet Embeddings (HuggingFace models)
|
|
149
|
-
- UAE Embeddings (HuggingFace models)
|
|
150
149
|
- OpenAI Embeddings
|
|
151
150
|
- Sentence & Chunk Embeddings
|
|
152
151
|
- Unsupervised keywords extraction
|
|
@@ -171,7 +170,7 @@ documentation and examples
|
|
|
171
170
|
- Text-To-Text Transfer Transformer (Google T5)
|
|
172
171
|
- Generative Pre-trained Transformer 2 (OpenAI GPT2)
|
|
173
172
|
- Seq2Seq for NLG, Translation, and Comprehension (Facebook BART)
|
|
174
|
-
- Chat and Conversational LLMs (Facebook Llama-2)
|
|
173
|
+
- Chat and Conversational LLMs (Facebook Llama-22)
|
|
175
174
|
- Vision Transformer (Google ViT)
|
|
176
175
|
- Swin Image Classification (Microsoft Swin Transformer)
|
|
177
176
|
- ConvNext Image Classification (Facebook ConvNext)
|
|
@@ -181,10 +180,10 @@ documentation and examples
|
|
|
181
180
|
- Automatic Speech Recognition (HuBERT)
|
|
182
181
|
- Automatic Speech Recognition (OpenAI Whisper)
|
|
183
182
|
- Named entity recognition (Deep learning)
|
|
184
|
-
- Easy ONNX
|
|
183
|
+
- Easy ONNX and TensorFlow integrations
|
|
185
184
|
- GPU Support
|
|
186
185
|
- Full integration with Spark ML functions
|
|
187
|
-
- +
|
|
186
|
+
- +30000 pre-trained models in +200 languages!
|
|
188
187
|
- +6000 pre-trained pipelines in +200 languages!
|
|
189
188
|
- Multi-lingual NER models: Arabic, Bengali, Chinese, Danish, Dutch, English, Finnish, French, German, Hebrew, Italian,
|
|
190
189
|
Japanese, Korean, Norwegian, Persian, Polish, Portuguese, Russian, Spanish, Swedish, Urdu, and more.
|
|
@@ -198,7 +197,7 @@ To use Spark NLP you need the following requirements:
|
|
|
198
197
|
|
|
199
198
|
**GPU (optional):**
|
|
200
199
|
|
|
201
|
-
Spark NLP 5.4.0 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
200
|
+
Spark NLP 5.4.0-rc1 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
202
201
|
|
|
203
202
|
- NVIDIA® GPU drivers version 450.80.02 or higher
|
|
204
203
|
- CUDA® Toolkit 11.2
|
|
@@ -214,7 +213,7 @@ $ java -version
|
|
|
214
213
|
$ conda create -n sparknlp python=3.7 -y
|
|
215
214
|
$ conda activate sparknlp
|
|
216
215
|
# spark-nlp by default is based on pyspark 3.x
|
|
217
|
-
$ pip install spark-nlp==5.4.0 pyspark==3.3.1
|
|
216
|
+
$ pip install spark-nlp==5.4.0-rc1 pyspark==3.3.1
|
|
218
217
|
```
|
|
219
218
|
|
|
220
219
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -259,11 +258,10 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
|
|
|
259
258
|
|
|
260
259
|
## Apache Spark Support
|
|
261
260
|
|
|
262
|
-
Spark NLP *5.4.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
261
|
+
Spark NLP *5.4.0-rc1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
263
262
|
|
|
264
263
|
| Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
|
|
265
264
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
266
|
-
| 5.4.x | YES | YES | YES | YES | YES | YES | NO | NO |
|
|
267
265
|
| 5.3.x | YES | YES | YES | YES | YES | YES | NO | NO |
|
|
268
266
|
| 5.2.x | YES | YES | YES | YES | YES | YES | NO | NO |
|
|
269
267
|
| 5.1.x | Partially | YES | YES | YES | YES | YES | NO | NO |
|
|
@@ -273,6 +271,12 @@ Spark NLP *5.4.0* has been built on top of Apache Spark 3.4 while fully supports
|
|
|
273
271
|
| 4.2.x | NO | NO | YES | YES | YES | YES | NO | NO |
|
|
274
272
|
| 4.1.x | NO | NO | YES | YES | YES | YES | NO | NO |
|
|
275
273
|
| 4.0.x | NO | NO | YES | YES | YES | YES | NO | NO |
|
|
274
|
+
| 3.4.x | NO | NO | N/A | Partially | YES | YES | YES | YES |
|
|
275
|
+
| 3.3.x | NO | NO | NO | NO | YES | YES | YES | YES |
|
|
276
|
+
| 3.2.x | NO | NO | NO | NO | YES | YES | YES | YES |
|
|
277
|
+
| 3.1.x | NO | NO | NO | NO | YES | YES | YES | YES |
|
|
278
|
+
| 3.0.x | NO | NO | NO | NO | YES | YES | YES | YES |
|
|
279
|
+
| 2.7.x | NO | NO | NO | NO | NO | NO | YES | YES |
|
|
276
280
|
|
|
277
281
|
Find out more about `Spark NLP` versions from our [release notes](https://github.com/JohnSnowLabs/spark-nlp/releases).
|
|
278
282
|
|
|
@@ -289,10 +293,16 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
|
|
|
289
293
|
| 4.2.x | YES | YES | YES | YES | YES | NO | YES |
|
|
290
294
|
| 4.1.x | YES | YES | YES | YES | NO | NO | YES |
|
|
291
295
|
| 4.0.x | YES | YES | YES | YES | NO | NO | YES |
|
|
296
|
+
| 3.4.x | YES | YES | YES | YES | NO | YES | YES |
|
|
297
|
+
| 3.3.x | YES | YES | YES | NO | NO | YES | YES |
|
|
298
|
+
| 3.2.x | YES | YES | YES | NO | NO | YES | YES |
|
|
299
|
+
| 3.1.x | YES | YES | YES | NO | NO | YES | YES |
|
|
300
|
+
| 3.0.x | YES | YES | YES | NO | NO | YES | YES |
|
|
301
|
+
| 2.7.x | YES | YES | NO | NO | NO | YES | NO |
|
|
292
302
|
|
|
293
303
|
## Databricks Support
|
|
294
304
|
|
|
295
|
-
Spark NLP 5.4.0 has been tested and is compatible with the following runtimes:
|
|
305
|
+
Spark NLP 5.4.0-rc1 has been tested and is compatible with the following runtimes:
|
|
296
306
|
|
|
297
307
|
**CPU:**
|
|
298
308
|
|
|
@@ -365,7 +375,7 @@ Spark NLP 5.4.0 has been tested and is compatible with the following runtimes:
|
|
|
365
375
|
|
|
366
376
|
## EMR Support
|
|
367
377
|
|
|
368
|
-
Spark NLP 5.4.0 has been tested and is compatible with the following EMR releases:
|
|
378
|
+
Spark NLP 5.4.0-rc1 has been tested and is compatible with the following EMR releases:
|
|
369
379
|
|
|
370
380
|
- emr-6.2.0
|
|
371
381
|
- emr-6.3.0
|
|
@@ -415,11 +425,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
|
|
|
415
425
|
```sh
|
|
416
426
|
# CPU
|
|
417
427
|
|
|
418
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
428
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
419
429
|
|
|
420
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
430
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
421
431
|
|
|
422
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
432
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
423
433
|
```
|
|
424
434
|
|
|
425
435
|
The `spark-nlp` has been published to
|
|
@@ -428,11 +438,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
428
438
|
```sh
|
|
429
439
|
# GPU
|
|
430
440
|
|
|
431
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
|
|
441
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
|
|
432
442
|
|
|
433
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
|
|
443
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
|
|
434
444
|
|
|
435
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
|
|
445
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0-rc1
|
|
436
446
|
|
|
437
447
|
```
|
|
438
448
|
|
|
@@ -442,11 +452,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
442
452
|
```sh
|
|
443
453
|
# AArch64
|
|
444
454
|
|
|
445
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
|
|
455
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
|
|
446
456
|
|
|
447
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
|
|
457
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
|
|
448
458
|
|
|
449
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
|
|
459
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0-rc1
|
|
450
460
|
|
|
451
461
|
```
|
|
452
462
|
|
|
@@ -456,11 +466,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
456
466
|
```sh
|
|
457
467
|
# M1/M2 (Apple Silicon)
|
|
458
468
|
|
|
459
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
|
|
469
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
|
|
460
470
|
|
|
461
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
|
|
471
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
|
|
462
472
|
|
|
463
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
|
|
473
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0-rc1
|
|
464
474
|
|
|
465
475
|
```
|
|
466
476
|
|
|
@@ -474,7 +484,7 @@ set in your SparkSession:
|
|
|
474
484
|
spark-shell \
|
|
475
485
|
--driver-memory 16g \
|
|
476
486
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
477
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
487
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
478
488
|
```
|
|
479
489
|
|
|
480
490
|
## Scala
|
|
@@ -492,7 +502,7 @@ coordinates:
|
|
|
492
502
|
<dependency>
|
|
493
503
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
494
504
|
<artifactId>spark-nlp_2.12</artifactId>
|
|
495
|
-
<version>5.4.0</version>
|
|
505
|
+
<version>5.4.0-rc1</version>
|
|
496
506
|
</dependency>
|
|
497
507
|
```
|
|
498
508
|
|
|
@@ -503,7 +513,7 @@ coordinates:
|
|
|
503
513
|
<dependency>
|
|
504
514
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
505
515
|
<artifactId>spark-nlp-gpu_2.12</artifactId>
|
|
506
|
-
<version>5.4.0</version>
|
|
516
|
+
<version>5.4.0-rc1</version>
|
|
507
517
|
</dependency>
|
|
508
518
|
```
|
|
509
519
|
|
|
@@ -514,7 +524,7 @@ coordinates:
|
|
|
514
524
|
<dependency>
|
|
515
525
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
516
526
|
<artifactId>spark-nlp-aarch64_2.12</artifactId>
|
|
517
|
-
<version>5.4.0</version>
|
|
527
|
+
<version>5.4.0-rc1</version>
|
|
518
528
|
</dependency>
|
|
519
529
|
```
|
|
520
530
|
|
|
@@ -525,7 +535,7 @@ coordinates:
|
|
|
525
535
|
<dependency>
|
|
526
536
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
527
537
|
<artifactId>spark-nlp-silicon_2.12</artifactId>
|
|
528
|
-
<version>5.4.0</version>
|
|
538
|
+
<version>5.4.0-rc1</version>
|
|
529
539
|
</dependency>
|
|
530
540
|
```
|
|
531
541
|
|
|
@@ -535,28 +545,28 @@ coordinates:
|
|
|
535
545
|
|
|
536
546
|
```sbtshell
|
|
537
547
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
|
|
538
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.0"
|
|
548
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.0-rc1"
|
|
539
549
|
```
|
|
540
550
|
|
|
541
551
|
**spark-nlp-gpu:**
|
|
542
552
|
|
|
543
553
|
```sbtshell
|
|
544
554
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
|
|
545
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.4.0"
|
|
555
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.4.0-rc1"
|
|
546
556
|
```
|
|
547
557
|
|
|
548
558
|
**spark-nlp-aarch64:**
|
|
549
559
|
|
|
550
560
|
```sbtshell
|
|
551
561
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
|
|
552
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.4.0"
|
|
562
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.4.0-rc1"
|
|
553
563
|
```
|
|
554
564
|
|
|
555
565
|
**spark-nlp-silicon:**
|
|
556
566
|
|
|
557
567
|
```sbtshell
|
|
558
568
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
|
|
559
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.4.0"
|
|
569
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.4.0-rc1"
|
|
560
570
|
```
|
|
561
571
|
|
|
562
572
|
Maven
|
|
@@ -578,7 +588,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
|
|
|
578
588
|
Pip:
|
|
579
589
|
|
|
580
590
|
```bash
|
|
581
|
-
pip install spark-nlp==5.4.0
|
|
591
|
+
pip install spark-nlp==5.4.0-rc1
|
|
582
592
|
```
|
|
583
593
|
|
|
584
594
|
Conda:
|
|
@@ -607,7 +617,7 @@ spark = SparkSession.builder
|
|
|
607
617
|
.config("spark.driver.memory", "16G")
|
|
608
618
|
.config("spark.driver.maxResultSize", "0")
|
|
609
619
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
610
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0")
|
|
620
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1")
|
|
611
621
|
.getOrCreate()
|
|
612
622
|
```
|
|
613
623
|
|
|
@@ -678,7 +688,7 @@ Use either one of the following options
|
|
|
678
688
|
- Add the following Maven Coordinates to the interpreter's library list
|
|
679
689
|
|
|
680
690
|
```bash
|
|
681
|
-
com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
691
|
+
com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
682
692
|
```
|
|
683
693
|
|
|
684
694
|
- Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
|
|
@@ -689,7 +699,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
|
689
699
|
Apart from the previous step, install the python module through pip
|
|
690
700
|
|
|
691
701
|
```bash
|
|
692
|
-
pip install spark-nlp==5.4.0
|
|
702
|
+
pip install spark-nlp==5.4.0-rc1
|
|
693
703
|
```
|
|
694
704
|
|
|
695
705
|
Or you can install `spark-nlp` from inside Zeppelin by using Conda:
|
|
@@ -717,7 +727,7 @@ launch the Jupyter from the same Python environment:
|
|
|
717
727
|
$ conda create -n sparknlp python=3.8 -y
|
|
718
728
|
$ conda activate sparknlp
|
|
719
729
|
# spark-nlp by default is based on pyspark 3.x
|
|
720
|
-
$ pip install spark-nlp==5.4.0 pyspark==3.3.1 jupyter
|
|
730
|
+
$ pip install spark-nlp==5.4.0-rc1 pyspark==3.3.1 jupyter
|
|
721
731
|
$ jupyter notebook
|
|
722
732
|
```
|
|
723
733
|
|
|
@@ -734,7 +744,7 @@ export PYSPARK_PYTHON=python3
|
|
|
734
744
|
export PYSPARK_DRIVER_PYTHON=jupyter
|
|
735
745
|
export PYSPARK_DRIVER_PYTHON_OPTS=notebook
|
|
736
746
|
|
|
737
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
747
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
738
748
|
```
|
|
739
749
|
|
|
740
750
|
Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
|
|
@@ -761,7 +771,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
761
771
|
# -s is for spark-nlp
|
|
762
772
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
|
|
763
773
|
# by default they are set to the latest
|
|
764
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0
|
|
774
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0-rc1
|
|
765
775
|
```
|
|
766
776
|
|
|
767
777
|
[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
|
|
@@ -784,7 +794,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
784
794
|
# -s is for spark-nlp
|
|
785
795
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
|
|
786
796
|
# by default they are set to the latest
|
|
787
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0
|
|
797
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0-rc1
|
|
788
798
|
```
|
|
789
799
|
|
|
790
800
|
[Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
|
|
@@ -803,9 +813,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
|
|
|
803
813
|
|
|
804
814
|
3. In `Libraries` tab inside your cluster you need to follow these steps:
|
|
805
815
|
|
|
806
|
-
3.1. Install New -> PyPI -> `spark-nlp==5.4.0` -> Install
|
|
816
|
+
3.1. Install New -> PyPI -> `spark-nlp==5.4.0-rc1` -> Install
|
|
807
817
|
|
|
808
|
-
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0` -> Install
|
|
818
|
+
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1` -> Install
|
|
809
819
|
|
|
810
820
|
4. Now you can attach your notebook to the cluster and use Spark NLP!
|
|
811
821
|
|
|
@@ -856,7 +866,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
|
|
|
856
866
|
"spark.kryoserializer.buffer.max": "2000M",
|
|
857
867
|
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
|
|
858
868
|
"spark.driver.maxResultSize": "0",
|
|
859
|
-
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0"
|
|
869
|
+
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1"
|
|
860
870
|
}
|
|
861
871
|
}]
|
|
862
872
|
```
|
|
@@ -865,7 +875,7 @@ A sample of AWS CLI to launch EMR cluster:
|
|
|
865
875
|
|
|
866
876
|
```.sh
|
|
867
877
|
aws emr create-cluster \
|
|
868
|
-
--name "Spark NLP 5.4.0" \
|
|
878
|
+
--name "Spark NLP 5.4.0-rc1" \
|
|
869
879
|
--release-label emr-6.2.0 \
|
|
870
880
|
--applications Name=Hadoop Name=Spark Name=Hive \
|
|
871
881
|
--instance-type m4.4xlarge \
|
|
@@ -929,7 +939,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
|
|
|
929
939
|
--enable-component-gateway \
|
|
930
940
|
--metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
|
|
931
941
|
--initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
|
|
932
|
-
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
942
|
+
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
933
943
|
```
|
|
934
944
|
|
|
935
945
|
2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
|
|
@@ -972,7 +982,7 @@ spark = SparkSession.builder
|
|
|
972
982
|
.config("spark.kryoserializer.buffer.max", "2000m")
|
|
973
983
|
.config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
|
|
974
984
|
.config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
|
|
975
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0")
|
|
985
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1")
|
|
976
986
|
.getOrCreate()
|
|
977
987
|
```
|
|
978
988
|
|
|
@@ -986,7 +996,7 @@ spark-shell \
|
|
|
986
996
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
987
997
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
988
998
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
989
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
999
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
990
1000
|
```
|
|
991
1001
|
|
|
992
1002
|
**pyspark:**
|
|
@@ -999,7 +1009,7 @@ pyspark \
|
|
|
999
1009
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
1000
1010
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
1001
1011
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
1002
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
|
|
1012
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0-rc1
|
|
1003
1013
|
```
|
|
1004
1014
|
|
|
1005
1015
|
**Databricks:**
|
|
@@ -1271,7 +1281,7 @@ spark = SparkSession.builder
|
|
|
1271
1281
|
.config("spark.driver.memory", "16G")
|
|
1272
1282
|
.config("spark.driver.maxResultSize", "0")
|
|
1273
1283
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
1274
|
-
.config("spark.jars", "/tmp/spark-nlp-assembly-5.4.0.jar")
|
|
1284
|
+
.config("spark.jars", "/tmp/spark-nlp-assembly-5.4.0-rc1.jar")
|
|
1275
1285
|
.getOrCreate()
|
|
1276
1286
|
```
|
|
1277
1287
|
|
|
@@ -1280,7 +1290,7 @@ spark = SparkSession.builder
|
|
|
1280
1290
|
version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
|
|
1281
1291
|
- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
|
|
1282
1292
|
to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
|
|
1283
|
-
i.e., `hdfs:///tmp/spark-nlp-assembly-5.4.0.jar`)
|
|
1293
|
+
i.e., `hdfs:///tmp/spark-nlp-assembly-5.4.0-rc1.jar`)
|
|
1284
1294
|
|
|
1285
1295
|
Example of using pretrained Models and Pipelines in offline:
|
|
1286
1296
|
|