spark-nlp 6.1.5__tar.gz → 6.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spark-nlp might be problematic. Click here for more details.
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/PKG-INFO +6 -6
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/README.md +5 -5
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/setup.py +1 -1
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/spark_nlp.egg-info/PKG-INFO +6 -6
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/spark_nlp.egg-info/SOURCES.txt +1 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/__init__.py +1 -1
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/document_normalizer.py +36 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/auto_gguf_embeddings.py +5 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/er/entity_ruler.py +35 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/auto_gguf_model.py +6 -4
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/auto_gguf_reranker.py +5 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +6 -1
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/__init__.py +1 -0
- spark_nlp-6.2.0/sparknlp/common/completion_post_processing.py +37 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/com/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/com/johnsnowlabs/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/com/johnsnowlabs/ml/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/com/johnsnowlabs/nlp/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/setup.cfg +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/spark_nlp.egg-info/dependency_links.txt +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/spark_nlp.egg-info/top_level.txt +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotation.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotation_audio.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotation_image.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/audio/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/chunk2_doc.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/chunker.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cleaners/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cleaners/cleaner.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cleaners/extractor.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/coref/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/blip_for_question_answering.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/florence2_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/gemma3_for_multimodal.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/internvl_for_multimodal.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/janus_for_multimodal.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/llava_for_multimodal.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/mllama_for_multimodal.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/paligemma_for_multimodal.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/phi3_vision_for_multimodal.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/qwen2vl_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/smolvlm_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/dataframe_optimizer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/date2_chunk.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/dependency/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/document_character_text_splitter.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/document_token_splitter.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/document_token_splitter_test.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/e5v_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/minilm_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/mxbai_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/nomic_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/snowflake_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/word2vec.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/er/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/graph_extraction.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ld_dl/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/lemmatizer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/date_matcher.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/matcher/text_matcher.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/n_gram_generator.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_approach.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_converter.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_crf.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_dl.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_dl_graph_checker.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/normalizer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/openai/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/openai/openai_completion.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/param/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/param/classifier_encoder.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/pos/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/pos/perceptron.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentence/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentiment/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/cohere_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/cpm_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/llama3_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/nllb_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/olmo_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/phi3_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/phi4_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/qwen_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/starcoder_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/similarity/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/spell_check/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/stemmer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/stop_words_cleaner.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token/tokenizer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/token2_chunk.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ws/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/ws/word_segmenter.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/audio_assembler.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/doc2_chunk.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/document_assembler.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/embeddings_finisher.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/finisher.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/gguf_ranking_finisher.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/graph_finisher.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/has_recursive_fit.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/has_recursive_transform.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/image_assembler.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/light_pipeline.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/multi_document_assembler.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/prompt_assembler.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/recursive_pipeline.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/table_assembler.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/base/token_assembler.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/annotator_approach.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/annotator_model.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/annotator_properties.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/annotator_type.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/coverage_result.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/match_strategy.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/properties.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/read_as.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/recursive_annotator_approach.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/storage.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/common/utils.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/functions.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/annotator_java_ml.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/annotator_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/extended_java_wrapper.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/params_getters_setters.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/internal/recursive.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/logging/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/logging/comet.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/partition/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/partition/partition.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/partition/partition_properties.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/partition/partition_transformer.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/pretrained/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/pretrained/resource_downloader.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/pretrained/utils.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/enums.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/pdf_to_text.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/reader2doc.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/reader2image.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/reader2table.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/reader_assembler.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/reader/sparknlp_reader.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/conll.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/conllu.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/pos.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/pub_tator.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/spacy_to_annotation.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/training/tfgraphs.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/upload_to_hub.py +0 -0
- {spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spark-nlp
|
|
3
|
-
Version: 6.1.5
|
|
3
|
+
Version: 6.2.0
|
|
4
4
|
Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
|
|
5
5
|
Home-page: https://github.com/JohnSnowLabs/spark-nlp
|
|
6
6
|
Author: John Snow Labs
|
|
@@ -102,7 +102,7 @@ $ java -version
|
|
|
102
102
|
$ conda create -n sparknlp python=3.7 -y
|
|
103
103
|
$ conda activate sparknlp
|
|
104
104
|
# spark-nlp by default is based on pyspark 3.x
|
|
105
|
-
$ pip install spark-nlp==6.1.5 pyspark==3.3.1
|
|
105
|
+
$ pip install spark-nlp==6.2.0 pyspark==3.3.1
|
|
106
106
|
```
|
|
107
107
|
|
|
108
108
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
|
|
|
168
168
|
|
|
169
169
|
### Apache Spark Support
|
|
170
170
|
|
|
171
|
-
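Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x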
|
|
171
|
+
Spark NLP *6.2.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
172
172
|
|
|
173
173
|
| Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
|
|
174
174
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
|
|
|
198
198
|
|
|
199
199
|
### Databricks Support
|
|
200
200
|
|
|
201
|
-
Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
|
|
201
|
+
Spark NLP 6.2.0 has been tested and is compatible with the following runtimes:
|
|
202
202
|
|
|
203
203
|
| **CPU** | **GPU** |
|
|
204
204
|
|--------------------|--------------------|
|
|
@@ -216,7 +216,7 @@ We are compatible with older runtimes. For a full list check databricks support
|
|
|
216
216
|
|
|
217
217
|
### EMR Support
|
|
218
218
|
|
|
219
|
-
Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
|
|
219
|
+
Spark NLP 6.2.0 has been tested and is compatible with the following EMR releases:
|
|
220
220
|
|
|
221
221
|
| **EMR Release** |
|
|
222
222
|
|--------------------|
|
|
@@ -306,7 +306,7 @@ Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integr
|
|
|
306
306
|
Need more **examples**? Check out our dedicated [Spark NLP Examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
|
|
307
307
|
repository to showcase all Spark NLP use cases!
|
|
308
308
|
|
|
309
|
-
Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/
|
|
309
|
+
Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/demos) built by Streamlit.
|
|
310
310
|
|
|
311
311
|
#### All examples: [spark-nlp/examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
|
|
312
312
|
|
|
@@ -63,7 +63,7 @@ $ java -version
|
|
|
63
63
|
$ conda create -n sparknlp python=3.7 -y
|
|
64
64
|
$ conda activate sparknlp
|
|
65
65
|
# spark-nlp by default is based on pyspark 3.x
|
|
66
|
-
$ pip install spark-nlp==6.1.5 pyspark==3.3.1
|
|
66
|
+
$ pip install spark-nlp==6.2.0 pyspark==3.3.1
|
|
67
67
|
```
|
|
68
68
|
|
|
69
69
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -129,7 +129,7 @@ For a quick example of using pipelines and models take a look at our official [d
|
|
|
129
129
|
|
|
130
130
|
### Apache Spark Support
|
|
131
131
|
|
|
132
|
-
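Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x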
|
|
132
|
+
Spark NLP *6.2.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
133
133
|
|
|
134
134
|
| Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
|
|
135
135
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
@@ -159,7 +159,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
|
|
|
159
159
|
|
|
160
160
|
### Databricks Support
|
|
161
161
|
|
|
162
|
-
Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
|
|
162
|
+
Spark NLP 6.2.0 has been tested and is compatible with the following runtimes:
|
|
163
163
|
|
|
164
164
|
| **CPU** | **GPU** |
|
|
165
165
|
|--------------------|--------------------|
|
|
@@ -177,7 +177,7 @@ We are compatible with older runtimes. For a full list check databricks support
|
|
|
177
177
|
|
|
178
178
|
### EMR Support
|
|
179
179
|
|
|
180
|
-
Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
|
|
180
|
+
Spark NLP 6.2.0 has been tested and is compatible with the following EMR releases:
|
|
181
181
|
|
|
182
182
|
| **EMR Release** |
|
|
183
183
|
|--------------------|
|
|
@@ -267,7 +267,7 @@ Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integr
|
|
|
267
267
|
Need more **examples**? Check out our dedicated [Spark NLP Examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
|
|
268
268
|
repository to showcase all Spark NLP use cases!
|
|
269
269
|
|
|
270
|
-
Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/
|
|
270
|
+
Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/demos) built by Streamlit.
|
|
271
271
|
|
|
272
272
|
#### All examples: [spark-nlp/examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
|
|
273
273
|
|
|
@@ -41,7 +41,7 @@ setup(
|
|
|
41
41
|
# project code, see
|
|
42
42
|
# https://packaging.python.org/en/latest/single_source_version.html
|
|
43
43
|
|
|
44
|
-
version='6.1.5', # Required
|
|
44
|
+
version='6.2.0', # Required
|
|
45
45
|
|
|
46
46
|
# This is a one-line description or tagline of what your project does. This
|
|
47
47
|
# corresponds to the 'Summary' metadata field:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: spark-nlp
|
|
3
|
-
Version: 6.1.5
|
|
3
|
+
Version: 6.2.0
|
|
4
4
|
Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
|
|
5
5
|
Home-page: https://github.com/JohnSnowLabs/spark-nlp
|
|
6
6
|
Author: John Snow Labs
|
|
@@ -102,7 +102,7 @@ $ java -version
|
|
|
102
102
|
$ conda create -n sparknlp python=3.7 -y
|
|
103
103
|
$ conda activate sparknlp
|
|
104
104
|
# spark-nlp by default is based on pyspark 3.x
|
|
105
|
-
$ pip install spark-nlp==6.1.5 pyspark==3.3.1
|
|
105
|
+
$ pip install spark-nlp==6.2.0 pyspark==3.3.1
|
|
106
106
|
```
|
|
107
107
|
|
|
108
108
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
|
|
|
168
168
|
|
|
169
169
|
### Apache Spark Support
|
|
170
170
|
|
|
171
|
-
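Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x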
|
|
171
|
+
Spark NLP *6.2.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
172
172
|
|
|
173
173
|
| Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
|
|
174
174
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
|
|
|
198
198
|
|
|
199
199
|
### Databricks Support
|
|
200
200
|
|
|
201
|
-
Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
|
|
201
|
+
Spark NLP 6.2.0 has been tested and is compatible with the following runtimes:
|
|
202
202
|
|
|
203
203
|
| **CPU** | **GPU** |
|
|
204
204
|
|--------------------|--------------------|
|
|
@@ -216,7 +216,7 @@ We are compatible with older runtimes. For a full list check databricks support
|
|
|
216
216
|
|
|
217
217
|
### EMR Support
|
|
218
218
|
|
|
219
|
-
Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
|
|
219
|
+
Spark NLP 6.2.0 has been tested and is compatible with the following EMR releases:
|
|
220
220
|
|
|
221
221
|
| **EMR Release** |
|
|
222
222
|
|--------------------|
|
|
@@ -306,7 +306,7 @@ Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integr
|
|
|
306
306
|
Need more **examples**? Check out our dedicated [Spark NLP Examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
|
|
307
307
|
repository to showcase all Spark NLP use cases!
|
|
308
308
|
|
|
309
|
-
Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/
|
|
309
|
+
Also, don't forget to check [Spark NLP in Action](https://sparknlp.org/demos) built by Streamlit.
|
|
310
310
|
|
|
311
311
|
#### All examples: [spark-nlp/examples](https://github.com/JohnSnowLabs/spark-nlp/tree/master/examples)
|
|
312
312
|
|
|
@@ -231,6 +231,7 @@ sparknlp/common/annotator_approach.py
|
|
|
231
231
|
sparknlp/common/annotator_model.py
|
|
232
232
|
sparknlp/common/annotator_properties.py
|
|
233
233
|
sparknlp/common/annotator_type.py
|
|
234
|
+
sparknlp/common/completion_post_processing.py
|
|
234
235
|
sparknlp/common/coverage_result.py
|
|
235
236
|
sparknlp/common/match_strategy.py
|
|
236
237
|
sparknlp/common/properties.py
|
|
@@ -122,6 +122,21 @@ class DocumentNormalizer(AnnotatorModel):
|
|
|
122
122
|
"file encoding to apply on normalized documents",
|
|
123
123
|
typeConverter=TypeConverters.toString)
|
|
124
124
|
|
|
125
|
+
presetPattern = Param(
|
|
126
|
+
Params._dummy(),
|
|
127
|
+
"presetPattern",
|
|
128
|
+
"Selects a single text cleaning function from the functional presets (e.g., 'CLEAN_BULLETS', 'CLEAN_DASHES', etc.).",
|
|
129
|
+
typeConverter=TypeConverters.toString
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
autoMode = Param(
|
|
133
|
+
Params._dummy(),
|
|
134
|
+
"autoMode",
|
|
135
|
+
"Enables a predefined cleaning mode combining multiple text cleaner functions (e.g., 'light_clean', 'document_clean', 'html_clean', 'full_auto').",
|
|
136
|
+
typeConverter=TypeConverters.toString
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
|
|
125
140
|
@keyword_only
|
|
126
141
|
def __init__(self):
|
|
127
142
|
super(DocumentNormalizer, self).__init__(classname="com.johnsnowlabs.nlp.annotators.DocumentNormalizer")
|
|
@@ -197,3 +212,24 @@ class DocumentNormalizer(AnnotatorModel):
|
|
|
197
212
|
File encoding to apply on normalized documents, by default "UTF-8"
|
|
198
213
|
"""
|
|
199
214
|
return self._set(encoding=value)
|
|
215
|
+
|
|
216
|
+
def setPresetPattern(self, value):
|
|
217
|
+
"""Sets a single text cleaning preset pattern.
|
|
218
|
+
|
|
219
|
+
Parameters
|
|
220
|
+
----------
|
|
221
|
+
value : str
|
|
222
|
+
Preset cleaning pattern name, e.g., 'CLEAN_BULLETS', 'CLEAN_DASHES'.
|
|
223
|
+
"""
|
|
224
|
+
return self._set(presetPattern=value)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def setAutoMode(self, value):
|
|
228
|
+
"""Sets an automatic text cleaning mode using predefined groups of cleaning functions.
|
|
229
|
+
|
|
230
|
+
Parameters
|
|
231
|
+
----------
|
|
232
|
+
value : str
|
|
233
|
+
Auto cleaning mode, e.g., 'light_clean', 'document_clean', 'social_clean', 'html_clean', 'full_auto'.
|
|
234
|
+
"""
|
|
235
|
+
return self._set(autoMode=value)
|
|
@@ -532,3 +532,8 @@ class AutoGGUFEmbeddings(AnnotatorModel, HasBatchedAnnotate):
|
|
|
532
532
|
return ResourceDownloader.downloadModel(
|
|
533
533
|
AutoGGUFEmbeddings, name, lang, remote_loc
|
|
534
534
|
)
|
|
535
|
+
|
|
536
|
+
def close(self):
|
|
537
|
+
"""Closes the llama.cpp model backend freeing resources. The model is reloaded when used again.
|
|
538
|
+
"""
|
|
539
|
+
self._java_obj.close()
|
|
@@ -215,6 +215,20 @@ class EntityRulerModel(AnnotatorModel, HasStorageModel):
|
|
|
215
215
|
|
|
216
216
|
outputAnnotatorType = AnnotatorType.CHUNK
|
|
217
217
|
|
|
218
|
+
autoMode = Param(
|
|
219
|
+
Params._dummy(),
|
|
220
|
+
"autoMode",
|
|
221
|
+
"Enable built-in regex presets that combine related entity patterns (e.g., 'communication_entities', 'network_entities', 'media_entities', etc.).",
|
|
222
|
+
typeConverter=TypeConverters.toString
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
extractEntities = Param(
|
|
226
|
+
Params._dummy(),
|
|
227
|
+
"extractEntities",
|
|
228
|
+
"List of entity types to extract. If not set, all entities in the active autoMode or from regexPatterns are used.",
|
|
229
|
+
typeConverter=TypeConverters.toListString
|
|
230
|
+
)
|
|
231
|
+
|
|
218
232
|
def __init__(self, classname="com.johnsnowlabs.nlp.annotators.er.EntityRulerModel", java_model=None):
|
|
219
233
|
super(EntityRulerModel, self).__init__(
|
|
220
234
|
classname=classname,
|
|
@@ -230,3 +244,24 @@ class EntityRulerModel(AnnotatorModel, HasStorageModel):
|
|
|
230
244
|
def loadStorage(path, spark, storage_ref):
|
|
231
245
|
HasStorageModel.loadStorages(path, spark, storage_ref, EntityRulerModel.database)
|
|
232
246
|
|
|
247
|
+
|
|
248
|
+
def setAutoMode(self, value):
|
|
249
|
+
"""Sets the auto mode for predefined regex entity groups.
|
|
250
|
+
|
|
251
|
+
Parameters
|
|
252
|
+
----------
|
|
253
|
+
value : str
|
|
254
|
+
Name of the auto mode to activate (e.g., 'communication_entities', 'network_entities', etc.)
|
|
255
|
+
"""
|
|
256
|
+
return self._set(autoMode=value)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def setExtractEntities(self, value):
|
|
260
|
+
"""Sets specific entities to extract, filtering only those defined in regexPatterns or autoMode.
|
|
261
|
+
|
|
262
|
+
Parameters
|
|
263
|
+
----------
|
|
264
|
+
value : list[str]
|
|
265
|
+
List of entity names to extract, e.g., ['EMAIL_ADDRESS_PATTERN', 'IPV4_PATTERN'].
|
|
266
|
+
"""
|
|
267
|
+
return self._set(extractEntities=value)
|
|
@@ -12,12 +12,10 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
"""Contains classes for the AutoGGUFModel."""
|
|
15
|
-
from typing import List, Dict
|
|
16
|
-
|
|
17
15
|
from sparknlp.common import *
|
|
18
16
|
|
|
19
17
|
|
|
20
|
-
class AutoGGUFModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties):
|
|
18
|
+
class AutoGGUFModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties, CompletionPostProcessing):
|
|
21
19
|
"""
|
|
22
20
|
Annotator that uses the llama.cpp library to generate text completions with large language
|
|
23
21
|
models.
|
|
@@ -243,7 +241,6 @@ class AutoGGUFModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties):
|
|
|
243
241
|
inputAnnotatorTypes = [AnnotatorType.DOCUMENT]
|
|
244
242
|
outputAnnotatorType = AnnotatorType.DOCUMENT
|
|
245
243
|
|
|
246
|
-
|
|
247
244
|
@keyword_only
|
|
248
245
|
def __init__(self, classname="com.johnsnowlabs.nlp.annotators.seq2seq.AutoGGUFModel", java_model=None):
|
|
249
246
|
super(AutoGGUFModel, self).__init__(
|
|
@@ -300,3 +297,8 @@ class AutoGGUFModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties):
|
|
|
300
297
|
"""
|
|
301
298
|
from sparknlp.pretrained import ResourceDownloader
|
|
302
299
|
return ResourceDownloader.downloadModel(AutoGGUFModel, name, lang, remote_loc)
|
|
300
|
+
|
|
301
|
+
def close(self):
|
|
302
|
+
"""Closes the llama.cpp model backend freeing resources. The model is reloaded when used again.
|
|
303
|
+
"""
|
|
304
|
+
self._java_obj.close()
|
|
@@ -327,3 +327,8 @@ class AutoGGUFReranker(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties
|
|
|
327
327
|
"""
|
|
328
328
|
from sparknlp.pretrained import ResourceDownloader
|
|
329
329
|
return ResourceDownloader.downloadModel(AutoGGUFReranker, name, lang, remote_loc)
|
|
330
|
+
|
|
331
|
+
def close(self):
|
|
332
|
+
"""Closes the llama.cpp model backend freeing resources. The model is reloaded when used again.
|
|
333
|
+
"""
|
|
334
|
+
self._java_obj.close()
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
from sparknlp.common import *
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
class AutoGGUFVisionModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties):
|
|
18
|
+
class AutoGGUFVisionModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppProperties, CompletionPostProcessing):
|
|
19
19
|
"""Multimodal annotator that uses the llama.cpp library to generate text completions with large
|
|
20
20
|
language models. It supports ingesting images for captioning.
|
|
21
21
|
|
|
@@ -329,3 +329,8 @@ class AutoGGUFVisionModel(AnnotatorModel, HasBatchedAnnotate, HasLlamaCppPropert
|
|
|
329
329
|
"""
|
|
330
330
|
from sparknlp.pretrained import ResourceDownloader
|
|
331
331
|
return ResourceDownloader.downloadModel(AutoGGUFVisionModel, name, lang, remote_loc)
|
|
332
|
+
|
|
333
|
+
def close(self):
|
|
334
|
+
"""Closes the llama.cpp model backend freeing resources. The model is reloaded when used again.
|
|
335
|
+
"""
|
|
336
|
+
self._java_obj.close()
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Copyright 2017-2025 John Snow Labs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
from pyspark.ml.param import Param, Params, TypeConverters
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CompletionPostProcessing:
|
|
18
|
+
removeThinkingTag = Param(
|
|
19
|
+
Params._dummy(),
|
|
20
|
+
"removeThinkingTag",
|
|
21
|
+
"Set a thinking tag (e.g. think) to be removed from output. Will match <TAG>...</TAG>",
|
|
22
|
+
typeConverter=TypeConverters.toString,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
def setRemoveThinkingTag(self, value: str):
|
|
26
|
+
"""Set a thinking tag (e.g. `think`) to be removed from output.
|
|
27
|
+
Will produce the regex: `(?s)<$TAG>.+?</$TAG>`
|
|
28
|
+
"""
|
|
29
|
+
self._set(removeThinkingTag=value)
|
|
30
|
+
return self
|
|
31
|
+
|
|
32
|
+
def getRemoveThinkingTag(self):
|
|
33
|
+
"""Get the thinking tag to be removed from output."""
|
|
34
|
+
value = None
|
|
35
|
+
if self.removeThinkingTag in self._paramMap:
|
|
36
|
+
value = self._paramMap[self.removeThinkingTag]
|
|
37
|
+
return value
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py
RENAMED
|
File without changes
|
{spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py
RENAMED
|
File without changes
|
{spark_nlp-6.1.5 → spark_nlp-6.2.0}/sparknlp/annotator/cv/convnext_for_image_classification.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|