spark-nlp 6.1.4__tar.gz → 6.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of spark-nlp has been flagged as potentially problematic.
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/PKG-INFO +5 -5
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/README.md +4 -4
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/setup.py +1 -1
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/spark_nlp.egg-info/PKG-INFO +5 -5
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/spark_nlp.egg-info/SOURCES.txt +1 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/__init__.py +1 -1
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/partition/partition_properties.py +77 -10
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/reader2doc.py +12 -65
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/reader2table.py +0 -34
- spark_nlp-6.1.5/sparknlp/reader/reader_assembler.py +159 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/com/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/com/johnsnowlabs/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/com/johnsnowlabs/ml/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/com/johnsnowlabs/ml/ai/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/com/johnsnowlabs/nlp/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/setup.cfg +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/spark_nlp.egg-info/dependency_links.txt +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/spark_nlp.egg-info/top_level.txt +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotation.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotation_audio.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotation_image.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/audio/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/audio/hubert_for_ctc.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/audio/wav2vec2_for_ctc.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/audio/whisper_for_ctc.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/chunk2_doc.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/chunker.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/albert_for_question_answering.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/albert_for_token_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bert_for_question_answering.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bert_for_token_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/classifier_dl.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/multi_classifier_dl.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/sentiment_dl.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cleaners/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cleaners/cleaner.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cleaners/extractor.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/coref/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/coref/spanbert_coref.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/blip_for_question_answering.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/clip_for_zero_shot_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/convnext_for_image_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/florence2_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/gemma3_for_multimodal.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/internvl_for_multimodal.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/janus_for_multimodal.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/llava_for_multimodal.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/mllama_for_multimodal.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/paligemma_for_multimodal.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/phi3_vision_for_multimodal.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/qwen2vl_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/smolvlm_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/swin_for_image_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/cv/vit_for_image_classification.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/dataframe_optimizer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/date2_chunk.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/dependency/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/dependency/dependency_parser.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/dependency/typed_dependency_parser.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/document_character_text_splitter.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/document_normalizer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/document_token_splitter.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/document_token_splitter_test.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/albert_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/auto_gguf_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/bert_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/bert_sentence_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/bge_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/camembert_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/chunk_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/deberta_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/distil_bert_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/doc2vec.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/e5_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/e5v_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/elmo_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/instructor_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/longformer_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/minilm_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/mpnet_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/mxbai_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/nomic_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/roberta_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/sentence_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/snowflake_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/uae_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/universal_sentence_encoder.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/word2vec.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/word_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/embeddings/xlnet_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/er/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/er/entity_ruler.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/graph_extraction.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/keyword_extraction/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ld_dl/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ld_dl/language_detector_dl.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/lemmatizer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/big_text_matcher.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/date_matcher.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/multi_date_matcher.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/regex_matcher.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/matcher/text_matcher.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/n_gram_generator.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_approach.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_converter.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_crf.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_dl.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_dl_graph_checker.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/ner_overwriter.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ner/zero_shot_ner_model.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/normalizer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/openai/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/openai/openai_completion.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/openai/openai_embeddings.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/param/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/param/classifier_encoder.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/param/evaluation_dl_params.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/pos/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/pos/perceptron.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentence/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentence/sentence_detector.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentence/sentence_detector_dl.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentiment/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentiment/sentiment_detector.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/sentiment/vivekn_sentiment.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/auto_gguf_model.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/auto_gguf_reranker.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/bart_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/cohere_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/cpm_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/gpt2_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/llama2_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/llama3_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/m2m100_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/marian_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/mistral_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/nllb_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/olmo_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/phi2_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/phi3_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/phi4_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/qwen_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/starcoder_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/seq2seq/t5_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/similarity/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/similarity/document_similarity_ranker.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/spell_check/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/spell_check/context_spell_checker.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/spell_check/norvig_sweeting.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/spell_check/symmetric_delete.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/stemmer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/stop_words_cleaner.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/tf_ner_dl_graph_builder.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token/chunk_tokenizer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token/recursive_tokenizer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token/regex_tokenizer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token/tokenizer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/token2_chunk.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ws/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/annotator/ws/word_segmenter.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/audio_assembler.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/doc2_chunk.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/document_assembler.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/embeddings_finisher.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/finisher.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/gguf_ranking_finisher.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/graph_finisher.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/has_recursive_fit.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/has_recursive_transform.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/image_assembler.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/light_pipeline.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/multi_document_assembler.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/prompt_assembler.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/recursive_pipeline.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/table_assembler.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/base/token_assembler.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/annotator_approach.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/annotator_model.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/annotator_properties.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/annotator_type.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/coverage_result.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/match_strategy.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/properties.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/read_as.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/recursive_annotator_approach.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/storage.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/common/utils.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/functions.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/annotator_java_ml.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/annotator_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/extended_java_wrapper.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/params_getters_setters.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/internal/recursive.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/logging/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/logging/comet.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/partition/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/partition/partition.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/partition/partition_transformer.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/pretrained/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/pretrained/pretrained_pipeline.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/pretrained/resource_downloader.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/pretrained/utils.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/enums.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/pdf_to_text.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/reader2image.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/sparknlp_reader.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/graph_builders.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/graph_builders.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/conll.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/conllu.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/pos.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/pub_tator.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/spacy_to_annotation.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/training/tfgraphs.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/upload_to_hub.py +0 -0
- {spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/util.py +0 -0
{spark_nlp-6.1.4 → spark_nlp-6.1.5}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spark-nlp
-Version: 6.1.4
+Version: 6.1.5
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.1.4 pyspark==3.3.1
+$ pip install spark-nlp==6.1.5 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
 
 ### Apache Spark Support
 
-Spark NLP *6.1.4* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
 
 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
 
 ### Databricks Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following runtimes:
+Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
 
 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -216,7 +216,7 @@ We are compatible with older runtimes. For a full list check databricks support
 
 ### EMR Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
 
 | **EMR Release** |
 |--------------------|
{spark_nlp-6.1.4 → spark_nlp-6.1.5}/README.md

@@ -63,7 +63,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.1.4 pyspark==3.3.1
+$ pip install spark-nlp==6.1.5 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
@@ -129,7 +129,7 @@ For a quick example of using pipelines and models take a look at our official [d
 
 ### Apache Spark Support
 
-Spark NLP *6.1.4* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
 
 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -159,7 +159,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
 
 ### Databricks Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following runtimes:
+Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
 
 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -177,7 +177,7 @@ We are compatible with older runtimes. For a full list check databricks support
 
 ### EMR Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
 
 | **EMR Release** |
 |--------------------|
{spark_nlp-6.1.4 → spark_nlp-6.1.5}/setup.py

@@ -41,7 +41,7 @@ setup(
     # project code, see
     # https://packaging.python.org/en/latest/single_source_version.html
 
-    version='6.1.4',  # Required
+    version='6.1.5',  # Required
 
     # This is a one-line description or tagline of what your project does. This
     # corresponds to the 'Summary' metadata field:
{spark_nlp-6.1.4 → spark_nlp-6.1.5}/spark_nlp.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spark-nlp
-Version: 6.1.4
+Version: 6.1.5
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.1.4 pyspark==3.3.1
+$ pip install spark-nlp==6.1.5 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
 
 ### Apache Spark Support
 
-Spark NLP *6.1.4* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.1.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
 
 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
 
 ### Databricks Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following runtimes:
+Spark NLP 6.1.5 has been tested and is compatible with the following runtimes:
 
 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -216,7 +216,7 @@ We are compatible with older runtimes. For a full list check databricks support
 
 ### EMR Support
 
-Spark NLP 6.1.4 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.1.5 has been tested and is compatible with the following EMR releases:
 
 | **EMR Release** |
 |--------------------|
{spark_nlp-6.1.4 → spark_nlp-6.1.5}/spark_nlp.egg-info/SOURCES.txt

@@ -260,6 +260,7 @@ sparknlp/reader/pdf_to_text.py
 sparknlp/reader/reader2doc.py
 sparknlp/reader/reader2image.py
 sparknlp/reader/reader2table.py
+sparknlp/reader/reader_assembler.py
 sparknlp/reader/sparknlp_reader.py
 sparknlp/training/__init__.py
 sparknlp/training/conll.py
{spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/partition/partition_properties.py

@@ -18,6 +18,23 @@ from pyspark.ml.param import Param, Params, TypeConverters
 
 class HasReaderProperties(Params):
 
+    inputCol = Param(
+        Params._dummy(),
+        "inputCol",
+        "input column name",
+        typeConverter=TypeConverters.toString
+    )
+
+    def setInputCol(self, value):
+        """Sets input column name.
+
+        Parameters
+        ----------
+        value : str
+            Name of the Input Column
+        """
+        return self._set(inputCol=value)
+
     outputCol = Param(
         Params._dummy(),
         "outputCol",
@@ -25,6 +42,16 @@ class HasReaderProperties(Params):
         typeConverter=TypeConverters.toString
     )
 
+    def setOutputCol(self, value):
+        """Sets output column name.
+
+        Parameters
+        ----------
+        value : str
+            Name of the Output Column
+        """
+        return self._set(outputCol=value)
+
     contentPath = Param(
         Params._dummy(),
         "contentPath",
@@ -167,6 +194,56 @@ class HasReaderProperties(Params):
         """
         return self._set(explodeDocs=value)
 
+    flattenOutput = Param(
+        Params._dummy(),
+        "flattenOutput",
+        "If true, output is flattened to plain text with minimal metadata",
+        typeConverter=TypeConverters.toBoolean
+    )
+
+    def setFlattenOutput(self, value):
+        """Sets whether to flatten the output to plain text with minimal metadata.
+
+        ParametersF
+        ----------
+        value : bool
+            If true, output is flattened to plain text with minimal metadata
+        """
+        return self._set(flattenOutput=value)
+
+    titleThreshold = Param(
+        Params._dummy(),
+        "titleThreshold",
+        "Minimum font size threshold for title detection in PDF docs",
+        typeConverter=TypeConverters.toFloat
+    )
+
+    def setTitleThreshold(self, value):
+        """Sets the minimum font size threshold for title detection in PDF documents.
+
+        Parameters
+        ----------
+        value : float
+            Minimum font size threshold for title detection in PDF docs
+        """
+        return self._set(titleThreshold=value)
+
+    outputAsDocument = Param(
+        Params._dummy(),
+        "outputAsDocument",
+        "Whether to return all sentences joined into a single document",
+        typeConverter=TypeConverters.toBoolean
+    )
+
+    def setOutputAsDocument(self, value):
+        """Sets whether to return all sentences joined into a single document.
+
+        Parameters
+        ----------
+        value : bool
+            Whether to return all sentences joined into a single document
+        """
+        return self._set(outputAsDocument=value)
 
 class HasEmailReaderProperties(Params):
 
@@ -683,13 +760,3 @@ class HasPdfProperties(Params):
             True to read as images, False otherwise.
         """
         return self._set(readAsImage=value)
-
-    def setOutputCol(self, value):
-        """Sets output column name.
-
-        Parameters
-        ----------
-        value : str
-            Name of the Output Column
-        """
-        return self._set(outputCol=value)
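Taken together, these hunks centralize the shared reader parameters (inputCol, outputCol, flattenOutput, titleThreshold, outputAsDocument) on the HasReaderProperties mixin, so individual readers no longer re-declare them. Below is a minimal, self-contained sketch of the pyspark mixin pattern being applied here; HasFlattenOutput and ToyReader are illustrative names, not code from the package:

```python
from pyspark.ml.param import Param, Params, TypeConverters


class HasFlattenOutput(Params):
    # Params._dummy() is a placeholder parent; pyspark re-binds the Param to
    # each concrete instance when the Params subclass is constructed.
    flattenOutput = Param(
        Params._dummy(),
        "flattenOutput",
        "If true, output is flattened to plain text with minimal metadata",
        typeConverter=TypeConverters.toBoolean,
    )

    def setFlattenOutput(self, value):
        # One shared setter: _set converts the value with the typeConverter
        # and stores it in the instance's param map.
        return self._set(flattenOutput=value)


class ToyReader(HasFlattenOutput):
    def __init__(self):
        super(ToyReader, self).__init__()
        self._setDefault(flattenOutput=False)


reader = ToyReader().setFlattenOutput(True)
print(reader.getOrDefault(reader.flattenOutput))  # True
```

Every transformer that mixes in the property class inherits the same Param and setter, which is why the per-reader copies are deleted in the two files that follow.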
{spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/reader2doc.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from pyspark import keyword_only
-from pyspark.ml.param import TypeConverters, Params, Param
 
 from sparknlp.common import AnnotatorType
 from sparknlp.internal import AnnotatorTransformer
@@ -69,32 +68,11 @@ class Reader2Doc(
     |[{'document', 15, 38, 'This is a narrative text', {'pageNumber': 1, 'elementType': 'NarrativeText', 'fileName': 'pdf-title.pdf'}, []}]|
     |[{'document', 39, 68, 'This is another narrative text', {'pageNumber': 1, 'elementType': 'NarrativeText', 'fileName': 'pdf-title.pdf'}, []}]|
     +------------------------------------------------------------------------------------------------------------------------------------+
-    """
+    """
 
     name = "Reader2Doc"
-    outputAnnotatorType = AnnotatorType.DOCUMENT
-
-
-    flattenOutput = Param(
-        Params._dummy(),
-        "flattenOutput",
-        "If true, output is flattened to plain text with minimal metadata",
-        typeConverter=TypeConverters.toBoolean
-    )
 
-    titleThreshold = Param(
-        Params._dummy(),
-        "titleThreshold",
-        "Minimum font size threshold for title detection in PDF docs",
-        typeConverter=TypeConverters.toFloat
-    )
-
-    outputAsDocument = Param(
-        Params._dummy(),
-        "outputAsDocument",
-        "Whether to return all sentences joined into a single document",
-        typeConverter=TypeConverters.toBoolean
-    )
+    outputAnnotatorType = AnnotatorType.DOCUMENT
 
     excludeNonText = Param(
         Params._dummy(),
@@ -103,6 +81,16 @@ class Reader2Doc(
         typeConverter=TypeConverters.toBoolean
     )
 
+    def setExcludeNonText(self, value):
+        """Sets whether to exclude non-text content from the output.
+
+        Parameters
+        ----------
+        value : bool
+            Whether to exclude non-text content from the output. Default is False.
+        """
+        return self._set(excludeNonText=value)
+
     @keyword_only
     def __init__(self):
         super(Reader2Doc, self).__init__(classname="com.johnsnowlabs.reader.Reader2Doc")
@@ -117,44 +105,3 @@ class Reader2Doc(
     def setParams(self):
         kwargs = self._input_kwargs
         return self._set(**kwargs)
-
-
-    def setFlattenOutput(self, value):
-        """Sets whether to flatten the output to plain text with minimal metadata.
-
-        ParametersF
-        ----------
-        value : bool
-            If true, output is flattened to plain text with minimal metadata
-        """
-        return self._set(flattenOutput=value)
-
-    def setTitleThreshold(self, value):
-        """Sets the minimum font size threshold for title detection in PDF documents.
-
-        Parameters
-        ----------
-        value : float
-            Minimum font size threshold for title detection in PDF docs
-        """
-        return self._set(titleThreshold=value)
-
-    def setOutputAsDocument(self, value):
-        """Sets whether to return all sentences joined into a single document.
-
-        Parameters
-        ----------
-        value : bool
-            Whether to return all sentences joined into a single document
-        """
-        return self._set(outputAsDocument=value)
-
-    def setExcludeNonText(self, value):
-        """Sets whether to exclude non-text content from the output.
-
-        Parameters
-        ----------
-        value : bool
-            Whether to exclude non-text content from the output. Default is False.
-        """
-        return self._set(excludeNonText=value)
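The deleted setters are not an API break: Reader2Doc's base classes are not visible in this diff, but given the refactor above it presumably mixes in HasReaderProperties, so existing call sites keep working with the methods now resolved through the mixin. A hedged sketch (requires a running Spark NLP session; the content type and path are illustrative):

```python
import sparknlp
from sparknlp.reader.reader2doc import Reader2Doc

spark = sparknlp.start()  # Reader2Doc is JVM-backed, so a session must exist

reader2doc = (
    Reader2Doc()
    .setContentType("application/pdf")  # illustrative content type
    .setContentPath("/path/to/files")   # illustrative path
    .setOutputCol("document")           # setter now lives on HasReaderProperties
    .setFlattenOutput(True)             # inherited; previously declared locally
    .setTitleThreshold(18.0)            # inherited; previously declared locally
)
```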
{spark_nlp-6.1.4 → spark_nlp-6.1.5}/sparknlp/reader/reader2table.py

@@ -32,20 +32,6 @@ class Reader2Table(
 
     outputAnnotatorType = AnnotatorType.DOCUMENT
 
-    flattenOutput = Param(
-        Params._dummy(),
-        "flattenOutput",
-        "If true, output is flattened to plain text with minimal metadata",
-        typeConverter=TypeConverters.toBoolean
-    )
-
-    titleThreshold = Param(
-        Params._dummy(),
-        "titleThreshold",
-        "Minimum font size threshold for title detection in PDF docs",
-        typeConverter=TypeConverters.toFloat
-    )
-
     @keyword_only
     def __init__(self):
         super(Reader2Table, self).__init__(classname="com.johnsnowlabs.reader.Reader2Table")
@@ -55,23 +41,3 @@ class Reader2Table(
     def setParams(self):
         kwargs = self._input_kwargs
         return self._set(**kwargs)
-
-    def setFlattenOutput(self, value):
-        """Sets whether to flatten the output to plain text with minimal metadata.
-
-        Parameters
-        ----------
-        value : bool
-            If true, output is flattened to plain text with minimal metadata
-        """
-        return self._set(flattenOutput=value)
-
-    def setTitleThreshold(self, value):
-        """Sets the minimum font size threshold for title detection in PDF documents.
-
-        Parameters
-        ----------
-        value : float
-            Minimum font size threshold for title detection in PDF docs
-        """
-        return self._set(titleThreshold=value)
spark_nlp-6.1.5/sparknlp/reader/reader_assembler.py (new file)

@@ -0,0 +1,159 @@
+# Copyright 2017-2025 John Snow Labs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyspark import keyword_only
+
+from sparknlp.common import AnnotatorType
+from sparknlp.internal import AnnotatorTransformer
+from sparknlp.partition.partition_properties import *
+
+class ReaderAssembler(
+    AnnotatorTransformer,
+    HasReaderProperties,
+    HasHTMLReaderProperties,
+    HasEmailReaderProperties,
+    HasExcelReaderProperties,
+    HasPowerPointProperties,
+    HasTextReaderProperties,
+    HasPdfProperties
+):
+    """
+    The ReaderAssembler annotator provides a unified interface for combining multiple Spark NLP
+    readers (such as Reader2Doc, Reader2Table, and Reader2Image) into a single, configurable
+    component. It automatically orchestrates the execution of different readers based on input type,
+    configured priorities, and fallback strategies allowing you to handle diverse content formats
+    without manually chaining multiple readers in your pipeline.
+
+    ReaderAssembler simplifies the process of building flexible pipelines capable of ingesting and
+    processing documents, tables, and images in a consistent way. It handles reader selection,
+    ordering, and fault-tolerance internally, ensuring that pipelines remain concise, robust, and
+    easy to maintain.
+
+    Examples
+    --------
+    >>> from johnsnowlabs.reader import ReaderAssembler
+    >>> from pyspark.ml import Pipeline
+    >>>
+    >>> reader_assembler = ReaderAssembler() \\
+    ...     .setContentType("text/html") \\
+    ...     .setContentPath("/table-image.html") \\
+    ...     .setOutputCol("document")
+    >>>
+    >>> pipeline = Pipeline(stages=[reader_assembler])
+    >>> pipeline_model = pipeline.fit(empty_data_set)
+    >>> result_df = pipeline_model.transform(empty_data_set)
+    >>>
+    >>> result_df.show()
+    +--------+--------------------+--------------------+--------------------+---------+
+    |fileName|       document_text|      document_table|      document_image|exception|
+    +--------+--------------------+--------------------+--------------------+---------+
+    |    null|[{'document', 0, 26...|[{'document', 0, 50...|[{'image', , 5, 5, ...|     null|
+    +--------+--------------------+--------------------+--------------------+---------+
+
+    This annotator is especially useful when working with heterogeneous input data — for example,
+    when a dataset includes PDFs, spreadsheets, and images — allowing Spark NLP to automatically
+    invoke the appropriate reader for each file type while preserving a unified schema in the output.
+    """
+
+
+    name = 'ReaderAssembler'
+
+    outputAnnotatorType = AnnotatorType.DOCUMENT
+
+    excludeNonText = Param(
+        Params._dummy(),
+        "excludeNonText",
+        "Whether to exclude non-text content from the output. Default is False.",
+        typeConverter=TypeConverters.toBoolean
+    )
+
+    userMessage = Param(
+        Params._dummy(),
+        "userMessage",
+        "Custom user message.",
+        typeConverter=TypeConverters.toString
+    )
+
+    promptTemplate = Param(
+        Params._dummy(),
+        "promptTemplate",
+        "Format of the output prompt.",
+        typeConverter=TypeConverters.toString
+    )
+
+    customPromptTemplate = Param(
+        Params._dummy(),
+        "customPromptTemplate",
+        "Custom prompt template for image models.",
+        typeConverter=TypeConverters.toString
+    )
+
+    @keyword_only
+    def __init__(self):
+        super(ReaderAssembler, self).__init__(classname="com.johnsnowlabs.reader.ReaderAssembler")
+        self._setDefault(contentType="",
+                         explodeDocs=False,
+                         userMessage="Describe this image",
+                         promptTemplate="qwen2vl-chat",
+                         readAsImage=True,
+                         customPromptTemplate="",
+                         ignoreExceptions=True,
+                         flattenOutput=False,
+                         titleThreshold=18)
+
+
+    @keyword_only
+    def setParams(self):
+        kwargs = self._input_kwargs
+        return self._set(**kwargs)
+
+    def setExcludeNonText(self, value):
+        """Sets whether to exclude non-text content from the output.
+
+        Parameters
+        ----------
+        value : bool
+            Whether to exclude non-text content from the output. Default is False.
+        """
+        return self._set(excludeNonText=value)
+
+    def setUserMessage(self, value: str):
+        """Sets custom user message.
+
+        Parameters
+        ----------
+        value : str
+            Custom user message to include.
+        """
+        return self._set(userMessage=value)
+
+    def setPromptTemplate(self, value: str):
+        """Sets format of the output prompt.
+
+        Parameters
+        ----------
+        value : str
+            Prompt template format.
+        """
+        return self._set(promptTemplate=value)
+
+    def setCustomPromptTemplate(self, value: str):
+        """Sets custom prompt template for image models.
+
+        Parameters
+        ----------
+        value : str
+            Custom prompt template string.
+        """
+        return self._set(customPromptTemplate=value)
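For context, a hedged usage sketch of the new transformer (assumes a running Spark NLP session; the input path is illustrative, while the setters and default values come directly from the file above):

```python
import sparknlp
from pyspark.ml import Pipeline
from sparknlp.reader.reader_assembler import ReaderAssembler

spark = sparknlp.start()

assembler = (
    ReaderAssembler()
    .setContentPath("/data/mixed-formats")  # illustrative folder of PDFs/HTML/images
    .setOutputCol("document")
    .setUserMessage("Describe this image")  # default from __init__, shown explicitly
    .setPromptTemplate("qwen2vl-chat")      # default image-prompt format
    .setExcludeNonText(False)
)

# Reader-style transformers load content from contentPath at transform time,
# so the pipeline is fit on a dummy one-row DataFrame.
empty_df = spark.createDataFrame([[""]]).toDF("text")
result = Pipeline(stages=[assembler]).fit(empty_df).transform(empty_df)
result.show()
```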