spark-nlp 2.6.3rc1__py2.py3-none-any.whl → 6.2.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- com/johnsnowlabs/ml/__init__.py +0 -0
- com/johnsnowlabs/ml/ai/__init__.py +10 -0
- com/johnsnowlabs/nlp/__init__.py +4 -2
- spark_nlp-6.2.1.dist-info/METADATA +362 -0
- spark_nlp-6.2.1.dist-info/RECORD +292 -0
- {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/WHEEL +1 -1
- sparknlp/__init__.py +281 -27
- sparknlp/annotation.py +137 -6
- sparknlp/annotation_audio.py +61 -0
- sparknlp/annotation_image.py +82 -0
- sparknlp/annotator/__init__.py +93 -0
- sparknlp/annotator/audio/__init__.py +16 -0
- sparknlp/annotator/audio/hubert_for_ctc.py +188 -0
- sparknlp/annotator/audio/wav2vec2_for_ctc.py +161 -0
- sparknlp/annotator/audio/whisper_for_ctc.py +251 -0
- sparknlp/annotator/chunk2_doc.py +85 -0
- sparknlp/annotator/chunker.py +137 -0
- sparknlp/annotator/classifier_dl/__init__.py +61 -0
- sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
- sparknlp/annotator/classifier_dl/albert_for_question_answering.py +172 -0
- sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/albert_for_token_classification.py +179 -0
- sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
- sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
- sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +161 -0
- sparknlp/annotator/classifier_dl/bert_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +202 -0
- sparknlp/annotator/classifier_dl/bert_for_token_classification.py +177 -0
- sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +212 -0
- sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +205 -0
- sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +173 -0
- sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
- sparknlp/annotator/classifier_dl/classifier_dl.py +320 -0
- sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +198 -0
- sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +175 -0
- sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +193 -0
- sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +175 -0
- sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +211 -0
- sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
- sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +176 -0
- sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +148 -0
- sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +188 -0
- sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
- sparknlp/annotator/classifier_dl/multi_classifier_dl.py +395 -0
- sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
- sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +189 -0
- sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +225 -0
- sparknlp/annotator/classifier_dl/sentiment_dl.py +378 -0
- sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +170 -0
- sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
- sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +173 -0
- sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +225 -0
- sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +176 -0
- sparknlp/annotator/cleaners/__init__.py +15 -0
- sparknlp/annotator/cleaners/cleaner.py +202 -0
- sparknlp/annotator/cleaners/extractor.py +191 -0
- sparknlp/annotator/coref/__init__.py +1 -0
- sparknlp/annotator/coref/spanbert_coref.py +221 -0
- sparknlp/annotator/cv/__init__.py +29 -0
- sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
- sparknlp/annotator/cv/clip_for_zero_shot_classification.py +193 -0
- sparknlp/annotator/cv/convnext_for_image_classification.py +269 -0
- sparknlp/annotator/cv/florence2_transformer.py +180 -0
- sparknlp/annotator/cv/gemma3_for_multimodal.py +346 -0
- sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
- sparknlp/annotator/cv/janus_for_multimodal.py +351 -0
- sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
- sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
- sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
- sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
- sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
- sparknlp/annotator/cv/smolvlm_transformer.py +426 -0
- sparknlp/annotator/cv/swin_for_image_classification.py +242 -0
- sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +240 -0
- sparknlp/annotator/cv/vit_for_image_classification.py +217 -0
- sparknlp/annotator/dataframe_optimizer.py +216 -0
- sparknlp/annotator/date2_chunk.py +88 -0
- sparknlp/annotator/dependency/__init__.py +17 -0
- sparknlp/annotator/dependency/dependency_parser.py +294 -0
- sparknlp/annotator/dependency/typed_dependency_parser.py +318 -0
- sparknlp/annotator/document_character_text_splitter.py +228 -0
- sparknlp/annotator/document_normalizer.py +235 -0
- sparknlp/annotator/document_token_splitter.py +175 -0
- sparknlp/annotator/document_token_splitter_test.py +85 -0
- sparknlp/annotator/embeddings/__init__.py +45 -0
- sparknlp/annotator/embeddings/albert_embeddings.py +230 -0
- sparknlp/annotator/embeddings/auto_gguf_embeddings.py +539 -0
- sparknlp/annotator/embeddings/bert_embeddings.py +208 -0
- sparknlp/annotator/embeddings/bert_sentence_embeddings.py +224 -0
- sparknlp/annotator/embeddings/bge_embeddings.py +199 -0
- sparknlp/annotator/embeddings/camembert_embeddings.py +210 -0
- sparknlp/annotator/embeddings/chunk_embeddings.py +149 -0
- sparknlp/annotator/embeddings/deberta_embeddings.py +208 -0
- sparknlp/annotator/embeddings/distil_bert_embeddings.py +221 -0
- sparknlp/annotator/embeddings/doc2vec.py +352 -0
- sparknlp/annotator/embeddings/e5_embeddings.py +195 -0
- sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
- sparknlp/annotator/embeddings/elmo_embeddings.py +251 -0
- sparknlp/annotator/embeddings/instructor_embeddings.py +204 -0
- sparknlp/annotator/embeddings/longformer_embeddings.py +211 -0
- sparknlp/annotator/embeddings/minilm_embeddings.py +189 -0
- sparknlp/annotator/embeddings/mpnet_embeddings.py +192 -0
- sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
- sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
- sparknlp/annotator/embeddings/roberta_embeddings.py +225 -0
- sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +191 -0
- sparknlp/annotator/embeddings/sentence_embeddings.py +134 -0
- sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
- sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
- sparknlp/annotator/embeddings/universal_sentence_encoder.py +211 -0
- sparknlp/annotator/embeddings/word2vec.py +353 -0
- sparknlp/annotator/embeddings/word_embeddings.py +385 -0
- sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +225 -0
- sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +194 -0
- sparknlp/annotator/embeddings/xlnet_embeddings.py +227 -0
- sparknlp/annotator/er/__init__.py +16 -0
- sparknlp/annotator/er/entity_ruler.py +267 -0
- sparknlp/annotator/graph_extraction.py +368 -0
- sparknlp/annotator/keyword_extraction/__init__.py +16 -0
- sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +270 -0
- sparknlp/annotator/ld_dl/__init__.py +16 -0
- sparknlp/annotator/ld_dl/language_detector_dl.py +199 -0
- sparknlp/annotator/lemmatizer.py +250 -0
- sparknlp/annotator/matcher/__init__.py +20 -0
- sparknlp/annotator/matcher/big_text_matcher.py +272 -0
- sparknlp/annotator/matcher/date_matcher.py +303 -0
- sparknlp/annotator/matcher/multi_date_matcher.py +109 -0
- sparknlp/annotator/matcher/regex_matcher.py +221 -0
- sparknlp/annotator/matcher/text_matcher.py +290 -0
- sparknlp/annotator/n_gram_generator.py +141 -0
- sparknlp/annotator/ner/__init__.py +21 -0
- sparknlp/annotator/ner/ner_approach.py +94 -0
- sparknlp/annotator/ner/ner_converter.py +148 -0
- sparknlp/annotator/ner/ner_crf.py +397 -0
- sparknlp/annotator/ner/ner_dl.py +591 -0
- sparknlp/annotator/ner/ner_dl_graph_checker.py +293 -0
- sparknlp/annotator/ner/ner_overwriter.py +166 -0
- sparknlp/annotator/ner/zero_shot_ner_model.py +173 -0
- sparknlp/annotator/normalizer.py +230 -0
- sparknlp/annotator/openai/__init__.py +16 -0
- sparknlp/annotator/openai/openai_completion.py +349 -0
- sparknlp/annotator/openai/openai_embeddings.py +106 -0
- sparknlp/annotator/param/__init__.py +17 -0
- sparknlp/annotator/param/classifier_encoder.py +98 -0
- sparknlp/annotator/param/evaluation_dl_params.py +130 -0
- sparknlp/annotator/pos/__init__.py +16 -0
- sparknlp/annotator/pos/perceptron.py +263 -0
- sparknlp/annotator/sentence/__init__.py +17 -0
- sparknlp/annotator/sentence/sentence_detector.py +290 -0
- sparknlp/annotator/sentence/sentence_detector_dl.py +467 -0
- sparknlp/annotator/sentiment/__init__.py +17 -0
- sparknlp/annotator/sentiment/sentiment_detector.py +208 -0
- sparknlp/annotator/sentiment/vivekn_sentiment.py +242 -0
- sparknlp/annotator/seq2seq/__init__.py +35 -0
- sparknlp/annotator/seq2seq/auto_gguf_model.py +304 -0
- sparknlp/annotator/seq2seq/auto_gguf_reranker.py +334 -0
- sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +336 -0
- sparknlp/annotator/seq2seq/bart_transformer.py +420 -0
- sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
- sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
- sparknlp/annotator/seq2seq/gpt2_transformer.py +363 -0
- sparknlp/annotator/seq2seq/llama2_transformer.py +343 -0
- sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
- sparknlp/annotator/seq2seq/m2m100_transformer.py +392 -0
- sparknlp/annotator/seq2seq/marian_transformer.py +374 -0
- sparknlp/annotator/seq2seq/mistral_transformer.py +348 -0
- sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
- sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
- sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
- sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
- sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
- sparknlp/annotator/seq2seq/qwen_transformer.py +340 -0
- sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
- sparknlp/annotator/seq2seq/t5_transformer.py +425 -0
- sparknlp/annotator/similarity/__init__.py +0 -0
- sparknlp/annotator/similarity/document_similarity_ranker.py +379 -0
- sparknlp/annotator/spell_check/__init__.py +18 -0
- sparknlp/annotator/spell_check/context_spell_checker.py +911 -0
- sparknlp/annotator/spell_check/norvig_sweeting.py +358 -0
- sparknlp/annotator/spell_check/symmetric_delete.py +299 -0
- sparknlp/annotator/stemmer.py +79 -0
- sparknlp/annotator/stop_words_cleaner.py +190 -0
- sparknlp/annotator/tf_ner_dl_graph_builder.py +179 -0
- sparknlp/annotator/token/__init__.py +19 -0
- sparknlp/annotator/token/chunk_tokenizer.py +118 -0
- sparknlp/annotator/token/recursive_tokenizer.py +205 -0
- sparknlp/annotator/token/regex_tokenizer.py +208 -0
- sparknlp/annotator/token/tokenizer.py +561 -0
- sparknlp/annotator/token2_chunk.py +76 -0
- sparknlp/annotator/ws/__init__.py +16 -0
- sparknlp/annotator/ws/word_segmenter.py +429 -0
- sparknlp/base/__init__.py +30 -0
- sparknlp/base/audio_assembler.py +95 -0
- sparknlp/base/doc2_chunk.py +169 -0
- sparknlp/base/document_assembler.py +164 -0
- sparknlp/base/embeddings_finisher.py +201 -0
- sparknlp/base/finisher.py +217 -0
- sparknlp/base/gguf_ranking_finisher.py +234 -0
- sparknlp/base/graph_finisher.py +125 -0
- sparknlp/base/has_recursive_fit.py +24 -0
- sparknlp/base/has_recursive_transform.py +22 -0
- sparknlp/base/image_assembler.py +172 -0
- sparknlp/base/light_pipeline.py +429 -0
- sparknlp/base/multi_document_assembler.py +164 -0
- sparknlp/base/prompt_assembler.py +207 -0
- sparknlp/base/recursive_pipeline.py +107 -0
- sparknlp/base/table_assembler.py +145 -0
- sparknlp/base/token_assembler.py +124 -0
- sparknlp/common/__init__.py +26 -0
- sparknlp/common/annotator_approach.py +41 -0
- sparknlp/common/annotator_model.py +47 -0
- sparknlp/common/annotator_properties.py +114 -0
- sparknlp/common/annotator_type.py +38 -0
- sparknlp/common/completion_post_processing.py +37 -0
- sparknlp/common/coverage_result.py +22 -0
- sparknlp/common/match_strategy.py +33 -0
- sparknlp/common/properties.py +1298 -0
- sparknlp/common/read_as.py +33 -0
- sparknlp/common/recursive_annotator_approach.py +35 -0
- sparknlp/common/storage.py +149 -0
- sparknlp/common/utils.py +39 -0
- sparknlp/functions.py +315 -5
- sparknlp/internal/__init__.py +1199 -0
- sparknlp/internal/annotator_java_ml.py +32 -0
- sparknlp/internal/annotator_transformer.py +37 -0
- sparknlp/internal/extended_java_wrapper.py +63 -0
- sparknlp/internal/params_getters_setters.py +71 -0
- sparknlp/internal/recursive.py +70 -0
- sparknlp/logging/__init__.py +15 -0
- sparknlp/logging/comet.py +467 -0
- sparknlp/partition/__init__.py +16 -0
- sparknlp/partition/partition.py +244 -0
- sparknlp/partition/partition_properties.py +902 -0
- sparknlp/partition/partition_transformer.py +200 -0
- sparknlp/pretrained/__init__.py +17 -0
- sparknlp/pretrained/pretrained_pipeline.py +158 -0
- sparknlp/pretrained/resource_downloader.py +216 -0
- sparknlp/pretrained/utils.py +35 -0
- sparknlp/reader/__init__.py +15 -0
- sparknlp/reader/enums.py +19 -0
- sparknlp/reader/pdf_to_text.py +190 -0
- sparknlp/reader/reader2doc.py +124 -0
- sparknlp/reader/reader2image.py +136 -0
- sparknlp/reader/reader2table.py +44 -0
- sparknlp/reader/reader_assembler.py +159 -0
- sparknlp/reader/sparknlp_reader.py +461 -0
- sparknlp/training/__init__.py +20 -0
- sparknlp/training/_tf_graph_builders/__init__.py +0 -0
- sparknlp/training/_tf_graph_builders/graph_builders.py +299 -0
- sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
- sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +41 -0
- sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +78 -0
- sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +521 -0
- sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +62 -0
- sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +28 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +36 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +385 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +183 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +235 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +665 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +245 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +4006 -0
- sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
- sparknlp/training/_tf_graph_builders_1x/graph_builders.py +277 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +34 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +78 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +532 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +62 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +28 -0
- sparknlp/training/conll.py +150 -0
- sparknlp/training/conllu.py +103 -0
- sparknlp/training/pos.py +103 -0
- sparknlp/training/pub_tator.py +76 -0
- sparknlp/training/spacy_to_annotation.py +57 -0
- sparknlp/training/tfgraphs.py +5 -0
- sparknlp/upload_to_hub.py +149 -0
- sparknlp/util.py +51 -5
- com/__init__.pyc +0 -0
- com/__pycache__/__init__.cpython-36.pyc +0 -0
- com/johnsnowlabs/__init__.pyc +0 -0
- com/johnsnowlabs/__pycache__/__init__.cpython-36.pyc +0 -0
- com/johnsnowlabs/nlp/__init__.pyc +0 -0
- com/johnsnowlabs/nlp/__pycache__/__init__.cpython-36.pyc +0 -0
- spark_nlp-2.6.3rc1.dist-info/METADATA +0 -36
- spark_nlp-2.6.3rc1.dist-info/RECORD +0 -48
- sparknlp/__init__.pyc +0 -0
- sparknlp/__pycache__/__init__.cpython-36.pyc +0 -0
- sparknlp/__pycache__/annotation.cpython-36.pyc +0 -0
- sparknlp/__pycache__/annotator.cpython-36.pyc +0 -0
- sparknlp/__pycache__/base.cpython-36.pyc +0 -0
- sparknlp/__pycache__/common.cpython-36.pyc +0 -0
- sparknlp/__pycache__/embeddings.cpython-36.pyc +0 -0
- sparknlp/__pycache__/functions.cpython-36.pyc +0 -0
- sparknlp/__pycache__/internal.cpython-36.pyc +0 -0
- sparknlp/__pycache__/pretrained.cpython-36.pyc +0 -0
- sparknlp/__pycache__/storage.cpython-36.pyc +0 -0
- sparknlp/__pycache__/training.cpython-36.pyc +0 -0
- sparknlp/__pycache__/util.cpython-36.pyc +0 -0
- sparknlp/annotation.pyc +0 -0
- sparknlp/annotator.py +0 -3006
- sparknlp/annotator.pyc +0 -0
- sparknlp/base.py +0 -347
- sparknlp/base.pyc +0 -0
- sparknlp/common.py +0 -193
- sparknlp/common.pyc +0 -0
- sparknlp/embeddings.py +0 -40
- sparknlp/embeddings.pyc +0 -0
- sparknlp/internal.py +0 -288
- sparknlp/internal.pyc +0 -0
- sparknlp/pretrained.py +0 -123
- sparknlp/pretrained.pyc +0 -0
- sparknlp/storage.py +0 -32
- sparknlp/storage.pyc +0 -0
- sparknlp/training.py +0 -62
- sparknlp/training.pyc +0 -0
- sparknlp/util.pyc +0 -0
- {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
com/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
com/johnsnowlabs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
com/johnsnowlabs/ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
com/johnsnowlabs/ml/ai/__init__.py,sha256=YQiK2M7U4d8y5irPy_HB8ae0mSpqS9583MH44pnKJXc,295
|
|
5
|
+
com/johnsnowlabs/nlp/__init__.py,sha256=DPIVXtONO5xXyOk-HB0-sNiHAcco17NN13zPS_6Uw8c,294
|
|
6
|
+
sparknlp/__init__.py,sha256=1yySDOPAvnNsnh-kDNNWD4e5OrUvr3GyGFNPCmF_8cs,13814
|
|
7
|
+
sparknlp/annotation.py,sha256=I5zOxG5vV2RfPZfqN9enT1i4mo6oBcn3Lrzs37QiOiA,5635
|
|
8
|
+
sparknlp/annotation_audio.py,sha256=iRV_InSVhgvAwSRe9NTbUH9v6OGvTM-FPCpSAKVu0mE,1917
|
|
9
|
+
sparknlp/annotation_image.py,sha256=xhCe8Ko-77XqWVuuYHFrjKqF6zPd8Z-RY_rmZXNwCXU,2547
|
|
10
|
+
sparknlp/functions.py,sha256=4cVRyBjlF1YttcMNs5z7gf9NPW7q9qzGb5KOf44Phgs,12120
|
|
11
|
+
sparknlp/upload_to_hub.py,sha256=toULNLeXK3MmTpmza9dR8R8od2QJEw1eTgBDM-O9_I0,6018
|
|
12
|
+
sparknlp/util.py,sha256=2Z499Psal-NuEJ4CHQNgHnAJrS73QQNyCzKPo1MavU8,2279
|
|
13
|
+
sparknlp/annotator/__init__.py,sha256=G746SY8dRM_AOf-gaoSKlh7D-2TKGpqqHhGr4XF-b2A,3534
|
|
14
|
+
sparknlp/annotator/chunk2_doc.py,sha256=IJ3_vQHvzjqono90AZUzZ67QSYjwquuMYbN9_HSOVcg,3141
|
|
15
|
+
sparknlp/annotator/chunker.py,sha256=8nz9B7R_mxKxcfJRfKvz2x_T29W3u4izE9k0wfYPzgE,5174
|
|
16
|
+
sparknlp/annotator/dataframe_optimizer.py,sha256=P4GySLzz1lRCZX0UBRF9_IDuXlRS1XvRWz-B2L0zqMA,7771
|
|
17
|
+
sparknlp/annotator/date2_chunk.py,sha256=tW3m_LExmhx8LMFWOGXqMyfNRXSr2dnoEHD-6DrnpXI,3153
|
|
18
|
+
sparknlp/annotator/document_character_text_splitter.py,sha256=oNrOKJAKO2h1wr0bEuSqYrrltIU_Y6J6cTHy70yKy6s,9877
|
|
19
|
+
sparknlp/annotator/document_normalizer.py,sha256=OOqPd6zp7FbtmlLHn1zAxPg9oxDzYRPKLYKr5k0Y5ck,12155
|
|
20
|
+
sparknlp/annotator/document_token_splitter.py,sha256=-9xbQ9pVAjcKHQQrSk6Cb7f8W1cblCLwWXTNR8kFptA,7499
|
|
21
|
+
sparknlp/annotator/document_token_splitter_test.py,sha256=NWO9mwhAIUJFuxPofB3c39iUm_6vKp4pteDsBOTH8ng,2684
|
|
22
|
+
sparknlp/annotator/graph_extraction.py,sha256=b4SB3B_hFgCJT4e5Jcscyxdzfbvw3ujKTa6UNgX5Lhc,14471
|
|
23
|
+
sparknlp/annotator/lemmatizer.py,sha256=w1nNMln2HgM4O1DOaISo5AGyzzFHuaIDQqWFcr4JmcA,8970
|
|
24
|
+
sparknlp/annotator/n_gram_generator.py,sha256=KRX5xfxmorOfYQkQHZWkkXjwjC13gDTAXnukwgz6hKU,5274
|
|
25
|
+
sparknlp/annotator/normalizer.py,sha256=7AkAOB-e8b2uyUBwYoq9HvMPijOwV3wEoxcB3BVsr4w,8780
|
|
26
|
+
sparknlp/annotator/stemmer.py,sha256=Tl48voyG9wqbT5MAA1hDKW90NorU8rIDhttJxOo1s3Q,2948
|
|
27
|
+
sparknlp/annotator/stop_words_cleaner.py,sha256=Z9yI9AWDIAXbPM2X6n84voiW31Z20XofCL-tTQNo5ro,7015
|
|
28
|
+
sparknlp/annotator/tf_ner_dl_graph_builder.py,sha256=ovsRBUfw9lJkuetmrcYRmW1Ll-33sdDPi4xJ0M_Fs7k,6379
|
|
29
|
+
sparknlp/annotator/token2_chunk.py,sha256=FtS2Doav9xL1IrC9ZUU4iXqyipp-iT3g68kZt-7YCcQ,2674
|
|
30
|
+
sparknlp/annotator/audio/__init__.py,sha256=dXjtvi5c0aTZFq1Q_JciUd1uFTBVSJoUdcq0hiYd8yk,757
|
|
31
|
+
sparknlp/annotator/audio/hubert_for_ctc.py,sha256=76PfwPZZvOHU5kfDqLueCFbmqa4W8pMNRGoCvOqjsEA,7859
|
|
32
|
+
sparknlp/annotator/audio/wav2vec2_for_ctc.py,sha256=K78P1U6vA4O1UufsLYzy0H7arsKNmwPcIV7kzDFsA5Q,6210
|
|
33
|
+
sparknlp/annotator/audio/whisper_for_ctc.py,sha256=uII51umuohqwnAW0Q7VdxEFyr_j5LMnfpcRlf8TbetA,9800
|
|
34
|
+
sparknlp/annotator/classifier_dl/__init__.py,sha256=lQUdV9ynjn7hG2Wxb37iwc89gfOLC6g_UPgtFaIwm3g,4311
|
|
35
|
+
sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py,sha256=oaV3pTFNCnEpyaML1ydOPOdO40OtC_tOACrcm3IqPgU,5984
|
|
36
|
+
sparknlp/annotator/classifier_dl/albert_for_question_answering.py,sha256=LG2dL6Fky1T35yXTUZBfIihIIGnkRFQ7ECQ3HRXXEG8,6517
|
|
37
|
+
sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py,sha256=kWx7f9pcKE2qw319gn8FN0Md5dX38gbmfeoY9gWCLNk,7842
|
|
38
|
+
sparknlp/annotator/classifier_dl/albert_for_token_classification.py,sha256=5rdsjWnsAVmtP-idU7ATKJ8lkH2rtlKZLnpi4Mq27eI,6839
|
|
39
|
+
sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py,sha256=_TgV6EiIOiD_djA3fxfoz-o37mzMeKbn6iL2kZ6GzO0,8366
|
|
40
|
+
sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py,sha256=yqQeDdpLbNOKuSZejZjSAjT8ydYyxsTVf2aFDgSSDfc,8767
|
|
41
|
+
sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py,sha256=vq9Y2d253B6bN3ShKwGq1GVBDCFGKkmz_psShbnx-e8,5930
|
|
42
|
+
sparknlp/annotator/classifier_dl/bert_for_question_answering.py,sha256=2euY_RAdMPA4IHJXZAd5MkQojFOtFNhB_hSc1iVQ5DQ,6433
|
|
43
|
+
sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py,sha256=AzD3RQcRuQc0DDTbL6vGiacTtHlZnbAqksNvRQq7EQE,7800
|
|
44
|
+
sparknlp/annotator/classifier_dl/bert_for_token_classification.py,sha256=uJXoDLPfPWiRmKqtw_3lLBvneIirj87S2JWwfd33zq8,6668
|
|
45
|
+
sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py,sha256=mli7_TZjrFs6GPwWtgpPty6HrRKIXrEZKjcR00NKyBo,8318
|
|
46
|
+
sparknlp/annotator/classifier_dl/camembert_for_question_answering.py,sha256=BeE-62tFkXMoyiy3PtcnwgT2-wqzTFo5VZHrWUqsWmM,6510
|
|
47
|
+
sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py,sha256=06bkwhNBcmNS5gR_JrMjBDW3jAdjEI5YL4SuV16Va7E,7962
|
|
48
|
+
sparknlp/annotator/classifier_dl/camembert_for_token_classification.py,sha256=vjwDE_kZiBupENaYvUZOTTqVOb3KCsGse-QX3QOutz4,6522
|
|
49
|
+
sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py,sha256=YUfohQ-qIG3jntfYgrjx8bOFxGTTMrpB-Sj49PNAEEU,8360
|
|
50
|
+
sparknlp/annotator/classifier_dl/classifier_dl.py,sha256=Dj-T5ByCgzgFpah7LVz_07QKBB0qNdqXB6tkvPE-nsQ,12672
|
|
51
|
+
sparknlp/annotator/classifier_dl/deberta_for_question_answering.py,sha256=oikVBeVohsSR9HPV_yq_0U7zHps94UO4lXbYu9G7MF0,6486
|
|
52
|
+
sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py,sha256=H2LDT8ttD9hxfFDrymsyCq0EwCuWl5FE2-XVqT9LcRQ,7773
|
|
53
|
+
sparknlp/annotator/classifier_dl/deberta_for_token_classification.py,sha256=jj5hB9AV-0Of505E6z62lYPIWmsqNeTX0vRRq3_7T9I,6807
|
|
54
|
+
sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py,sha256=R7eVMChBY_wm7oM2j1Y18ZJ9dcIm5ysq8XBSIiVxZKw,8280
|
|
55
|
+
sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py,sha256=yA4LrI4RN4f44wbIrdpwqderTJBhAkjAHpUxcCeCROE,6552
|
|
56
|
+
sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py,sha256=Cax3LcVLppiHs1dyahsBSq_TLHSwI2-K7LGCZHZNs1I,7926
|
|
57
|
+
sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py,sha256=y9S83LW0Mfn4fRzopRXFj8l2gb-Nrm1rr9zRftOckJU,6832
|
|
58
|
+
sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py,sha256=DqQOSr-TutHS6y165QJ-Pg6EEkG9JOFN0FxgyCi5SCg,8485
|
|
59
|
+
sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py,sha256=xRSs2B7YMSfqAHGzR79NzHq-rBEkxUl-pUNiXVxVWuk,6048
|
|
60
|
+
sparknlp/annotator/classifier_dl/longformer_for_question_answering.py,sha256=VKbOKSTtwdeSsSzB2oKiRlFwSOcpHuMfkvgGM3ofBIo,6553
|
|
61
|
+
sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py,sha256=_XO3Ufl_wHyUgUIechZ6J1VCE2G2W-FUPZfHmJSfQvk,7932
|
|
62
|
+
sparknlp/annotator/classifier_dl/longformer_for_token_classification.py,sha256=RmiFuBRhIAoJoQ8Rgcu997-PxBK1hhWuLVlS1qztMyk,6848
|
|
63
|
+
sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py,sha256=w9hHLrQbDIUHAdCKiXNDneAbohMKopixAKU2wkYkqbs,5522
|
|
64
|
+
sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py,sha256=M__giFElL6Q3I88QD6OoXDzdQDk_Zp5sS__Kh_XpLdo,7308
|
|
65
|
+
sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py,sha256=SgFAJcv7ZE3BmJOehK_CjAaueqaaK6PR33zA5aE9-Ww,6754
|
|
66
|
+
sparknlp/annotator/classifier_dl/multi_classifier_dl.py,sha256=ylKQzS7ROyeKeiOF4BZiIkQV1sfrnfUUQ9LXFSFK_Vo,16045
|
|
67
|
+
sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py,sha256=SlzkA_fKurWOQDhvWlEBiMUfLgIoaRRglIdENMv7u38,6008
|
|
68
|
+
sparknlp/annotator/classifier_dl/roberta_for_question_answering.py,sha256=WRxu1uhXnY9C4UHdtJ8qiVGhPSX7sCdSaML0AWHOdJw,6471
|
|
69
|
+
sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py,sha256=z97uH5WkG8kPX1Y9qtpLwD7egl0kzbVmxtq4xzZgNNI,7857
|
|
70
|
+
sparknlp/annotator/classifier_dl/roberta_for_token_classification.py,sha256=hvnG31FonfirdLcIy4_bkhbdQalRlqS8x3woScQeRVg,7220
|
|
71
|
+
sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py,sha256=nP0D_jg8xPFUMP9uGNvmACIqfwAneDddVXbioHLHFJ0,8818
|
|
72
|
+
sparknlp/annotator/classifier_dl/sentiment_dl.py,sha256=6Z7X3-ykxoaUz6vz-YIXkv2m2_lxIDEwKAd1yHIzcvU,14416
|
|
73
|
+
sparknlp/annotator/classifier_dl/tapas_for_question_answering.py,sha256=2YBODMDUZT-j5ceOFTixrEkOqrztIM1kU-tsW_wao18,6317
|
|
74
|
+
sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py,sha256=D9Gdw3UbnoamRfS_RXocIuqyZVap8uirX8IpT41RaYU,5600
|
|
75
|
+
sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py,sha256=t_zCnKGCjDccKNj_2mjRkysOaNCWNBMKXehbuFSphQc,6538
|
|
76
|
+
sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py,sha256=sudgwa8_QZQzaYvEMSt6J1bDDwyK2Hp1VFhh98P08hY,7930
|
|
77
|
+
sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py,sha256=ub5mMiZYKP4eBmXRzjkjfv_FFFR8E01XJs0RC__RxPo,6808
|
|
78
|
+
sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py,sha256=4dBzpPj-VJcZul5hGcyjYkVMQ1PiaXZEGwvEaob3rss,8899
|
|
79
|
+
sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py,sha256=CI9Ah2lyHkqwDHWGCbkk_gPbQd0NudpC7oXiHtWOucs,7811
|
|
80
|
+
sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py,sha256=SndQpIfslsSYEOX-myLjpUS6-wVIeDG8MOhJYcu2_7M,6739
|
|
81
|
+
sparknlp/annotator/cleaners/__init__.py,sha256=tqevy1reFBls_EQdvD1f8Hhj5o7s153-NNLCXRoKJtQ,693
|
|
82
|
+
sparknlp/annotator/cleaners/cleaner.py,sha256=r_0ImrtGT-S-ytOknKoP844FVSv0J9YVKQyNrmSBTjs,6839
|
|
83
|
+
sparknlp/annotator/cleaners/extractor.py,sha256=nml8mnOToZYPF5fTp9VWdDfnWTXryLDzp3RWfQoJkWY,5805
|
|
84
|
+
sparknlp/annotator/coref/__init__.py,sha256=SG8MAaVxQpoYYAsyKaoOlvlHjorDzj3DHt4nnEdBWm8,53
|
|
85
|
+
sparknlp/annotator/coref/spanbert_coref.py,sha256=AXWJhvVquY2uoApO_Np1fz7_KyJhxnZB4i-xk78sBfc,8407
|
|
86
|
+
sparknlp/annotator/cv/__init__.py,sha256=hUeHb0iTDBZCS3sDAjypgs8PzU3eMgKtBa8xDkCueWE,1591
|
|
87
|
+
sparknlp/annotator/cv/blip_for_question_answering.py,sha256=At7L5pPBNDR1r-JGLKM5b3dTrq5Ecz9r0M1gToUVZTs,6551
|
|
88
|
+
sparknlp/annotator/cv/clip_for_zero_shot_classification.py,sha256=_1pLc9BiFrFN10eJPCDJLJT-vdnTSG9OnB25Y_kKJIA,7528
|
|
89
|
+
sparknlp/annotator/cv/convnext_for_image_classification.py,sha256=KzaAlYW5M2l73zUozzgg8_p14eGDz9k9PYVAUZLN25k,11874
|
|
90
|
+
sparknlp/annotator/cv/florence2_transformer.py,sha256=jyXTY0om_8J_9GwlH5Dkussb1g6ra642ATzLaqv7TSI,8525
|
|
91
|
+
sparknlp/annotator/cv/gemma3_for_multimodal.py,sha256=475lYkf05Naw3tsdTScTs-tnHgFkbcCab7r6UwEXzvg,13017
|
|
92
|
+
sparknlp/annotator/cv/internvl_for_multimodal.py,sha256=1j_lZwZw4Mt9_EucxemuPo2buq_uj5aIkniE9mbegag,10851
|
|
93
|
+
sparknlp/annotator/cv/janus_for_multimodal.py,sha256=BGkXGvIf-upDai216m12-WHJCUmv-NRQ9ArEW2Wt0V0,14502
|
|
94
|
+
sparknlp/annotator/cv/llava_for_multimodal.py,sha256=okfBIuWgPK7VhRxznZDoPXNIIxc6g_NM0l5aH4yibVY,12068
|
|
95
|
+
sparknlp/annotator/cv/mllama_for_multimodal.py,sha256=e4_bOGycy-gPYrl8en0mOP3eF8p17Xt85nwE5kmez5g,13071
|
|
96
|
+
sparknlp/annotator/cv/paligemma_for_multimodal.py,sha256=C2fRsppihEJK4YK-hoGrirGUyof4fV_r28HWOYb4Yms,11214
|
|
97
|
+
sparknlp/annotator/cv/phi3_vision_for_multimodal.py,sha256=94nlNbcDgJW0zX2QIjJl1cMXi1i6wqQeNG61CdoPLKA,12131
|
|
98
|
+
sparknlp/annotator/cv/qwen2vl_transformer.py,sha256=q2BCs1qA8fyqbMsnW4-6wbGJim4QjVUuDHOXhtMXJcY,12570
|
|
99
|
+
sparknlp/annotator/cv/smolvlm_transformer.py,sha256=g7eypRcMrPW4pYyTHmxBjP8tA_B_4rRwPbUCe3XHz-0,16964
|
|
100
|
+
sparknlp/annotator/cv/swin_for_image_classification.py,sha256=iZ1KY0GInbQmGzkmuNbds4PGPwCheLXc-Syv2HRmqug,10694
|
|
101
|
+
sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py,sha256=rEWJte-qN6PI6ML2cGhsZ37wAzjHUtN_WD5pcKAez7M,10167
|
|
102
|
+
sparknlp/annotator/cv/vit_for_image_classification.py,sha256=D2V3pxAd3rBi1817lxVOqaVvCw4trcVyorQgIPdLNAE,9148
|
|
103
|
+
sparknlp/annotator/dependency/__init__.py,sha256=eV43oXAGaYl2N1XKIEAAZJLNP8gpHm8VxuXDeDlQzR4,774
|
|
104
|
+
sparknlp/annotator/dependency/dependency_parser.py,sha256=SxyvHPp8Hs1Xnm5X1nLTMi095XoQMtfL8pbys15mYAI,11212
|
|
105
|
+
sparknlp/annotator/dependency/typed_dependency_parser.py,sha256=60vPdYkbFk9MPGegg3m9Uik9cMXpMZd8tBvXG39gNww,12456
|
|
106
|
+
sparknlp/annotator/embeddings/__init__.py,sha256=Aw1oaP5DI0OS6259c0TEZZ6j3VFSvYFEerah5a-udVw,2528
|
|
107
|
+
sparknlp/annotator/embeddings/albert_embeddings.py,sha256=6Rd1LIn8oFIpq_ALcJh-RUjPEO7Ht8wsHY6JHSFyMkw,9995
|
|
108
|
+
sparknlp/annotator/embeddings/auto_gguf_embeddings.py,sha256=-64uQKkvWsE2By3LEP9Hv10Eox10QAyVz0vSc_BduvY,20146
|
|
109
|
+
sparknlp/annotator/embeddings/bert_embeddings.py,sha256=HVUjkg56kBcpGZCo-fmPG5uatMDF3swW_lnbpy1SgSI,8463
|
|
110
|
+
sparknlp/annotator/embeddings/bert_sentence_embeddings.py,sha256=NQy9KuXT9aKsTpYCR5RAeoFWI2YqEGorbdYrf_0KKmw,9148
|
|
111
|
+
sparknlp/annotator/embeddings/bge_embeddings.py,sha256=ZGbxssjJFaSfbcgqAPV5hsu81SnC0obgCVNOoJkArDA,8105
|
|
112
|
+
sparknlp/annotator/embeddings/camembert_embeddings.py,sha256=dBTXas-2Tas_JUR9Xt_GtHLcyqi_cdvT5EHRnyVrSSQ,8817
|
|
113
|
+
sparknlp/annotator/embeddings/chunk_embeddings.py,sha256=WUmkJimSuFkdcLJnvcxOV0QlCLgGlhub29ZTrZb70WE,6052
|
|
114
|
+
sparknlp/annotator/embeddings/deberta_embeddings.py,sha256=_b5nzLb7heFQNN-uT2oBNO6-YmM8bHmAdnGXg47HOWw,8649
|
|
115
|
+
sparknlp/annotator/embeddings/distil_bert_embeddings.py,sha256=4pyMCsbvvXYeTGIMVUir9wCDKR_1f_HKtXZrTDO1Thc,9275
|
|
116
|
+
sparknlp/annotator/embeddings/doc2vec.py,sha256=Xk3MdEkXatX9lRgbFbAdnIDrLgIxzUIGWFBZeo9BTq0,13226
|
|
117
|
+
sparknlp/annotator/embeddings/e5_embeddings.py,sha256=Esuvrq9JlogGaSSzFVVDkOFMwgYwFwr17I62ZiCDm0k,7858
|
|
118
|
+
sparknlp/annotator/embeddings/e5v_embeddings.py,sha256=NFHO2nxDcgVzyKQ6yz1BWyqtjwt9QHwlkKbBXFwhsO8,5951
|
|
119
|
+
sparknlp/annotator/embeddings/elmo_embeddings.py,sha256=KV-KPs0Pq_OpPaHsnqBz2k_S7VdzyFZ4632IeFNKqJ8,9858
|
|
120
|
+
sparknlp/annotator/embeddings/instructor_embeddings.py,sha256=CTKmbuBOx_KBM4JM-Y1U5LyR-6rrnpoBGbgGE_axS1c,8670
|
|
121
|
+
sparknlp/annotator/embeddings/longformer_embeddings.py,sha256=jS4fxB5O0-d9ta9VKv8ai-17n5YHt5rML8QxUw7K4Io,8754
|
|
122
|
+
sparknlp/annotator/embeddings/minilm_embeddings.py,sha256=iKO3FPA6qkGjJAyPlhYpAWssnrNvlYjYxZaZEC0QDhc,7436
|
|
123
|
+
sparknlp/annotator/embeddings/mpnet_embeddings.py,sha256=7d6E4lS7jjkppDPvty1UHNNrbykkriFiysrxZ_RzL0U,7875
|
|
124
|
+
sparknlp/annotator/embeddings/mxbai_embeddings.py,sha256=kCaYcM3lLYJjhElLK5isdxzJqIvoGZlUKKNkySMUkE8,6017
|
|
125
|
+
sparknlp/annotator/embeddings/nomic_embeddings.py,sha256=WTllH3htx9wDD2Le8pZgKVPM_U8XNmroJb6f4PeVeP8,7347
|
|
126
|
+
sparknlp/annotator/embeddings/roberta_embeddings.py,sha256=q_WHby2lDcPc5bVHkGc6X_GwT3qyDUBLUVz5ZW4HCSY,9229
|
|
127
|
+
sparknlp/annotator/embeddings/roberta_sentence_embeddings.py,sha256=KVrD4z_tIU-sphK6dmbbnHBBt8-Y89C_BFQAkN99kZo,8181
|
|
128
|
+
sparknlp/annotator/embeddings/sentence_embeddings.py,sha256=azuA1FKMtTJ9suwJqTEHeWHumT6kYdfURTe_1fsqcB8,5402
|
|
129
|
+
sparknlp/annotator/embeddings/snowflake_embeddings.py,sha256=QzMSzmgSTedEAk0TlpHHBMjV0pPj8efLqVAVJqKgTgs,7253
|
|
130
|
+
sparknlp/annotator/embeddings/uae_embeddings.py,sha256=sqTT67vcegVxcyoATISLPJSmOnA6J_otB6iREKOb6e4,8794
|
|
131
|
+
sparknlp/annotator/embeddings/universal_sentence_encoder.py,sha256=_fTo-K78RjxiIKptpsI32mpW87RFCdXM16epHv4RVQY,8571
|
|
132
|
+
sparknlp/annotator/embeddings/word2vec.py,sha256=UBhA4qUczQOx1t82Eu51lxx1-wJ_RLnCb__ncowSNhk,13229
|
|
133
|
+
sparknlp/annotator/embeddings/word_embeddings.py,sha256=CQxjx2yDdmSM9s8D-bzsbUQhT8t1cqC4ynxlf9INpMU,15388
|
|
134
|
+
sparknlp/annotator/embeddings/xlm_roberta_embeddings.py,sha256=S2HHXOrSFXMAyloZUXJFNXL0-9wrZ32blsAhLB3Za1w,9582
|
|
135
|
+
sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py,sha256=ojxD3H2VgDEn-RzDdCz0X485pojHBAFrlzsNemI05bY,8602
|
|
136
|
+
sparknlp/annotator/embeddings/xlnet_embeddings.py,sha256=hJrlsJeO3D7uz54xiEiqqXEbq24YGuWz8U652PV9fNE,9336
|
|
137
|
+
sparknlp/annotator/er/__init__.py,sha256=eF9Z-PanVfZWSVN2HSFbE7QjCDb6NYV5ESn6geYKlek,692
|
|
138
|
+
sparknlp/annotator/er/entity_ruler.py,sha256=eg9-I9yWQ_vjaKI5g5T4s575VZEjN1Sq7WJJpCImSVg,10007
|
|
139
|
+
sparknlp/annotator/keyword_extraction/__init__.py,sha256=KotCR238x7LgisinsRGaARgPygWUIwC624FmH-sHacE,720
|
|
140
|
+
sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py,sha256=oeB-8qdMoljG-mgFOCsfnpxyK5jFBZnX7jAUQwsnHTc,13215
|
|
141
|
+
sparknlp/annotator/ld_dl/__init__.py,sha256=gWNGOaozABT83J4Mn7JmNQsXzm27s3PHpMQmlXl-5L8,704
|
|
142
|
+
sparknlp/annotator/ld_dl/language_detector_dl.py,sha256=Z1ej9cdIZueWQ71pdZ1BaogBNZifZiUNGXs4BnnVS2Q,8079
|
|
143
|
+
sparknlp/annotator/matcher/__init__.py,sha256=S-GGVeU1Lf6fZUgJKiFHKvHOjAGrP4TlgdGEK_85KaI,920
|
|
144
|
+
sparknlp/annotator/matcher/big_text_matcher.py,sha256=UeGBP6ibEmYt1Bi-HwnoPotiCosMrg-IHZ3ZNCqQvvg,9785
|
|
145
|
+
sparknlp/annotator/matcher/date_matcher.py,sha256=FrjTVoNBq1Z7E4qSJKvfV5rC7Mlm9R1gdohPEQswCNI,11737
|
|
146
|
+
sparknlp/annotator/matcher/multi_date_matcher.py,sha256=-zCp4HugIpSN6U4-c1uN_dGn7x69xYy6SUoQz6dY34s,4475
|
|
147
|
+
sparknlp/annotator/matcher/regex_matcher.py,sha256=GYhJNjBnCvfeEMv7d2_kDnqjfHtC7Hts--Ttlkjdozs,8380
|
|
148
|
+
sparknlp/annotator/matcher/text_matcher.py,sha256=kpP1-5BEFEW9kantoHvjhcCbX1qsGm4gEDtFHATWOwA,10636
|
|
149
|
+
sparknlp/annotator/ner/__init__.py,sha256=cd13PX6O92BIasE8NWd3JUR6up2fRbe7chRx4lQZRcY,1006
|
|
150
|
+
sparknlp/annotator/ner/ner_approach.py,sha256=4Y4gcusVtDaQpXfBbBe8XMAZ5hw1mvdh5A7g1j_T1KY,2793
|
|
151
|
+
sparknlp/annotator/ner/ner_converter.py,sha256=ANPp_Xe0DaK4z4n-0KujBj3Xp5jIbsFXBXvmp-aCKlM,5924
|
|
152
|
+
sparknlp/annotator/ner/ner_crf.py,sha256=eFylEz3-CENW0dyc6K4jodz9Kig3tnCyfZ3s-KZMvH4,14283
|
|
153
|
+
sparknlp/annotator/ner/ner_dl.py,sha256=SOALbvWhcPXpqXAvqDapp-vx60sAmuYgkOmiKr-9Ezk,22477
|
|
154
|
+
sparknlp/annotator/ner/ner_dl_graph_checker.py,sha256=xlAidGpkPcxKUyLhpWa7PAkuRpchEGftk-Q_IG4OuLE,9834
|
|
155
|
+
sparknlp/annotator/ner/ner_overwriter.py,sha256=en5OxXIP46yTXokIE96YDP9kcHA9oxiRPgwXMo0otew,6798
|
|
156
|
+
sparknlp/annotator/ner/zero_shot_ner_model.py,sha256=DohhnkGSG-JxjW72t8AOx3GY7R_qT-LA3I0KF9TBz-Y,7501
|
|
157
|
+
sparknlp/annotator/openai/__init__.py,sha256=u6SpV_xS8UpBE95WnTl0IefOI5TrTRl7ZHuYoeTetiA,759
|
|
158
|
+
sparknlp/annotator/openai/openai_completion.py,sha256=vetyDRGs-ge0pxMojEaNFhVy50Sf7Sm0tsh0I71i2Ss,16867
|
|
159
|
+
sparknlp/annotator/openai/openai_embeddings.py,sha256=i1ABDRmK6vMzzWP1rVxFiWnvXG4zfrTGGDjq4lvWQeE,108802
|
|
160
|
+
sparknlp/annotator/param/__init__.py,sha256=MKBZs6NWRKxrpeof3Jr4PVmoa75wyRSdWzSt0A9lpfY,750
|
|
161
|
+
sparknlp/annotator/param/classifier_encoder.py,sha256=PDyOdUX2GOFVr6MLtB7RUPBdtDrzDNJNRe_r9bY5JpE,3005
|
|
162
|
+
sparknlp/annotator/param/evaluation_dl_params.py,sha256=qxMP_98zaKbO1Y20yOvvarmrTCiU24VskJRo8NNI9CA,4998
|
|
163
|
+
sparknlp/annotator/pos/__init__.py,sha256=K180gwmrrwjSOX5iKC5rxjCX5VcI3L_XNdaz67i3828,696
|
|
164
|
+
sparknlp/annotator/pos/perceptron.py,sha256=3pluieVkNGcsFRUIa-ibeL1Z95zOXimYvfYAuHFELpk,9863
|
|
165
|
+
sparknlp/annotator/sentence/__init__.py,sha256=_Js9TH43waeI8GxM4lX9s-Ex5iwN2GzZwVUT5pnTnHA,767
|
|
166
|
+
sparknlp/annotator/sentence/sentence_detector.py,sha256=Kd5Vxk5RMdy0VZDpKWKRrQG-3KZ2rx7k_3bw_qVh4tY,10806
|
|
167
|
+
sparknlp/annotator/sentence/sentence_detector_dl.py,sha256=-Osj9Bm9KyZRTAWkOsK9cGIG8pFXBprG53a2LO9EwBA,17866
|
|
168
|
+
sparknlp/annotator/sentiment/__init__.py,sha256=Lq3vKaZS1YATLMg0VNXSVtkWL5q5G9taGBvdrvSwnfg,766
|
|
169
|
+
sparknlp/annotator/sentiment/sentiment_detector.py,sha256=m545NGU0Xzg_PO6_qIfpli1uZj7JQcyFgqe9R6wAPFI,8154
|
|
170
|
+
sparknlp/annotator/sentiment/vivekn_sentiment.py,sha256=4rpXWDgzU6ddnbrSCp9VdLb2epCc9oZ3c6XcqxEw8nk,9655
|
|
171
|
+
sparknlp/annotator/seq2seq/__init__.py,sha256=aDiph00Hyq7L8uDY0frtyuHtqFodBqTMbixx_nq4z1I,1841
|
|
172
|
+
sparknlp/annotator/seq2seq/auto_gguf_model.py,sha256=FaKxJaF7BdlQcf3T-nPZWnXRClF8dcYa71QHIaXFigI,11912
|
|
173
|
+
sparknlp/annotator/seq2seq/auto_gguf_reranker.py,sha256=a_70sNooY_9N6KHXVeuM4cDEbHVDlHa1KUWwu0A-l9s,12809
|
|
174
|
+
sparknlp/annotator/seq2seq/auto_gguf_vision_model.py,sha256=59UZKJbI6oYnSNkk2qqf1nhHtB8h3upGRcjZJyl9bGQ,15494
|
|
175
|
+
sparknlp/annotator/seq2seq/bart_transformer.py,sha256=I1flM4yeCzEAKOdQllBC30XuedxVJ7ferkFhZ6gwEbE,18481
|
|
176
|
+
sparknlp/annotator/seq2seq/cohere_transformer.py,sha256=43LZBVazZMgJRCsN7HaYjVYfJ5hRMV95QZyxMtXq-m4,13496
|
|
177
|
+
sparknlp/annotator/seq2seq/cpm_transformer.py,sha256=0CnBFMlxMu0pD2QZMHyoGtIYgXqfUQm68vr6zEAa6Eg,13290
|
|
178
|
+
sparknlp/annotator/seq2seq/gpt2_transformer.py,sha256=Oz95R_NRR4tWHu_bW6Ak2832ZILXycp3ify7LfRSi8o,15310
|
|
179
|
+
sparknlp/annotator/seq2seq/llama2_transformer.py,sha256=3LzTR0VerFdFmOizsrs2Q7HTnjELJ5WtfUgx5XnOqGM,13898
|
|
180
|
+
sparknlp/annotator/seq2seq/llama3_transformer.py,sha256=wmhgWQkO__H1vIGnAMjUU14Gtit4qOcE1m9YpM6YkB4,14950
|
|
181
|
+
sparknlp/annotator/seq2seq/m2m100_transformer.py,sha256=brlOWjvdbDPfycTUMWnXnQjA9qY9I8ljJJpEd1Gwq5Q,16128
|
|
182
|
+
sparknlp/annotator/seq2seq/marian_transformer.py,sha256=mQ4Ylh7ZzXAOue8f-x0gqzfS3vAz3XUdD7eQ2XhcEs4,13781
|
|
183
|
+
sparknlp/annotator/seq2seq/mistral_transformer.py,sha256=6lVrhWvW8b_3DaQtpegigFL25tELThRHZQRxxNhZuAU,14250
|
|
184
|
+
sparknlp/annotator/seq2seq/nllb_transformer.py,sha256=1ys01yaC0nVzXETy8oD2wZHyombfLzK1cetKrVGTVyY,19546
|
|
185
|
+
sparknlp/annotator/seq2seq/olmo_transformer.py,sha256=B_zhYkAfYycw5uBq1tVNPmaKuYtpJOxRC6PArit7XiE,13634
|
|
186
|
+
sparknlp/annotator/seq2seq/phi2_transformer.py,sha256=WwKCUOH8qGFv62YF63HjuT7bMVldh06gHvaZH3tbSDk,13787
|
|
187
|
+
sparknlp/annotator/seq2seq/phi3_transformer.py,sha256=arIcw5NDMv3ubBwWz3KYRdLMsspTiEI8vk4s00lyq1c,14293
|
|
188
|
+
sparknlp/annotator/seq2seq/phi4_transformer.py,sha256=iVbsqIzKS2MG-LmA3tljjsjeCUzBqATw1btDBOnFviM,14324
|
|
189
|
+
sparknlp/annotator/seq2seq/qwen_transformer.py,sha256=IYxveoHGWWoiwzJ_VMLMgUBe6jr1JSHKSY0PApnTCOI,14640
|
|
190
|
+
sparknlp/annotator/seq2seq/starcoder_transformer.py,sha256=BTXbSMRpXnDvrfh-6iFS5k6g6EcPV9zBl4U-SSC19wA,14293
|
|
191
|
+
sparknlp/annotator/seq2seq/t5_transformer.py,sha256=wDVxNLluIU1HGZFqaKKc4YTt4l-elPlAtQ7EEa0f5tg,17308
|
|
192
|
+
sparknlp/annotator/similarity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
193
|
+
sparknlp/annotator/similarity/document_similarity_ranker.py,sha256=BHV2XWA18YvBn_OKOVvR0TmPPnHSgiAgpZpaPz7ar_s,15826
|
|
194
|
+
sparknlp/annotator/spell_check/__init__.py,sha256=sdnPR3f3Q9mHiv-n4g_O7KpRWPRPweyATSF6Tth_Niw,830
|
|
195
|
+
sparknlp/annotator/spell_check/context_spell_checker.py,sha256=-YFymfJoMr_4yTSoiWMcrPq4JbyygaRONX-tMQXHw9k,31989
|
|
196
|
+
sparknlp/annotator/spell_check/norvig_sweeting.py,sha256=6ET9KnAqXIQDJ5U9px1ixUbC6R63ln_ljruvh_oLiwA,13197
|
|
197
|
+
sparknlp/annotator/spell_check/symmetric_delete.py,sha256=ms8BPrFOITqppShZmEEiqVhjE-pwVb7jI5NSCQ4XM0A,11058
|
|
198
|
+
sparknlp/annotator/token/__init__.py,sha256=cu5njC9peW4_toFOvnHgYzHXsMqxlpPf1p7C4VxoDMk,861
|
|
199
|
+
sparknlp/annotator/token/chunk_tokenizer.py,sha256=vijOAsE7eFRXd3Z5UcYJ-jSh8-1F2qRq0wJfN2xwUHY,4686
|
|
200
|
+
sparknlp/annotator/token/recursive_tokenizer.py,sha256=0uiSRd541TAwGu18iAfiBln2aKSjEbrk52or3nv8bUE,7967
|
|
201
|
+
sparknlp/annotator/token/regex_tokenizer.py,sha256=FG2HvFwMb1G_4grfyIQaeBpaAgKv_xotLtrGlmejDsg,7639
|
|
202
|
+
sparknlp/annotator/token/tokenizer.py,sha256=Me3P3wogUKUJ7O7_2wLdPzF00vKpp_sHuiztpGWRVpU,19939
|
|
203
|
+
sparknlp/annotator/ws/__init__.py,sha256=-l8bnl8Z6lGXWOBdRIBZ6958fzTHt4o87QhhLHIFF8A,693
|
|
204
|
+
sparknlp/annotator/ws/word_segmenter.py,sha256=rrbshwn5wzXIHpCCDji6ZcsmiARpuA82_p_6TgNHfRc,16365
|
|
205
|
+
sparknlp/base/__init__.py,sha256=ug0e79fv03D67mUqmb-i-a89vOa6EcRx49Z0fS6rT3g,1361
|
|
206
|
+
sparknlp/base/audio_assembler.py,sha256=HKa9mXvmuMUrjTihUZkppGj-WJjcUrm2BGapNuPifyI,3320
|
|
207
|
+
sparknlp/base/doc2_chunk.py,sha256=TyvbdJNkVo9favHlOEoH5JwKbjpk5ZVJ75p8Cilp9jM,6551
|
|
208
|
+
sparknlp/base/document_assembler.py,sha256=zl-SXWMTR3B0EZ8z6SWYchCwEo-61FhU6u7dHUKDIOg,6697
|
|
209
|
+
sparknlp/base/embeddings_finisher.py,sha256=5QU1Okgl2ULrPVf4ze1H0SsRCMYXWGARtUsT7dagBYA,7659
|
|
210
|
+
sparknlp/base/finisher.py,sha256=V4wkMm9Ug09q4zTQc9T9Wr-awmu2Hu-eNaJ039YgZXM,8583
|
|
211
|
+
sparknlp/base/gguf_ranking_finisher.py,sha256=tzoisuD70myfHo3t4WFtBs8i1jwdjkwXCbWknDEXOHk,8315
|
|
212
|
+
sparknlp/base/graph_finisher.py,sha256=a8fxk3ei2YQw6s0Y9Yy8oMOF1i1XUrgqaiwVE0VPt4w,4834
|
|
213
|
+
sparknlp/base/has_recursive_fit.py,sha256=P55rSHLIXhihXWS2bOC_DskcQTc3njieVD1JkjS2bcA,849
|
|
214
|
+
sparknlp/base/has_recursive_transform.py,sha256=UkGNgo4LMsjQC-Coeefg4bJcg7FoPcPiG382zEa6Ywk,841
|
|
215
|
+
sparknlp/base/image_assembler.py,sha256=-ylzVaDdjJBDQNkTixsCn7WvFB8cqC3_lPdvdiJu0aM,6168
|
|
216
|
+
sparknlp/base/light_pipeline.py,sha256=2lOstyyK0o6L3BHPIZWQBpIKtJ7LcSz3Pvgo6eZDs5U,17023
|
|
217
|
+
sparknlp/base/multi_document_assembler.py,sha256=4htET1fRAeOB6zhsNXsBq5rKZvn-LGD4vrFRjPZeqow,7070
|
|
218
|
+
sparknlp/base/prompt_assembler.py,sha256=_C_9MdHqsxUjSOa3TqCV-6sSfSiRyhfHBQG5m7RlqxY,11578
|
|
219
|
+
sparknlp/base/recursive_pipeline.py,sha256=V9rTnu8KMwgjoceykN9pF1mKGtOkkuiC_n9v8dE3LDk,4279
|
|
220
|
+
sparknlp/base/table_assembler.py,sha256=Kxu3R2fY6JgCxEc07ibsMsjip6dgcPDHLiWAZ8gC_d8,5102
|
|
221
|
+
sparknlp/base/token_assembler.py,sha256=qiHry07L7mVCqeHSH6hHxLygv1AsfZIE4jy1L75L3Do,5075
|
|
222
|
+
sparknlp/common/__init__.py,sha256=bdnDseYWsKnsBk4KdO_NbPJshF_CeqhO2NFXV1Vu_Ts,1205
|
|
223
|
+
sparknlp/common/annotator_approach.py,sha256=CbkyaWl6rRX_VaXz2xJCjofijRGJGeJCsqQTDQgNTAw,1765
|
|
224
|
+
sparknlp/common/annotator_model.py,sha256=l1vDFi2m_WbWg47Jq0F8DygjndUQhv9Ftfcc8Iceb8s,1880
|
|
225
|
+
sparknlp/common/annotator_properties.py,sha256=7B1os7pBUfHo6b7IPQAXQ-nir0u3tQLzDpAg83h_iqQ,4332
|
|
226
|
+
sparknlp/common/annotator_type.py,sha256=ash2Ip1IOOiJamPVyy_XQj8Ja_DRHm0b9Vj4Ni75oKM,1225
|
|
227
|
+
sparknlp/common/completion_post_processing.py,sha256=sqcjewfrpIBZ4KFQ1XPYJI7luHIStnv6PovkehFxeOg,1423
|
|
228
|
+
sparknlp/common/coverage_result.py,sha256=No4PSh1HSs3PyRI1zC47x65tWgfirqPI290icHQoXEI,823
|
|
229
|
+
sparknlp/common/match_strategy.py,sha256=kt1MUPqU1wCwk5qCdYk6jubHbU-5yfAYxb9jjAOrdnY,1678
|
|
230
|
+
sparknlp/common/properties.py,sha256=7eBxODxKmFQAgOtrxUH9ly4LugUlkNRVXNQcM60AUK4,53025
|
|
231
|
+
sparknlp/common/read_as.py,sha256=imxPGwV7jr4Li_acbo0OAHHRGCBbYv-akzEGaBWEfcY,1226
|
|
232
|
+
sparknlp/common/recursive_annotator_approach.py,sha256=vqugBw22cE3Ff7PIpRlnYFuOlchgL0nM26D8j-NdpqU,1449
|
|
233
|
+
sparknlp/common/storage.py,sha256=D91H3p8EIjNspjqAYu6ephRpCUtdcAir4_PrAbkIQWE,4842
|
|
234
|
+
sparknlp/common/utils.py,sha256=Yne6yYcwKxhOZC-U4qfYoDhWUP_6BIaAjI5X_P_df1E,1306
|
|
235
|
+
sparknlp/internal/__init__.py,sha256=m7Y7y-IPkB6aJuGUCM54eOueGOEt65C3ujAzN16hegQ,40995
|
|
236
|
+
sparknlp/internal/annotator_java_ml.py,sha256=UGPoThG0rGXUOXGSQnDzEDW81Mu1s5RPF29v7DFyE3c,1187
|
|
237
|
+
sparknlp/internal/annotator_transformer.py,sha256=fXmc2IWXGybqZpbEU9obmbdBYPc798y42zvSB4tqV9U,1448
|
|
238
|
+
sparknlp/internal/extended_java_wrapper.py,sha256=hwP0133-hDiDf5sBF-P3MtUsuuDj1PpQbtGZQIRwzfk,2240
|
|
239
|
+
sparknlp/internal/params_getters_setters.py,sha256=LtaKAzD8fEupEPy9vYOI2St7A3PcZkRbZnjSB8d1i_c,2136
|
|
240
|
+
sparknlp/internal/recursive.py,sha256=YfsjOmUUccFOHxheIXf-rps8yc4PPqFCje94_7dS4iw,2756
|
|
241
|
+
sparknlp/logging/__init__.py,sha256=DoROFF5KLZe4t4Q-OHxqk1nhqbw9NQ-wb64y8icNwgw,642
|
|
242
|
+
sparknlp/logging/comet.py,sha256=_ZBi9-hlilCAnd4lvdYMWiq4Vqsppv8kow3k0cf-NG4,15958
|
|
243
|
+
sparknlp/partition/__init__.py,sha256=L0w-yv_HnnvoKlSX5MzI2GKHW3RLLfGyq8bgWYVeKjU,749
|
|
244
|
+
sparknlp/partition/partition.py,sha256=GXEAUvOea04Vc_JK0z112cAKFrJ4AEpjLJ8xlzZt6Kw,8551
|
|
245
|
+
sparknlp/partition/partition_properties.py,sha256=J8vuJpJ9MkOJR2AJnsKJYR-IyZPjjKGC-OjR214NnfQ,26014
|
|
246
|
+
sparknlp/partition/partition_transformer.py,sha256=lRR1h-IMlHR8M0VeB50SbU39GHHF5PgMaJ42qOriS6A,6855
|
|
247
|
+
sparknlp/pretrained/__init__.py,sha256=GV-x9UBK8F2_IR6zYatrzFcVJtkSUIMbxqWsxRUePmQ,793
|
|
248
|
+
sparknlp/pretrained/pretrained_pipeline.py,sha256=lquxiaABuA68Rmu7csamJPqBoRJqMUO0oNHsmEZDAIs,5740
|
|
249
|
+
sparknlp/pretrained/resource_downloader.py,sha256=8_-rpvO2LsX_Lq4wMPif2ca3RlJZWEabt8pDm2xymiI,7806
|
|
250
|
+
sparknlp/pretrained/utils.py,sha256=T1MrvW_DaWk_jcOjVLOea0NMFE9w8fe0ZT_5urZ_nEY,1099
|
|
251
|
+
sparknlp/reader/__init__.py,sha256=-Toj3AIBki-zXPpV8ezFTI2LX1yP_rK2bhpoa8nBkTw,685
|
|
252
|
+
sparknlp/reader/enums.py,sha256=MNGug9oJ1BBLM1Pbske13kAabalDzHa2kucF5xzFpHs,770
|
|
253
|
+
sparknlp/reader/pdf_to_text.py,sha256=eWw-cwjosmcSZ9eHso0F5QQoeGBBnwsOhzhCXXvMjZA,7169
|
|
254
|
+
sparknlp/reader/reader2doc.py,sha256=FFMnKXdBeJYj3HIGMSj2PClbllgC0YO0KTuCukFHnEg,4823
|
|
255
|
+
sparknlp/reader/reader2image.py,sha256=k3gb4LEiqDV-pnD-HEaA1KHoAxXmoYys2Y817i1yvP0,4557
|
|
256
|
+
sparknlp/reader/reader2table.py,sha256=3_HwFkHy9tYkSqBWCiWIeolAXlUZQMfXJaPo4GcigPI,1472
|
|
257
|
+
sparknlp/reader/reader_assembler.py,sha256=AgkA3BaZ_00Eor4D84lZLxx04n2pDE_uatO535RAs9M,5655
|
|
258
|
+
sparknlp/reader/sparknlp_reader.py,sha256=MJs8v_ECYaV1SOabI1L_2MkVYEDVImtwgbYypO7DJSY,20623
|
|
259
|
+
sparknlp/training/__init__.py,sha256=qREi9u-5Vc2VjpL6-XZsyvu5jSEIdIhowW7_kKaqMqo,852
|
|
260
|
+
sparknlp/training/conll.py,sha256=wKBiSTrjc6mjsl7Nyt6B8f4yXsDJkZb-sn8iOjix9cE,6961
|
|
261
|
+
sparknlp/training/conllu.py,sha256=8r3i-tmyrLsyk1DtZ9uo2mMDCWb1yw2Y5W6UsV13MkY,4953
|
|
262
|
+
sparknlp/training/pos.py,sha256=YchvPWksMAYvM6XPLNzbS-Kr96CRpLMSr21qe76m1PY,4091
|
|
263
|
+
sparknlp/training/pub_tator.py,sha256=2DWuNJqAl81_Izaa_qoQMHkpeqDP8sA7mhehc6vI7Gw,3331
|
|
264
|
+
sparknlp/training/spacy_to_annotation.py,sha256=b8WCqwr0kBRWa695ajDAxNVFg_z0yD1U1_dS3ryMY0I,6826
|
|
265
|
+
sparknlp/training/tfgraphs.py,sha256=Hl3vp9VhLX_IinqOvtIH0r9Bf2BgcGrFKG5wBu3qTmc,244
|
|
266
|
+
sparknlp/training/_tf_graph_builders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
267
|
+
sparknlp/training/_tf_graph_builders/graph_builders.py,sha256=0WqVTRnZValftGpGm6tYJCOQmJUL_B2pjZSQu8dLqs0,10819
|
|
268
|
+
sparknlp/training/_tf_graph_builders/ner_dl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
269
|
+
sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py,sha256=MGS6hq9Ne3o0-49uJ7hGM7WG_Go9BCoTiv4VVgwi5rg,1311
|
|
270
|
+
sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py,sha256=vBkL6CIMi_o4JT_gc1IHvYTip9kOFK9XYQeLqp7ZxVo,2593
|
|
271
|
+
sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py,sha256=P7iHf5UEqC-Gj5BRBpT8TmUcXHDJTcRxwX0TyS9DMM0,22502
|
|
272
|
+
sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py,sha256=k5CQ7gKV6HZbZMB8cKLUJuZxoZWlP_DFWdZ--aIDwsc,2356
|
|
273
|
+
sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py,sha256=pAxjWhjazSX8Vg0MFqJiuRVw1IbnQNSs-8Xp26L4nko,870
|
|
274
|
+
sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py,sha256=a2WHiHzJMrjyGv2TeT6WM3qx08IjbLn9p4OiRsxPqiQ,1228
|
|
275
|
+
sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py,sha256=Vxj5SXhjJ4MuUzuqfBDLp-l3EZiUk1hRz-et3AWdNdM,14939
|
|
276
|
+
sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py,sha256=nVsj4Ae0wgidK0vaM_FXcXrj7wCzBVBYosOHBG8YWfw,6616
|
|
277
|
+
sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py,sha256=IvJydBFTmhzgMiqZNJWe7p0fMqETQHReM4OsEnjM6PA,8072
|
|
278
|
+
sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py,sha256=nVTM4HBh2w6R3PXwkdXXOXJCjWfJNI-p2HGImISkXmg,25976
|
|
279
|
+
sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py,sha256=y5rIxEySSYKjLC7yIMdUEcmCmPGaPmqezi3g_kzHaJQ,11803
|
|
280
|
+
sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py,sha256=Lh0R9PbrxxKa1mXK7KFcH2qBt-n4c8qPPbMbwdi4HPU,166408
|
|
281
|
+
sparknlp/training/_tf_graph_builders_1x/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
282
|
+
sparknlp/training/_tf_graph_builders_1x/graph_builders.py,sha256=UnR0Hw5rfvldIlm6ZyQbELqaoSUoaHEcP5EKQro65wY,9794
|
|
283
|
+
sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
284
|
+
sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py,sha256=jRpq_938GZwyEiqYJpToEM21hgvv3YPEIlbC8iQ8Ez0,1099
|
|
285
|
+
sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py,sha256=R4yHFN3doOKZLKgUn399y08v_iFaIxmnvE-1_vag86k,2594
|
|
286
|
+
sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py,sha256=EoCSdcIjqQ3wv13MAuuWrKV8wyVBP0SbOEW41omHlR0,23189
|
|
287
|
+
sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py,sha256=k5CQ7gKV6HZbZMB8cKLUJuZxoZWlP_DFWdZ--aIDwsc,2356
|
|
288
|
+
sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py,sha256=pAxjWhjazSX8Vg0MFqJiuRVw1IbnQNSs-8Xp26L4nko,870
|
|
289
|
+
spark_nlp-6.2.1.dist-info/METADATA,sha256=XJ_4ZQEgaGdhhheNK7fbIY4xx6LV_1Owq5b2XGqPmGc,19775
|
|
290
|
+
spark_nlp-6.2.1.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
|
|
291
|
+
spark_nlp-6.2.1.dist-info/top_level.txt,sha256=uuytur4pyMRw2H_txNY2ZkaucZHUs22QF8-R03ch_-E,13
|
|
292
|
+
spark_nlp-6.2.1.dist-info/RECORD,,
|
sparknlp/__init__.py
CHANGED
|
@@ -1,10 +1,34 @@
|
|
|
1
|
+
# Copyright 2017-2022 John Snow Labs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import subprocess
|
|
1
16
|
import sys
|
|
17
|
+
import threading
|
|
18
|
+
|
|
19
|
+
from pyspark.conf import SparkConf
|
|
20
|
+
from pyspark.context import SparkContext
|
|
21
|
+
from pyspark.java_gateway import launch_gateway
|
|
2
22
|
from pyspark.sql import SparkSession
|
|
23
|
+
|
|
3
24
|
from sparknlp import annotator
|
|
4
|
-
|
|
25
|
+
# Must be declared here one by one or else PretrainedPipeline will fail with AttributeError
|
|
26
|
+
from sparknlp.base import DocumentAssembler, MultiDocumentAssembler, Finisher, EmbeddingsFinisher, TokenAssembler, \
|
|
27
|
+
Doc2Chunk, AudioAssembler, GraphFinisher, ImageAssembler, TableAssembler
|
|
28
|
+
from sparknlp.reader import SparkNLPReader
|
|
5
29
|
|
|
6
30
|
sys.modules['com.johnsnowlabs.nlp.annotators'] = annotator
|
|
7
|
-
sys.modules['com.
|
|
31
|
+
sys.modules['com.johnsnowlabs.nlp.annotators.tokenizer'] = annotator
|
|
8
32
|
sys.modules['com.johnsnowlabs.nlp.annotators.tokenizer.wordpiece'] = annotator
|
|
9
33
|
sys.modules['com.johnsnowlabs.nlp.annotators.ner'] = annotator
|
|
10
34
|
sys.modules['com.johnsnowlabs.nlp.annotators.ner.regex'] = annotator
|
|
@@ -31,37 +55,267 @@ sys.modules['com.johnsnowlabs.nlp.annotators.spell.context'] = annotator
|
|
|
31
55
|
sys.modules['com.johnsnowlabs.nlp.annotators.ld'] = annotator
|
|
32
56
|
sys.modules['com.johnsnowlabs.nlp.annotators.ld.dl'] = annotator
|
|
33
57
|
sys.modules['com.johnsnowlabs.nlp.annotators.sentence_detector_dl'] = annotator
|
|
58
|
+
sys.modules['com.johnsnowlabs.nlp.annotators.seq2seq'] = annotator
|
|
59
|
+
sys.modules['com.johnsnowlabs.nlp.annotators.ws'] = annotator
|
|
60
|
+
sys.modules['com.johnsnowlabs.nlp.annotators.er'] = annotator
|
|
61
|
+
sys.modules['com.johnsnowlabs.nlp.annotators.coref'] = annotator
|
|
62
|
+
sys.modules['com.johnsnowlabs.nlp.annotators.cv'] = annotator
|
|
63
|
+
sys.modules['com.johnsnowlabs.nlp.annotators.audio'] = annotator
|
|
64
|
+
sys.modules['com.johnsnowlabs.ml.ai'] = annotator
|
|
34
65
|
|
|
35
66
|
annotators = annotator
|
|
36
67
|
embeddings = annotator
|
|
37
68
|
|
|
69
|
+
__version__ = "6.2.1"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def start(gpu=False,
|
|
73
|
+
apple_silicon=False,
|
|
74
|
+
aarch64=False,
|
|
75
|
+
memory="16G",
|
|
76
|
+
cache_folder="",
|
|
77
|
+
log_folder="",
|
|
78
|
+
cluster_tmp_dir="",
|
|
79
|
+
params=None,
|
|
80
|
+
real_time_output=False,
|
|
81
|
+
output_level=1):
|
|
82
|
+
"""Starts a PySpark instance with default parameters for Spark NLP.
|
|
83
|
+
|
|
84
|
+
The default parameters would result in the equivalent of:
|
|
85
|
+
|
|
86
|
+
.. code-block:: python
|
|
87
|
+
|
|
88
|
+
SparkSession.builder \\
|
|
89
|
+
.appName("Spark NLP") \\
|
|
90
|
+
.master("local[*]") \\
|
|
91
|
+
.config("spark.driver.memory", "16G") \\
|
|
92
|
+
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \\
|
|
93
|
+
.config("spark.kryoserializer.buffer.max", "2000M") \\
|
|
94
|
+
.config("spark.driver.maxResultSize", "0") \\
|
|
95
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:|release|") \\
|
|
96
|
+
.getOrCreate()
|
|
97
|
+
|
|
98
|
+
Parameters
|
|
99
|
+
----------
|
|
100
|
+
gpu : bool, optional
|
|
101
|
+
Whether to enable GPU acceleration (must be set up correctly), by default False
|
|
102
|
+
apple_silicon : bool, optional
|
|
103
|
+
Whether to enable Apple Silicon support for macOS
|
|
104
|
+
aarch64 : bool, optional
|
|
105
|
+
Whether to enable Linux Aarch64 support
|
|
106
|
+
memory : str, optional
|
|
107
|
+
How much memory to allocate for the Spark driver, by default "16G"
|
|
108
|
+
cache_folder : str, optional
|
|
109
|
+
The location to download and extract pretrained Models and Pipelines. If not
|
|
110
|
+
set, it will be in the user's home directory under `cache_pretrained`.
|
|
111
|
+
cluster_tmp_dir : str, optional
|
|
112
|
+
The location to use on a cluster for temporary files such as unpacking indexes
|
|
113
|
+
for WordEmbeddings. By default, this location is the location of
|
|
114
|
+
`hadoop.tmp.dir` set via Hadoop configuration for Apache Spark. NOTE: `S3` is
|
|
115
|
+
not supported and it must be local, HDFS, or DBFS.
|
|
116
|
+
params : dict, optional
|
|
117
|
+
Custom parameters to set for the Spark configuration, by default None.
|
|
118
|
+
log_folder : str, optional
|
|
119
|
+
The location to save logs from annotators during training. If not set, it will
|
|
120
|
+
be in the user's home directory under `annotator_logs`.
|
|
121
|
+
real_time_output : bool, optional
|
|
122
|
+
Whether to read and print JVM output in real time, by default False
|
|
123
|
+
output_level : int, optional
|
|
124
|
+
Output level for logs, by default 1
|
|
125
|
+
|
|
126
|
+
Notes
|
|
127
|
+
-----
|
|
128
|
+
Since Spark version 3.2, Python 3.6 is deprecated. If you are using this
|
|
129
|
+
python version, consider sticking to lower versions of Spark.
|
|
130
|
+
|
|
131
|
+
Returns
|
|
132
|
+
-------
|
|
133
|
+
:class:`SparkSession`
|
|
134
|
+
The initiated Spark session.
|
|
135
|
+
|
|
136
|
+
"""
|
|
137
|
+
current_version = __version__
|
|
38
138
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
maven_spark24 = "com.johnsnowlabs.nlp:spark-nlp_2.11:{}".format(current_version)
|
|
42
|
-
maven_gpu_spark24 = "com.johnsnowlabs.nlp:spark-nlp-gpu_2.11:{}".format(current_version)
|
|
43
|
-
maven_spark23 = "com.johnsnowlabs.nlp:spark-nlp-spark23_2.11:{}".format(current_version)
|
|
44
|
-
maven_gpu_spark23 = "com.johnsnowlabs.nlp:spark-nlp-gpu-spark23_2.11:{}".format(current_version)
|
|
45
|
-
|
|
46
|
-
builder = SparkSession.builder \
|
|
47
|
-
.appName("Spark NLP") \
|
|
48
|
-
.master("local[*]") \
|
|
49
|
-
.config("spark.driver.memory", "16G") \
|
|
50
|
-
.config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") \
|
|
51
|
-
.config("spark.kryoserializer.buffer.max", "1000M") \
|
|
52
|
-
.config("spark.driver.maxResultSize", "0")
|
|
53
|
-
|
|
54
|
-
if gpu and spark23:
|
|
55
|
-
builder.config("spark.jars.packages", maven_gpu_spark23)
|
|
56
|
-
elif spark23:
|
|
57
|
-
builder.config("spark.jars.packages", maven_spark23)
|
|
58
|
-
elif gpu:
|
|
59
|
-
builder.config("spark.jars.packages", maven_gpu_spark24)
|
|
139
|
+
if params is None:
|
|
140
|
+
params = {}
|
|
60
141
|
else:
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
142
|
+
if not isinstance(params, dict):
|
|
143
|
+
raise TypeError('params must be a dictionary like {"spark.executor.memory": "8G"}')
|
|
144
|
+
|
|
145
|
+
if '_instantiatedSession' in dir(SparkSession) and SparkSession._instantiatedSession is not None:
|
|
146
|
+
print('Warning::Spark Session already created, some configs may not take.')
|
|
147
|
+
|
|
148
|
+
driver_cores = "*"
|
|
149
|
+
for key, value in params.items():
|
|
150
|
+
if key == "spark.driver.cores":
|
|
151
|
+
driver_cores = f"{value}"
|
|
152
|
+
else:
|
|
153
|
+
driver_cores = "*"
|
|
154
|
+
|
|
155
|
+
class SparkNLPConfig:
|
|
156
|
+
|
|
157
|
+
def __init__(self):
|
|
158
|
+
self.master, self.app_name = "local[{}]".format(driver_cores), "Spark NLP"
|
|
159
|
+
self.serializer, self.serializer_max_buffer = "org.apache.spark.serializer.KryoSerializer", "2000M"
|
|
160
|
+
self.driver_max_result_size = "0"
|
|
161
|
+
# Spark NLP on CPU or GPU
|
|
162
|
+
self.maven_spark3 = "com.johnsnowlabs.nlp:spark-nlp_2.12:{}".format(current_version)
|
|
163
|
+
self.maven_gpu_spark3 = "com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:{}".format(current_version)
|
|
164
|
+
# Spark NLP on Apple Silicon
|
|
165
|
+
self.maven_silicon = "com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:{}".format(current_version)
|
|
166
|
+
# Spark NLP on Linux Aarch64
|
|
167
|
+
self.maven_aarch64 = "com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:{}".format(current_version)
|
|
168
|
+
|
|
169
|
+
def start_without_realtime_output():
|
|
170
|
+
builder = SparkSession.builder \
|
|
171
|
+
.appName(spark_nlp_config.app_name) \
|
|
172
|
+
.master(spark_nlp_config.master) \
|
|
173
|
+
.config("spark.driver.memory", memory) \
|
|
174
|
+
.config("spark.serializer", spark_nlp_config.serializer) \
|
|
175
|
+
.config("spark.kryoserializer.buffer.max", spark_nlp_config.serializer_max_buffer) \
|
|
176
|
+
.config("spark.driver.maxResultSize", spark_nlp_config.driver_max_result_size)
|
|
177
|
+
|
|
178
|
+
if apple_silicon:
|
|
179
|
+
spark_jars_packages = spark_nlp_config.maven_silicon
|
|
180
|
+
elif aarch64:
|
|
181
|
+
spark_jars_packages = spark_nlp_config.maven_aarch64
|
|
182
|
+
elif gpu:
|
|
183
|
+
spark_jars_packages = spark_nlp_config.maven_gpu_spark3
|
|
184
|
+
else:
|
|
185
|
+
spark_jars_packages = spark_nlp_config.maven_spark3
|
|
186
|
+
|
|
187
|
+
if cache_folder != '':
|
|
188
|
+
builder.config("spark.jsl.settings.pretrained.cache_folder", cache_folder)
|
|
189
|
+
if log_folder != '':
|
|
190
|
+
builder.config("spark.jsl.settings.annotator.log_folder", log_folder)
|
|
191
|
+
if cluster_tmp_dir != '':
|
|
192
|
+
builder.config("spark.jsl.settings.storage.cluster_tmp_dir", cluster_tmp_dir)
|
|
193
|
+
|
|
194
|
+
if params.get("spark.jars.packages") is None:
|
|
195
|
+
builder.config("spark.jars.packages", spark_jars_packages)
|
|
196
|
+
|
|
197
|
+
for key, value in params.items():
|
|
198
|
+
if key == "spark.jars.packages":
|
|
199
|
+
packages = spark_jars_packages + "," + value
|
|
200
|
+
builder.config(key, packages)
|
|
201
|
+
else:
|
|
202
|
+
builder.config(key, value)
|
|
203
|
+
|
|
204
|
+
return builder.getOrCreate()
|
|
205
|
+
|
|
206
|
+
def start_with_realtime_output():
|
|
64
207
|
|
|
208
|
+
class SparkWithCustomGateway:
|
|
209
|
+
|
|
210
|
+
def __init__(self):
|
|
211
|
+
spark_conf = SparkConf()
|
|
212
|
+
spark_conf.setAppName(spark_nlp_config.app_name)
|
|
213
|
+
spark_conf.setMaster(spark_nlp_config.master)
|
|
214
|
+
spark_conf.set("spark.driver.memory", memory)
|
|
215
|
+
spark_conf.set("spark.serializer", spark_nlp_config.serializer)
|
|
216
|
+
spark_conf.set("spark.kryoserializer.buffer.max", spark_nlp_config.serializer_max_buffer)
|
|
217
|
+
spark_conf.set("spark.driver.maxResultSize", spark_nlp_config.driver_max_result_size)
|
|
218
|
+
|
|
219
|
+
if apple_silicon:
|
|
220
|
+
spark_jars_packages = spark_nlp_config.maven_silicon
|
|
221
|
+
elif aarch64:
|
|
222
|
+
spark_jars_packages = spark_nlp_config.maven_aarch64
|
|
223
|
+
elif gpu:
|
|
224
|
+
spark_jars_packages = spark_nlp_config.maven_gpu_spark3
|
|
225
|
+
else:
|
|
226
|
+
spark_jars_packages = spark_nlp_config.maven_spark3
|
|
227
|
+
|
|
228
|
+
if cache_folder != '':
|
|
229
|
+
spark_conf.set("spark.jsl.settings.pretrained.cache_folder", cache_folder)
|
|
230
|
+
if log_folder != '':
|
|
231
|
+
spark_conf.set("spark.jsl.settings.annotator.log_folder", log_folder)
|
|
232
|
+
if cluster_tmp_dir != '':
|
|
233
|
+
spark_conf.set("spark.jsl.settings.storage.cluster_tmp_dir", cluster_tmp_dir)
|
|
234
|
+
|
|
235
|
+
if params.get("spark.jars.packages") is None:
|
|
236
|
+
spark_conf.set("spark.jars.packages", spark_jars_packages)
|
|
237
|
+
|
|
238
|
+
for key, value in params.items():
|
|
239
|
+
if key == "spark.jars.packages":
|
|
240
|
+
packages = spark_jars_packages + "," + value
|
|
241
|
+
spark_conf.set(key, packages)
|
|
242
|
+
else:
|
|
243
|
+
spark_conf.set(key, value)
|
|
244
|
+
|
|
245
|
+
# Make the py4j JVM stdout and stderr available without buffering
|
|
246
|
+
popen_kwargs = {
|
|
247
|
+
'stdout': subprocess.PIPE,
|
|
248
|
+
'stderr': subprocess.PIPE,
|
|
249
|
+
'bufsize': 0
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
# Launch the gateway with our custom settings
|
|
253
|
+
self.gateway = launch_gateway(conf=spark_conf, popen_kwargs=popen_kwargs)
|
|
254
|
+
self.process = self.gateway.proc
|
|
255
|
+
# Use the gateway we launched
|
|
256
|
+
spark_context = SparkContext(gateway=self.gateway)
|
|
257
|
+
self.spark_session = SparkSession(spark_context)
|
|
258
|
+
|
|
259
|
+
self.out_thread = threading.Thread(target=self.output_reader)
|
|
260
|
+
self.error_thread = threading.Thread(target=self.error_reader)
|
|
261
|
+
self.std_background_listeners()
|
|
262
|
+
|
|
263
|
+
def std_background_listeners(self):
    """Start the two background reader threads held on this instance.

    ``self.out_thread`` is started first, then ``self.error_thread``.
    """
    for listener in (self.out_thread, self.error_thread):
        listener.start()
|
|
266
|
+
|
|
267
|
+
def output_reader(self):
    """Relay the launched process' stdout to Python's stdout, line by line.

    Reads ``self.process.stdout`` until ``readline`` returns ``b''`` (end of
    stream) and prints every line as-is, without appending another newline.
    """
    for line in iter(self.process.stdout.readline, b''):
        # errors='replace': bytes that are not valid UTF-8 must not raise
        # UnicodeDecodeError here, because that would end this reader and
        # leave the remainder of the pipe unread.
        print(line.decode('utf-8', errors='replace'), end='')
|
|
270
|
+
|
|
271
|
+
def error_reader(self):
    """Relay the launched process' stderr to Python's stdout, coloured red.

    Lines are printed only when ``output_level`` (a name resolved outside
    this method) equals ``0``. For any other value each line is still read
    from ``self.process.stderr`` but discarded.
    """
    RED = '\033[91m'
    RESET = '\033[0m'
    for line in iter(self.process.stderr.readline, b''):
        if output_level != 0:
            # output just info: the line is consumed but not shown
            continue
        # errors='replace': bytes that are not valid UTF-8 must not raise
        # UnicodeDecodeError here, because that would end this reader and
        # leave the remainder of the pipe unread.
        print(RED + line.decode('utf-8', errors='replace') + RESET, end='')
|
|
280
|
+
|
|
281
|
+
def shutdown(self):
    """Stop the Spark session and tear down the gateway and reader threads.

    The gateway shutdown, the wait on the launched process and the joins of
    both reader threads run even when ``spark_session.stop()`` raises, so a
    failing stop no longer leaves the process and threads behind. The
    original exception still propagates to the caller.
    """
    try:
        self.spark_session.stop()
    finally:
        self.gateway.shutdown()
        # NOTE(review): communicate() reads the same pipes the reader
        # threads are consuming; kept as in the original - confirm whether
        # a plain wait() would be sufficient here.
        self.process.communicate()

        self.out_thread.join()
        self.error_thread.join()
|
|
288
|
+
|
|
289
|
+
return SparkWithCustomGateway()
|
|
290
|
+
|
|
291
|
+
spark_nlp_config = SparkNLPConfig()
|
|
292
|
+
|
|
293
|
+
if real_time_output:
|
|
294
|
+
# Available from Spark 3.0.x
|
|
295
|
+
class SparkRealTimeOutput:
    """Wrap the object returned by ``start_with_realtime_output()``.

    Exposes that object's ``spark_session`` and forwards ``shutdown()``
    to it.
    """

    def __init__(self):
        gateway_wrapper = start_with_realtime_output()
        self.__spark_with_custom_gateway = gateway_wrapper
        self.spark_session = gateway_wrapper.spark_session

    def shutdown(self):
        """Delegate to the wrapped object's ``shutdown()``."""
        self.__spark_with_custom_gateway.shutdown()
|
|
303
|
+
|
|
304
|
+
return SparkRealTimeOutput().spark_session
|
|
305
|
+
else:
|
|
306
|
+
spark_session = start_without_realtime_output()
|
|
307
|
+
return spark_session
|
|
308
|
+
|
|
309
|
+
def read(params=None):
    """Create a ``SparkNLPReader`` bound to a session obtained from ``start()``.

    Parameters
    ----------
    params : optional
        Passed through unchanged to ``SparkNLPReader``.

    Returns
    -------
    SparkNLPReader
        Reader constructed with the session returned by ``start()`` and
        ``params``.
    """
    return SparkNLPReader(start(), params)
|
|
65
312
|
|
|
66
313
|
def version():
    """Return the version of the installed Spark NLP package.

    Returns
    -------
    str
        The value of the module-level ``__version__``.
    """
    return __version__
|