spark-nlp 2.6.3rc1__py2.py3-none-any.whl → 6.2.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- com/johnsnowlabs/ml/__init__.py +0 -0
- com/johnsnowlabs/ml/ai/__init__.py +10 -0
- com/johnsnowlabs/nlp/__init__.py +4 -2
- spark_nlp-6.2.1.dist-info/METADATA +362 -0
- spark_nlp-6.2.1.dist-info/RECORD +292 -0
- {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/WHEEL +1 -1
- sparknlp/__init__.py +281 -27
- sparknlp/annotation.py +137 -6
- sparknlp/annotation_audio.py +61 -0
- sparknlp/annotation_image.py +82 -0
- sparknlp/annotator/__init__.py +93 -0
- sparknlp/annotator/audio/__init__.py +16 -0
- sparknlp/annotator/audio/hubert_for_ctc.py +188 -0
- sparknlp/annotator/audio/wav2vec2_for_ctc.py +161 -0
- sparknlp/annotator/audio/whisper_for_ctc.py +251 -0
- sparknlp/annotator/chunk2_doc.py +85 -0
- sparknlp/annotator/chunker.py +137 -0
- sparknlp/annotator/classifier_dl/__init__.py +61 -0
- sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
- sparknlp/annotator/classifier_dl/albert_for_question_answering.py +172 -0
- sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/albert_for_token_classification.py +179 -0
- sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
- sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
- sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +161 -0
- sparknlp/annotator/classifier_dl/bert_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +202 -0
- sparknlp/annotator/classifier_dl/bert_for_token_classification.py +177 -0
- sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +212 -0
- sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +205 -0
- sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +173 -0
- sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
- sparknlp/annotator/classifier_dl/classifier_dl.py +320 -0
- sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +198 -0
- sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +175 -0
- sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +193 -0
- sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +175 -0
- sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +211 -0
- sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
- sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +176 -0
- sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +148 -0
- sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +188 -0
- sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
- sparknlp/annotator/classifier_dl/multi_classifier_dl.py +395 -0
- sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
- sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +189 -0
- sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +225 -0
- sparknlp/annotator/classifier_dl/sentiment_dl.py +378 -0
- sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +170 -0
- sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
- sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +168 -0
- sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +173 -0
- sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +225 -0
- sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +201 -0
- sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +176 -0
- sparknlp/annotator/cleaners/__init__.py +15 -0
- sparknlp/annotator/cleaners/cleaner.py +202 -0
- sparknlp/annotator/cleaners/extractor.py +191 -0
- sparknlp/annotator/coref/__init__.py +1 -0
- sparknlp/annotator/coref/spanbert_coref.py +221 -0
- sparknlp/annotator/cv/__init__.py +29 -0
- sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
- sparknlp/annotator/cv/clip_for_zero_shot_classification.py +193 -0
- sparknlp/annotator/cv/convnext_for_image_classification.py +269 -0
- sparknlp/annotator/cv/florence2_transformer.py +180 -0
- sparknlp/annotator/cv/gemma3_for_multimodal.py +346 -0
- sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
- sparknlp/annotator/cv/janus_for_multimodal.py +351 -0
- sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
- sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
- sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
- sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
- sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
- sparknlp/annotator/cv/smolvlm_transformer.py +426 -0
- sparknlp/annotator/cv/swin_for_image_classification.py +242 -0
- sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +240 -0
- sparknlp/annotator/cv/vit_for_image_classification.py +217 -0
- sparknlp/annotator/dataframe_optimizer.py +216 -0
- sparknlp/annotator/date2_chunk.py +88 -0
- sparknlp/annotator/dependency/__init__.py +17 -0
- sparknlp/annotator/dependency/dependency_parser.py +294 -0
- sparknlp/annotator/dependency/typed_dependency_parser.py +318 -0
- sparknlp/annotator/document_character_text_splitter.py +228 -0
- sparknlp/annotator/document_normalizer.py +235 -0
- sparknlp/annotator/document_token_splitter.py +175 -0
- sparknlp/annotator/document_token_splitter_test.py +85 -0
- sparknlp/annotator/embeddings/__init__.py +45 -0
- sparknlp/annotator/embeddings/albert_embeddings.py +230 -0
- sparknlp/annotator/embeddings/auto_gguf_embeddings.py +539 -0
- sparknlp/annotator/embeddings/bert_embeddings.py +208 -0
- sparknlp/annotator/embeddings/bert_sentence_embeddings.py +224 -0
- sparknlp/annotator/embeddings/bge_embeddings.py +199 -0
- sparknlp/annotator/embeddings/camembert_embeddings.py +210 -0
- sparknlp/annotator/embeddings/chunk_embeddings.py +149 -0
- sparknlp/annotator/embeddings/deberta_embeddings.py +208 -0
- sparknlp/annotator/embeddings/distil_bert_embeddings.py +221 -0
- sparknlp/annotator/embeddings/doc2vec.py +352 -0
- sparknlp/annotator/embeddings/e5_embeddings.py +195 -0
- sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
- sparknlp/annotator/embeddings/elmo_embeddings.py +251 -0
- sparknlp/annotator/embeddings/instructor_embeddings.py +204 -0
- sparknlp/annotator/embeddings/longformer_embeddings.py +211 -0
- sparknlp/annotator/embeddings/minilm_embeddings.py +189 -0
- sparknlp/annotator/embeddings/mpnet_embeddings.py +192 -0
- sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
- sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
- sparknlp/annotator/embeddings/roberta_embeddings.py +225 -0
- sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +191 -0
- sparknlp/annotator/embeddings/sentence_embeddings.py +134 -0
- sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
- sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
- sparknlp/annotator/embeddings/universal_sentence_encoder.py +211 -0
- sparknlp/annotator/embeddings/word2vec.py +353 -0
- sparknlp/annotator/embeddings/word_embeddings.py +385 -0
- sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +225 -0
- sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +194 -0
- sparknlp/annotator/embeddings/xlnet_embeddings.py +227 -0
- sparknlp/annotator/er/__init__.py +16 -0
- sparknlp/annotator/er/entity_ruler.py +267 -0
- sparknlp/annotator/graph_extraction.py +368 -0
- sparknlp/annotator/keyword_extraction/__init__.py +16 -0
- sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +270 -0
- sparknlp/annotator/ld_dl/__init__.py +16 -0
- sparknlp/annotator/ld_dl/language_detector_dl.py +199 -0
- sparknlp/annotator/lemmatizer.py +250 -0
- sparknlp/annotator/matcher/__init__.py +20 -0
- sparknlp/annotator/matcher/big_text_matcher.py +272 -0
- sparknlp/annotator/matcher/date_matcher.py +303 -0
- sparknlp/annotator/matcher/multi_date_matcher.py +109 -0
- sparknlp/annotator/matcher/regex_matcher.py +221 -0
- sparknlp/annotator/matcher/text_matcher.py +290 -0
- sparknlp/annotator/n_gram_generator.py +141 -0
- sparknlp/annotator/ner/__init__.py +21 -0
- sparknlp/annotator/ner/ner_approach.py +94 -0
- sparknlp/annotator/ner/ner_converter.py +148 -0
- sparknlp/annotator/ner/ner_crf.py +397 -0
- sparknlp/annotator/ner/ner_dl.py +591 -0
- sparknlp/annotator/ner/ner_dl_graph_checker.py +293 -0
- sparknlp/annotator/ner/ner_overwriter.py +166 -0
- sparknlp/annotator/ner/zero_shot_ner_model.py +173 -0
- sparknlp/annotator/normalizer.py +230 -0
- sparknlp/annotator/openai/__init__.py +16 -0
- sparknlp/annotator/openai/openai_completion.py +349 -0
- sparknlp/annotator/openai/openai_embeddings.py +106 -0
- sparknlp/annotator/param/__init__.py +17 -0
- sparknlp/annotator/param/classifier_encoder.py +98 -0
- sparknlp/annotator/param/evaluation_dl_params.py +130 -0
- sparknlp/annotator/pos/__init__.py +16 -0
- sparknlp/annotator/pos/perceptron.py +263 -0
- sparknlp/annotator/sentence/__init__.py +17 -0
- sparknlp/annotator/sentence/sentence_detector.py +290 -0
- sparknlp/annotator/sentence/sentence_detector_dl.py +467 -0
- sparknlp/annotator/sentiment/__init__.py +17 -0
- sparknlp/annotator/sentiment/sentiment_detector.py +208 -0
- sparknlp/annotator/sentiment/vivekn_sentiment.py +242 -0
- sparknlp/annotator/seq2seq/__init__.py +35 -0
- sparknlp/annotator/seq2seq/auto_gguf_model.py +304 -0
- sparknlp/annotator/seq2seq/auto_gguf_reranker.py +334 -0
- sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +336 -0
- sparknlp/annotator/seq2seq/bart_transformer.py +420 -0
- sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
- sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
- sparknlp/annotator/seq2seq/gpt2_transformer.py +363 -0
- sparknlp/annotator/seq2seq/llama2_transformer.py +343 -0
- sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
- sparknlp/annotator/seq2seq/m2m100_transformer.py +392 -0
- sparknlp/annotator/seq2seq/marian_transformer.py +374 -0
- sparknlp/annotator/seq2seq/mistral_transformer.py +348 -0
- sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
- sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
- sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
- sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
- sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
- sparknlp/annotator/seq2seq/qwen_transformer.py +340 -0
- sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
- sparknlp/annotator/seq2seq/t5_transformer.py +425 -0
- sparknlp/annotator/similarity/__init__.py +0 -0
- sparknlp/annotator/similarity/document_similarity_ranker.py +379 -0
- sparknlp/annotator/spell_check/__init__.py +18 -0
- sparknlp/annotator/spell_check/context_spell_checker.py +911 -0
- sparknlp/annotator/spell_check/norvig_sweeting.py +358 -0
- sparknlp/annotator/spell_check/symmetric_delete.py +299 -0
- sparknlp/annotator/stemmer.py +79 -0
- sparknlp/annotator/stop_words_cleaner.py +190 -0
- sparknlp/annotator/tf_ner_dl_graph_builder.py +179 -0
- sparknlp/annotator/token/__init__.py +19 -0
- sparknlp/annotator/token/chunk_tokenizer.py +118 -0
- sparknlp/annotator/token/recursive_tokenizer.py +205 -0
- sparknlp/annotator/token/regex_tokenizer.py +208 -0
- sparknlp/annotator/token/tokenizer.py +561 -0
- sparknlp/annotator/token2_chunk.py +76 -0
- sparknlp/annotator/ws/__init__.py +16 -0
- sparknlp/annotator/ws/word_segmenter.py +429 -0
- sparknlp/base/__init__.py +30 -0
- sparknlp/base/audio_assembler.py +95 -0
- sparknlp/base/doc2_chunk.py +169 -0
- sparknlp/base/document_assembler.py +164 -0
- sparknlp/base/embeddings_finisher.py +201 -0
- sparknlp/base/finisher.py +217 -0
- sparknlp/base/gguf_ranking_finisher.py +234 -0
- sparknlp/base/graph_finisher.py +125 -0
- sparknlp/base/has_recursive_fit.py +24 -0
- sparknlp/base/has_recursive_transform.py +22 -0
- sparknlp/base/image_assembler.py +172 -0
- sparknlp/base/light_pipeline.py +429 -0
- sparknlp/base/multi_document_assembler.py +164 -0
- sparknlp/base/prompt_assembler.py +207 -0
- sparknlp/base/recursive_pipeline.py +107 -0
- sparknlp/base/table_assembler.py +145 -0
- sparknlp/base/token_assembler.py +124 -0
- sparknlp/common/__init__.py +26 -0
- sparknlp/common/annotator_approach.py +41 -0
- sparknlp/common/annotator_model.py +47 -0
- sparknlp/common/annotator_properties.py +114 -0
- sparknlp/common/annotator_type.py +38 -0
- sparknlp/common/completion_post_processing.py +37 -0
- sparknlp/common/coverage_result.py +22 -0
- sparknlp/common/match_strategy.py +33 -0
- sparknlp/common/properties.py +1298 -0
- sparknlp/common/read_as.py +33 -0
- sparknlp/common/recursive_annotator_approach.py +35 -0
- sparknlp/common/storage.py +149 -0
- sparknlp/common/utils.py +39 -0
- sparknlp/functions.py +315 -5
- sparknlp/internal/__init__.py +1199 -0
- sparknlp/internal/annotator_java_ml.py +32 -0
- sparknlp/internal/annotator_transformer.py +37 -0
- sparknlp/internal/extended_java_wrapper.py +63 -0
- sparknlp/internal/params_getters_setters.py +71 -0
- sparknlp/internal/recursive.py +70 -0
- sparknlp/logging/__init__.py +15 -0
- sparknlp/logging/comet.py +467 -0
- sparknlp/partition/__init__.py +16 -0
- sparknlp/partition/partition.py +244 -0
- sparknlp/partition/partition_properties.py +902 -0
- sparknlp/partition/partition_transformer.py +200 -0
- sparknlp/pretrained/__init__.py +17 -0
- sparknlp/pretrained/pretrained_pipeline.py +158 -0
- sparknlp/pretrained/resource_downloader.py +216 -0
- sparknlp/pretrained/utils.py +35 -0
- sparknlp/reader/__init__.py +15 -0
- sparknlp/reader/enums.py +19 -0
- sparknlp/reader/pdf_to_text.py +190 -0
- sparknlp/reader/reader2doc.py +124 -0
- sparknlp/reader/reader2image.py +136 -0
- sparknlp/reader/reader2table.py +44 -0
- sparknlp/reader/reader_assembler.py +159 -0
- sparknlp/reader/sparknlp_reader.py +461 -0
- sparknlp/training/__init__.py +20 -0
- sparknlp/training/_tf_graph_builders/__init__.py +0 -0
- sparknlp/training/_tf_graph_builders/graph_builders.py +299 -0
- sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
- sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +41 -0
- sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +78 -0
- sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +521 -0
- sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +62 -0
- sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +28 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +36 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +385 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +183 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +235 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +665 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +245 -0
- sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +4006 -0
- sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
- sparknlp/training/_tf_graph_builders_1x/graph_builders.py +277 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +34 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +78 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +532 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +62 -0
- sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +28 -0
- sparknlp/training/conll.py +150 -0
- sparknlp/training/conllu.py +103 -0
- sparknlp/training/pos.py +103 -0
- sparknlp/training/pub_tator.py +76 -0
- sparknlp/training/spacy_to_annotation.py +57 -0
- sparknlp/training/tfgraphs.py +5 -0
- sparknlp/upload_to_hub.py +149 -0
- sparknlp/util.py +51 -5
- com/__init__.pyc +0 -0
- com/__pycache__/__init__.cpython-36.pyc +0 -0
- com/johnsnowlabs/__init__.pyc +0 -0
- com/johnsnowlabs/__pycache__/__init__.cpython-36.pyc +0 -0
- com/johnsnowlabs/nlp/__init__.pyc +0 -0
- com/johnsnowlabs/nlp/__pycache__/__init__.cpython-36.pyc +0 -0
- spark_nlp-2.6.3rc1.dist-info/METADATA +0 -36
- spark_nlp-2.6.3rc1.dist-info/RECORD +0 -48
- sparknlp/__init__.pyc +0 -0
- sparknlp/__pycache__/__init__.cpython-36.pyc +0 -0
- sparknlp/__pycache__/annotation.cpython-36.pyc +0 -0
- sparknlp/__pycache__/annotator.cpython-36.pyc +0 -0
- sparknlp/__pycache__/base.cpython-36.pyc +0 -0
- sparknlp/__pycache__/common.cpython-36.pyc +0 -0
- sparknlp/__pycache__/embeddings.cpython-36.pyc +0 -0
- sparknlp/__pycache__/functions.cpython-36.pyc +0 -0
- sparknlp/__pycache__/internal.cpython-36.pyc +0 -0
- sparknlp/__pycache__/pretrained.cpython-36.pyc +0 -0
- sparknlp/__pycache__/storage.cpython-36.pyc +0 -0
- sparknlp/__pycache__/training.cpython-36.pyc +0 -0
- sparknlp/__pycache__/util.cpython-36.pyc +0 -0
- sparknlp/annotation.pyc +0 -0
- sparknlp/annotator.py +0 -3006
- sparknlp/annotator.pyc +0 -0
- sparknlp/base.py +0 -347
- sparknlp/base.pyc +0 -0
- sparknlp/common.py +0 -193
- sparknlp/common.pyc +0 -0
- sparknlp/embeddings.py +0 -40
- sparknlp/embeddings.pyc +0 -0
- sparknlp/internal.py +0 -288
- sparknlp/internal.pyc +0 -0
- sparknlp/pretrained.py +0 -123
- sparknlp/pretrained.pyc +0 -0
- sparknlp/storage.py +0 -32
- sparknlp/storage.pyc +0 -0
- sparknlp/training.py +0 -62
- sparknlp/training.pyc +0 -0
- sparknlp/util.pyc +0 -0
- {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/top_level.txt +0 -0
sparknlp/util.py
CHANGED
|
@@ -1,4 +1,23 @@
|
|
|
1
|
+
# Copyright 2017-2022 John Snow Labs
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
"""Contains various utilities."""
|
|
15
|
+
|
|
16
|
+
|
|
1
17
|
import sparknlp.internal as _internal
|
|
18
|
+
import numpy as np
|
|
19
|
+
from pyspark.sql import Row
|
|
20
|
+
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, BinaryType
|
|
2
21
|
|
|
3
22
|
|
|
4
23
|
def get_config_path():
|
|
@@ -7,9 +26,36 @@ def get_config_path():
|
|
|
7
26
|
|
|
8
27
|
class CoNLLGenerator:
|
|
9
28
|
@staticmethod
|
|
10
|
-
def exportConllFiles(
|
|
11
|
-
|
|
29
|
+
def exportConllFiles(*args):
|
|
30
|
+
num_args = len(args)
|
|
31
|
+
if num_args == 2:
|
|
32
|
+
_internal._CoNLLGeneratorExportFromDataFrame(*args).apply()
|
|
33
|
+
elif num_args == 3:
|
|
34
|
+
_internal._CoNLLGeneratorExportFromDataFrameAndField(*args).apply()
|
|
35
|
+
elif num_args == 4:
|
|
36
|
+
_internal._CoNLLGeneratorExportFromTargetAndPipeline(*args).apply()
|
|
37
|
+
else:
|
|
38
|
+
raise NotImplementedError(f"No exportConllFiles alternative takes {num_args} parameters")
|
|
12
39
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
40
|
+
|
|
41
|
+
class EmbeddingsDataFrameUtils:
|
|
42
|
+
"""
|
|
43
|
+
Utility for creating DataFrames compatible with multimodal embedding models (e.g., E5VEmbeddings) for text-only scenarios.
|
|
44
|
+
Provides:
|
|
45
|
+
- imageSchema: the expected schema for Spark image DataFrames
|
|
46
|
+
- emptyImageRow: a dummy image row for text-only embedding
|
|
47
|
+
"""
|
|
48
|
+
imageSchema = StructType([
|
|
49
|
+
StructField(
|
|
50
|
+
"image",
|
|
51
|
+
StructType([
|
|
52
|
+
StructField("origin", StringType(), True),
|
|
53
|
+
StructField("height", IntegerType(), True),
|
|
54
|
+
StructField("width", IntegerType(), True),
|
|
55
|
+
StructField("nChannels", IntegerType(), True),
|
|
56
|
+
StructField("mode", IntegerType(), True),
|
|
57
|
+
StructField("data", BinaryType(), True),
|
|
58
|
+
]),
|
|
59
|
+
)
|
|
60
|
+
])
|
|
61
|
+
emptyImageRow = Row(Row("", 0, 0, 0, 0, bytes()))
|
com/__init__.pyc
DELETED
|
Binary file
|
|
Binary file
|
com/johnsnowlabs/__init__.pyc
DELETED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: spark-nlp
|
|
3
|
-
Version: 2.6.3rc1
|
|
4
|
-
Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
|
|
5
|
-
Home-page: http://nlp.johnsnowlabs.com
|
|
6
|
-
Author: John Snow Labs
|
|
7
|
-
License: UNKNOWN
|
|
8
|
-
Keywords: NLP spark development
|
|
9
|
-
Platform: UNKNOWN
|
|
10
|
-
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
-
Classifier: Intended Audience :: Developers
|
|
12
|
-
Classifier: Topic :: Software Development :: Build Tools
|
|
13
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
|
-
Classifier: Programming Language :: Python :: 2
|
|
15
|
-
Classifier: Programming Language :: Python :: 2.7
|
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.4
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.5
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.6
|
|
20
|
-
Description-Content-Type: text/markdown
|
|
21
|
-
|
|
22
|
-
# Spark-NLP
|
|
23
|
-
John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
|
|
24
|
-
|
|
25
|
-
## Requirements
|
|
26
|
-
Spark NLP is built on top of Apache Spark 2.4.4 and works with any user provided Spark 2.x.x it is advised to have basic knowledge of the framework and a working environment before using Spark NLP.
|
|
27
|
-
|
|
28
|
-
# Spark-NLP for Python
|
|
29
|
-
|
|
30
|
-
Dependencies on `python3-devel` and `wheel` python module
|
|
31
|
-
|
|
32
|
-
Build python package with `python3 setup.py sdist bdist_wheel`
|
|
33
|
-
|
|
34
|
-
Install with `python3 -m pip install --force-reinstall --user dist/spark_nlp-2.2.2-py2.py3-none-any.whl`
|
|
35
|
-
|
|
36
|
-
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
com/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
com/__init__.pyc,sha256=hLEyuwbCEI-_KJ-HaauMjaJ7_Eova3f2qlUtg7wbZrc,170
|
|
3
|
-
com/__pycache__/__init__.cpython-36.pyc,sha256=urKd549TaG3Rs3c1uaIi-IaQZECP9IOqchonF2WaDzo,166
|
|
4
|
-
com/johnsnowlabs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
-
com/johnsnowlabs/__init__.pyc,sha256=bhTbkZlXKxXRMmbNNd2yekBL1C8FCS5hTX4-gocBSvQ,183
|
|
6
|
-
com/johnsnowlabs/__pycache__/__init__.cpython-36.pyc,sha256=LdHBGR1z6S4-qxQSfqD4rXfTDucnAM-MLuIjERw163k,179
|
|
7
|
-
com/johnsnowlabs/nlp/__init__.py,sha256=gMSxTenGVOtckW0NxAv1P-19QIMyUrjJv5dnGX03cgQ,209
|
|
8
|
-
com/johnsnowlabs/nlp/__init__.pyc,sha256=uaxKCZLcbjekEaN71rRDNYoKsbRkBLd2-u48MN4N1DY,411
|
|
9
|
-
com/johnsnowlabs/nlp/__pycache__/__init__.cpython-36.pyc,sha256=k4w8UDG2BbV03Lbq5FXs0gNDABbjV7RRnZ_6L21OvUk,353
|
|
10
|
-
sparknlp/__init__.py,sha256=ni0RVqb4SKfSdOGmc7-sbZhL5cMCCH4yisDd_F6fC54,3332
|
|
11
|
-
sparknlp/__init__.pyc,sha256=nVwWsqKvIazlucT_z3PIrNz2XsU_AXRP53HYP5SMFu4,3561
|
|
12
|
-
sparknlp/annotation.py,sha256=juoczyZFnFNVzsiZRKYObcINr7P_Ip2U5WmoPlsPKmk,1151
|
|
13
|
-
sparknlp/annotation.pyc,sha256=1pEno03-yOWvSALAxZmt-9YW8MVfikCn8Sco8QbWta8,2058
|
|
14
|
-
sparknlp/annotator.py,sha256=5FkjHNQo4zwqGf4nvWazfZnq0uCXotCd_fmou-TsFlI,116896
|
|
15
|
-
sparknlp/annotator.pyc,sha256=PUQNs7guLUc06EbctLHr_5qT-W5B7J4sYlcocreqcEo,148679
|
|
16
|
-
sparknlp/base.py,sha256=-85zXkkQdaDMaCNbfUU-nMSVnqW6HggOf9xcYryVVeA,14057
|
|
17
|
-
sparknlp/base.pyc,sha256=EcT6yLv3eqemHhICMkoc7Oyqadg7wBeUWAoHz2eMXPg,20729
|
|
18
|
-
sparknlp/common.py,sha256=GVykc3sXgz6Dgdi44ofra4uLpXUjXRyVwg_BaaqyCUE,6627
|
|
19
|
-
sparknlp/common.pyc,sha256=qy_9mRVn8jc0RtdgMKSUjOD19Wun_iUnn29LZ2Y6MwE,12042
|
|
20
|
-
sparknlp/embeddings.py,sha256=vzXs8ZI9_cppUxPQkhuCzLckESnL0ayRu06ZBAixeNQ,1291
|
|
21
|
-
sparknlp/embeddings.pyc,sha256=kXR7XOsmxYfCpfs5uYCYuu1vVxTWN5aFmUugRWVJ--4,2709
|
|
22
|
-
sparknlp/functions.py,sha256=NlnyhpT-tXAkkjR-ul_21wSMDalqbOdSSl9In_EHC7I,953
|
|
23
|
-
sparknlp/internal.py,sha256=LB5_3kQkBSrttbda4sP3ep4RDm6fjAuoXjw_2vOZGXo,11379
|
|
24
|
-
sparknlp/internal.pyc,sha256=kGVic2zcIkrE9Pxf9RdOGhGv97IQ_YcSXTvhO91HsSY,22311
|
|
25
|
-
sparknlp/pretrained.py,sha256=-SOiKd_1Su2d2QdN4wnmJy6_e9jN2cOSxIk8GXNsGDE,4517
|
|
26
|
-
sparknlp/pretrained.pyc,sha256=KXdsluyIkxsYZXQmTMHU57yso1HLpqB1_hCLxaNqmew,6663
|
|
27
|
-
sparknlp/storage.py,sha256=X4UNOUxt5Ia2FDFSZHA1xxArvKNjD8u2xMCa2kBO_xI,872
|
|
28
|
-
sparknlp/storage.pyc,sha256=q03gTcxlAfq35ejKIIlUpsBZca3dGJaAy0vdhi5LcJI,2063
|
|
29
|
-
sparknlp/training.py,sha256=6T5RYlrv7DQ9KpxpRZnN5RHjkkj13rLeD4Qi-4eLDrc,2215
|
|
30
|
-
sparknlp/training.pyc,sha256=oPZ6qCF6DDXmOPzm0SmR7JUdvXRk2zTDiUdtnNlm2Ts,3222
|
|
31
|
-
sparknlp/util.py,sha256=J5XhwhnaslqdtFQK6Sd9p4MTJsTC3su9yE3SBzqik2w,455
|
|
32
|
-
sparknlp/util.pyc,sha256=WbmCQ3jssVdZNYhhqmkfEV0OTX7ezmFHPjQ91OnfuSc,1275
|
|
33
|
-
sparknlp/__pycache__/__init__.cpython-36.pyc,sha256=_TjoLI9fCqNQN8MdK86RgkFYh16KQZRBn1z_uoyBMiE,2888
|
|
34
|
-
sparknlp/__pycache__/annotation.cpython-36.pyc,sha256=8Cs2Xl2qQ_YbQ83wVrgE4hPiafq4yQiuEGd3HgZ9sMw,1425
|
|
35
|
-
sparknlp/__pycache__/annotator.cpython-36.pyc,sha256=9xhdxdwc2JsOzI_etgMQG3_arI180BnZnpdmNH8PgJE,112153
|
|
36
|
-
sparknlp/__pycache__/base.cpython-36.pyc,sha256=uKQ9gTFZzch9tUUBxfFmmq_Z9BOoqp6V9ZHW9Lcx0h0,15366
|
|
37
|
-
sparknlp/__pycache__/common.cpython-36.pyc,sha256=hHNRFQd4F8Evq-Oc22-Y2O1C37nIryR1_bii841zCh8,8634
|
|
38
|
-
sparknlp/__pycache__/embeddings.cpython-36.pyc,sha256=Y8KqPtGmEfJaOUFrFDvRIGlmzdlA0hXJoy2IS6kNH40,1948
|
|
39
|
-
sparknlp/__pycache__/functions.cpython-36.pyc,sha256=T0F-HDDZWFwfijd-cyVxZgZH79-XGVbt9Q0AFI6aHo8,1646
|
|
40
|
-
sparknlp/__pycache__/internal.cpython-36.pyc,sha256=1a0YbyewntnY6mJifOpwjMH2QXsATL-8UXyA8ngF6O4,16122
|
|
41
|
-
sparknlp/__pycache__/pretrained.cpython-36.pyc,sha256=WuvTW4FbpY3yj__2ou851z8XF2bKAP9mQ9L-sVlR5bY,4981
|
|
42
|
-
sparknlp/__pycache__/storage.cpython-36.pyc,sha256=qlrlSHWXN2bRJ5cUTWNzb4xk2I4-IkBVIvs-OU0DRP8,1494
|
|
43
|
-
sparknlp/__pycache__/training.cpython-36.pyc,sha256=V0JrDYafhBQ7K17cwzNdNy7dEQPIm1jABg5CJEgs5gU,2361
|
|
44
|
-
sparknlp/__pycache__/util.cpython-36.pyc,sha256=oE_Rgp3Eu3mTqoeAvXiMGm2XUdrwEnlTNyzhRHeIWAs,889
|
|
45
|
-
spark_nlp-2.6.3rc1.dist-info/METADATA,sha256=3nht_gCw84Wr3P0vkOm-440Y4ZOzBBMEu2x4_4e0tuI,1707
|
|
46
|
-
spark_nlp-2.6.3rc1.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
|
|
47
|
-
spark_nlp-2.6.3rc1.dist-info/top_level.txt,sha256=uuytur4pyMRw2H_txNY2ZkaucZHUs22QF8-R03ch_-E,13
|
|
48
|
-
spark_nlp-2.6.3rc1.dist-info/RECORD,,
|
sparknlp/__init__.pyc
DELETED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
sparknlp/annotation.pyc
DELETED
|
Binary file
|