spark-nlp 4.2.6__py2.py3-none-any.whl → 6.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. com/johnsnowlabs/ml/__init__.py +0 -0
  2. com/johnsnowlabs/ml/ai/__init__.py +10 -0
  3. spark_nlp-6.2.1.dist-info/METADATA +362 -0
  4. spark_nlp-6.2.1.dist-info/RECORD +292 -0
  5. {spark_nlp-4.2.6.dist-info → spark_nlp-6.2.1.dist-info}/WHEEL +1 -1
  6. sparknlp/__init__.py +81 -28
  7. sparknlp/annotation.py +3 -2
  8. sparknlp/annotator/__init__.py +6 -0
  9. sparknlp/annotator/audio/__init__.py +2 -0
  10. sparknlp/annotator/audio/hubert_for_ctc.py +188 -0
  11. sparknlp/annotator/audio/wav2vec2_for_ctc.py +14 -14
  12. sparknlp/annotator/audio/whisper_for_ctc.py +251 -0
  13. sparknlp/{base → annotator}/chunk2_doc.py +4 -7
  14. sparknlp/annotator/chunker.py +1 -2
  15. sparknlp/annotator/classifier_dl/__init__.py +17 -0
  16. sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
  17. sparknlp/annotator/classifier_dl/albert_for_question_answering.py +3 -15
  18. sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +4 -18
  19. sparknlp/annotator/classifier_dl/albert_for_token_classification.py +3 -17
  20. sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
  21. sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
  22. sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +161 -0
  23. sparknlp/annotator/classifier_dl/bert_for_question_answering.py +6 -20
  24. sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +3 -17
  25. sparknlp/annotator/classifier_dl/bert_for_token_classification.py +3 -17
  26. sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +212 -0
  27. sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +168 -0
  28. sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +5 -19
  29. sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +5 -19
  30. sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
  31. sparknlp/annotator/classifier_dl/classifier_dl.py +4 -4
  32. sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +3 -17
  33. sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +4 -19
  34. sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +5 -21
  35. sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +193 -0
  36. sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +3 -17
  37. sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +4 -18
  38. sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +3 -17
  39. sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +211 -0
  40. sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
  41. sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +3 -17
  42. sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +4 -18
  43. sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +3 -17
  44. sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +148 -0
  45. sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +188 -0
  46. sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
  47. sparknlp/annotator/classifier_dl/multi_classifier_dl.py +3 -3
  48. sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
  49. sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +3 -17
  50. sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +4 -18
  51. sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +1 -1
  52. sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +225 -0
  53. sparknlp/annotator/classifier_dl/sentiment_dl.py +4 -4
  54. sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +2 -2
  55. sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
  56. sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +3 -17
  57. sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +4 -18
  58. sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +6 -20
  59. sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +225 -0
  60. sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +4 -18
  61. sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +3 -17
  62. sparknlp/annotator/cleaners/__init__.py +15 -0
  63. sparknlp/annotator/cleaners/cleaner.py +202 -0
  64. sparknlp/annotator/cleaners/extractor.py +191 -0
  65. sparknlp/annotator/coref/spanbert_coref.py +4 -18
  66. sparknlp/annotator/cv/__init__.py +15 -0
  67. sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
  68. sparknlp/annotator/cv/clip_for_zero_shot_classification.py +193 -0
  69. sparknlp/annotator/cv/convnext_for_image_classification.py +269 -0
  70. sparknlp/annotator/cv/florence2_transformer.py +180 -0
  71. sparknlp/annotator/cv/gemma3_for_multimodal.py +346 -0
  72. sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
  73. sparknlp/annotator/cv/janus_for_multimodal.py +351 -0
  74. sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
  75. sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
  76. sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
  77. sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
  78. sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
  79. sparknlp/annotator/cv/smolvlm_transformer.py +426 -0
  80. sparknlp/annotator/cv/swin_for_image_classification.py +242 -0
  81. sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +240 -0
  82. sparknlp/annotator/cv/vit_for_image_classification.py +36 -4
  83. sparknlp/annotator/dataframe_optimizer.py +216 -0
  84. sparknlp/annotator/date2_chunk.py +88 -0
  85. sparknlp/annotator/dependency/dependency_parser.py +2 -3
  86. sparknlp/annotator/dependency/typed_dependency_parser.py +3 -4
  87. sparknlp/annotator/document_character_text_splitter.py +228 -0
  88. sparknlp/annotator/document_normalizer.py +37 -1
  89. sparknlp/annotator/document_token_splitter.py +175 -0
  90. sparknlp/annotator/document_token_splitter_test.py +85 -0
  91. sparknlp/annotator/embeddings/__init__.py +11 -0
  92. sparknlp/annotator/embeddings/albert_embeddings.py +4 -18
  93. sparknlp/annotator/embeddings/auto_gguf_embeddings.py +539 -0
  94. sparknlp/annotator/embeddings/bert_embeddings.py +9 -22
  95. sparknlp/annotator/embeddings/bert_sentence_embeddings.py +12 -24
  96. sparknlp/annotator/embeddings/bge_embeddings.py +199 -0
  97. sparknlp/annotator/embeddings/camembert_embeddings.py +4 -20
  98. sparknlp/annotator/embeddings/chunk_embeddings.py +1 -2
  99. sparknlp/annotator/embeddings/deberta_embeddings.py +2 -16
  100. sparknlp/annotator/embeddings/distil_bert_embeddings.py +5 -19
  101. sparknlp/annotator/embeddings/doc2vec.py +7 -1
  102. sparknlp/annotator/embeddings/e5_embeddings.py +195 -0
  103. sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
  104. sparknlp/annotator/embeddings/elmo_embeddings.py +2 -2
  105. sparknlp/annotator/embeddings/instructor_embeddings.py +204 -0
  106. sparknlp/annotator/embeddings/longformer_embeddings.py +3 -17
  107. sparknlp/annotator/embeddings/minilm_embeddings.py +189 -0
  108. sparknlp/annotator/embeddings/mpnet_embeddings.py +192 -0
  109. sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
  110. sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
  111. sparknlp/annotator/embeddings/roberta_embeddings.py +9 -21
  112. sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +7 -21
  113. sparknlp/annotator/embeddings/sentence_embeddings.py +2 -3
  114. sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
  115. sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
  116. sparknlp/annotator/embeddings/universal_sentence_encoder.py +3 -3
  117. sparknlp/annotator/embeddings/word2vec.py +7 -1
  118. sparknlp/annotator/embeddings/word_embeddings.py +4 -5
  119. sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +9 -21
  120. sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +7 -21
  121. sparknlp/annotator/embeddings/xlnet_embeddings.py +4 -18
  122. sparknlp/annotator/er/entity_ruler.py +37 -23
  123. sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +2 -3
  124. sparknlp/annotator/ld_dl/language_detector_dl.py +2 -2
  125. sparknlp/annotator/lemmatizer.py +3 -4
  126. sparknlp/annotator/matcher/date_matcher.py +35 -3
  127. sparknlp/annotator/matcher/multi_date_matcher.py +1 -2
  128. sparknlp/annotator/matcher/regex_matcher.py +3 -3
  129. sparknlp/annotator/matcher/text_matcher.py +2 -3
  130. sparknlp/annotator/n_gram_generator.py +1 -2
  131. sparknlp/annotator/ner/__init__.py +3 -1
  132. sparknlp/annotator/ner/ner_converter.py +18 -0
  133. sparknlp/annotator/ner/ner_crf.py +4 -5
  134. sparknlp/annotator/ner/ner_dl.py +10 -5
  135. sparknlp/annotator/ner/ner_dl_graph_checker.py +293 -0
  136. sparknlp/annotator/ner/ner_overwriter.py +2 -2
  137. sparknlp/annotator/ner/zero_shot_ner_model.py +173 -0
  138. sparknlp/annotator/normalizer.py +2 -2
  139. sparknlp/annotator/openai/__init__.py +16 -0
  140. sparknlp/annotator/openai/openai_completion.py +349 -0
  141. sparknlp/annotator/openai/openai_embeddings.py +106 -0
  142. sparknlp/annotator/pos/perceptron.py +6 -7
  143. sparknlp/annotator/sentence/sentence_detector.py +2 -2
  144. sparknlp/annotator/sentence/sentence_detector_dl.py +3 -3
  145. sparknlp/annotator/sentiment/sentiment_detector.py +4 -5
  146. sparknlp/annotator/sentiment/vivekn_sentiment.py +4 -5
  147. sparknlp/annotator/seq2seq/__init__.py +17 -0
  148. sparknlp/annotator/seq2seq/auto_gguf_model.py +304 -0
  149. sparknlp/annotator/seq2seq/auto_gguf_reranker.py +334 -0
  150. sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +336 -0
  151. sparknlp/annotator/seq2seq/bart_transformer.py +420 -0
  152. sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
  153. sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
  154. sparknlp/annotator/seq2seq/gpt2_transformer.py +1 -1
  155. sparknlp/annotator/seq2seq/llama2_transformer.py +343 -0
  156. sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
  157. sparknlp/annotator/seq2seq/m2m100_transformer.py +392 -0
  158. sparknlp/annotator/seq2seq/marian_transformer.py +124 -3
  159. sparknlp/annotator/seq2seq/mistral_transformer.py +348 -0
  160. sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
  161. sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
  162. sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
  163. sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
  164. sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
  165. sparknlp/annotator/seq2seq/qwen_transformer.py +340 -0
  166. sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
  167. sparknlp/annotator/seq2seq/t5_transformer.py +54 -4
  168. sparknlp/annotator/similarity/__init__.py +0 -0
  169. sparknlp/annotator/similarity/document_similarity_ranker.py +379 -0
  170. sparknlp/annotator/spell_check/context_spell_checker.py +116 -17
  171. sparknlp/annotator/spell_check/norvig_sweeting.py +3 -6
  172. sparknlp/annotator/spell_check/symmetric_delete.py +1 -1
  173. sparknlp/annotator/stemmer.py +2 -3
  174. sparknlp/annotator/stop_words_cleaner.py +3 -4
  175. sparknlp/annotator/tf_ner_dl_graph_builder.py +1 -1
  176. sparknlp/annotator/token/__init__.py +0 -1
  177. sparknlp/annotator/token/recursive_tokenizer.py +2 -3
  178. sparknlp/annotator/token/tokenizer.py +2 -3
  179. sparknlp/annotator/ws/word_segmenter.py +35 -10
  180. sparknlp/base/__init__.py +2 -3
  181. sparknlp/base/doc2_chunk.py +0 -3
  182. sparknlp/base/document_assembler.py +5 -5
  183. sparknlp/base/embeddings_finisher.py +14 -2
  184. sparknlp/base/finisher.py +15 -4
  185. sparknlp/base/gguf_ranking_finisher.py +234 -0
  186. sparknlp/base/image_assembler.py +69 -0
  187. sparknlp/base/light_pipeline.py +53 -21
  188. sparknlp/base/multi_document_assembler.py +9 -13
  189. sparknlp/base/prompt_assembler.py +207 -0
  190. sparknlp/base/token_assembler.py +1 -2
  191. sparknlp/common/__init__.py +2 -0
  192. sparknlp/common/annotator_type.py +1 -0
  193. sparknlp/common/completion_post_processing.py +37 -0
  194. sparknlp/common/match_strategy.py +33 -0
  195. sparknlp/common/properties.py +914 -9
  196. sparknlp/internal/__init__.py +841 -116
  197. sparknlp/internal/annotator_java_ml.py +1 -1
  198. sparknlp/internal/annotator_transformer.py +3 -0
  199. sparknlp/logging/comet.py +2 -2
  200. sparknlp/partition/__init__.py +16 -0
  201. sparknlp/partition/partition.py +244 -0
  202. sparknlp/partition/partition_properties.py +902 -0
  203. sparknlp/partition/partition_transformer.py +200 -0
  204. sparknlp/pretrained/pretrained_pipeline.py +1 -1
  205. sparknlp/pretrained/resource_downloader.py +126 -2
  206. sparknlp/reader/__init__.py +15 -0
  207. sparknlp/reader/enums.py +19 -0
  208. sparknlp/reader/pdf_to_text.py +190 -0
  209. sparknlp/reader/reader2doc.py +124 -0
  210. sparknlp/reader/reader2image.py +136 -0
  211. sparknlp/reader/reader2table.py +44 -0
  212. sparknlp/reader/reader_assembler.py +159 -0
  213. sparknlp/reader/sparknlp_reader.py +461 -0
  214. sparknlp/training/__init__.py +1 -0
  215. sparknlp/training/conll.py +8 -2
  216. sparknlp/training/spacy_to_annotation.py +57 -0
  217. sparknlp/util.py +26 -0
  218. spark_nlp-4.2.6.dist-info/METADATA +0 -1256
  219. spark_nlp-4.2.6.dist-info/RECORD +0 -196
  220. {spark_nlp-4.2.6.dist-info → spark_nlp-6.2.1.dist-info}/top_level.txt +0 -0
  221. /sparknlp/annotator/{token/token2_chunk.py → token2_chunk.py} +0 -0
@@ -0,0 +1,426 @@
1
+ # Copyright 2017-2024 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from sparknlp.common import *
16
+
17
class SmolVLMTransformer(AnnotatorModel,
                         HasBatchedAnnotateImage,
                         HasImageFeatureProperties,
                         HasEngine,
                         HasCandidateLabelsProperties,
                         HasRescaleFactor):
    """Annotator that loads SmolVLM models for visual question answering.

    The model consists of a vision encoder, a text encoder as well as a text
    decoder. The vision encoder will encode the input image, the text encoder
    will encode the input question together with the encoding of the image,
    and the text decoder will output the answer to the question.

    SmolVLM is a compact open multimodal model that accepts arbitrary
    sequences of image and text inputs to produce text outputs. Designed for
    efficiency, SmolVLM can answer questions about images, describe visual
    content, create stories grounded on multiple images, or function as a
    pure language model without visual inputs. Its lightweight architecture
    makes it suitable for on-device applications while maintaining strong
    performance on multimodal tasks.

    Pretrained models can be loaded with :meth:`.pretrained` of the companion
    object:

    >>> visualQA = SmolVLMTransformer.pretrained() \\
    ...     .setInputCols(["image_assembler"]) \\
    ...     .setOutputCol("answer")

    The default model is ``"smolvlm_instruct_int4"``, if no name is provided.

    For available pretrained models, refer to the `Models Hub
    <https://sparknlp.org/models?task=Question+Answering>`__.

    Models from the HuggingFace 🤗 Transformers library are also compatible
    with Spark NLP 🚀. To check compatibility and learn how to import them,
    see `Import Transformers into Spark NLP 🚀
    <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
    For extended examples, refer to the `SmolVLMTransformer Test Suite
    <https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/SmolVLMTransformerTest.scala>`_.

    ====================== ======================
    Input Annotation types Output Annotation type
    ====================== ======================
    ``IMAGE``              ``DOCUMENT``
    ====================== ======================

    Parameters
    ----------
    batchSize : int, optional
        Batch size. Larger values allow faster processing but require more
        memory, by default 1.
    configProtoBytes : bytes, optional
        ConfigProto from TensorFlow, serialized into a byte array.
    maxSentenceLength : int, optional
        Maximum sentence length to process (set through
        :meth:`setMaxSentenceSize`).
    minOutputLength : int, optional
        Minimum length of the sequence to be generated, by default 0.
    maxOutputLength : int, optional
        Maximum length of output text, by default 20.
    doSample : bool, optional
        Whether or not to use sampling; use greedy decoding otherwise, by
        default False.
    temperature : float, optional
        The value used to modulate the next token probabilities, by default
        0.6.
    topK : int, optional
        The number of highest probability vocabulary tokens to keep for
        top-k-filtering, by default -1.
    topP : float, optional
        If set to float < 1, only the most probable tokens with probabilities
        that add up to ``topP`` or higher are kept for generation, by default
        0.9.
    repetitionPenalty : float, optional
        The parameter for repetition penalty. 1.0 means no penalty, by
        default 1.0.
    noRepeatNgramSize : int, optional
        If set to int > 0, all ngrams of that size can only occur once, by
        default 3.
    ignoreTokenIds : List[int], optional
        Token ids which are ignored in the decoder's output, by default an
        empty list.
    beamSize : int, optional
        The number of beams for beam search, by default 1.
    stopTokenIds : List[int], optional
        Stop tokens to terminate the generation, by default ``[49154]``.
    doImageSplitting : bool, optional
        Whether to split the image, by default True.
    imageToken : int, optional
        Token ID for image embeddings, by default 49153.
    numVisionTokens : int, optional
        Number of vision tokens, by default 81.
    maxImageSize : int, optional
        Maximum image size for the model, by default 384.
    patchSize : int, optional
        Patch size for the model, by default 14.
    paddingConstant : int, optional
        Padding constant for the model, by default 0.

    Examples
    --------
    >>> import sparknlp
    >>> from sparknlp.base import *
    >>> from sparknlp.annotator import *
    >>> from pyspark.ml import Pipeline
    >>> from pyspark.sql.functions import lit
    >>> imageDF = spark.read.format("image").load(path=images_path)
    >>> testDF = imageDF.withColumn(
    ...     "text",
    ...     lit("<|im_start|>User:<image>Can you describe the image?<end_of_utterance>\\nAssistant:")
    ... )
    >>> imageAssembler = ImageAssembler() \\
    ...     .setInputCol("image") \\
    ...     .setOutputCol("image_assembler")
    >>> visualQAClassifier = SmolVLMTransformer.pretrained() \\
    ...     .setInputCols("image_assembler") \\
    ...     .setOutputCol("answer")
    >>> pipeline = Pipeline().setStages([
    ...     imageAssembler,
    ...     visualQAClassifier
    ... ])
    >>> result = pipeline.fit(testDF).transform(testDF)
    >>> result.select("image_assembler.origin", "answer.result").show(truncate=False)
    +--------------------------------------+--------------------------------------------------------------------------------------+
    |origin                                |result                                                                                |
    +--------------------------------------+--------------------------------------------------------------------------------------+
    |[file:///content/images/cat_image.jpg]|[The unusual aspect of this picture is the presence of two cats lying on a pink couch]|
    +--------------------------------------+--------------------------------------------------------------------------------------+
    """

    name = "SmolVLMTransformer"

    inputAnnotatorTypes = [AnnotatorType.IMAGE]

    outputAnnotatorType = AnnotatorType.DOCUMENT

    # --- Text generation parameters -------------------------------------

    minOutputLength = Param(
        Params._dummy(),
        "minOutputLength",
        "Minimum length of the sequence to be generated",
        typeConverter=TypeConverters.toInt,
    )

    maxOutputLength = Param(
        Params._dummy(),
        "maxOutputLength",
        "Maximum length of output text",
        typeConverter=TypeConverters.toInt,
    )

    doSample = Param(
        Params._dummy(),
        "doSample",
        "Whether or not to use sampling; use greedy decoding otherwise",
        typeConverter=TypeConverters.toBoolean,
    )

    temperature = Param(
        Params._dummy(),
        "temperature",
        "The value used to module the next token probabilities",
        typeConverter=TypeConverters.toFloat,
    )

    topK = Param(
        Params._dummy(),
        "topK",
        "The number of highest probability vocabulary tokens to keep for top-k-filtering",
        typeConverter=TypeConverters.toInt,
    )

    topP = Param(
        Params._dummy(),
        "topP",
        "If set to float < 1, only the most probable tokens with probabilities that add up to ``top_p`` or higher are kept for generation",
        typeConverter=TypeConverters.toFloat,
    )

    repetitionPenalty = Param(
        Params._dummy(),
        "repetitionPenalty",
        "The parameter for repetition penalty. 1.0 means no penalty. See `this paper <https://arxiv.org/pdf/1909.05858.pdf>`__ for more details",
        typeConverter=TypeConverters.toFloat,
    )

    noRepeatNgramSize = Param(
        Params._dummy(),
        "noRepeatNgramSize",
        "If set to int > 0, all ngrams of that size can only occur once",
        typeConverter=TypeConverters.toInt,
    )

    ignoreTokenIds = Param(
        Params._dummy(),
        "ignoreTokenIds",
        "A list of token ids which are ignored in the decoder's output",
        typeConverter=TypeConverters.toListInt,
    )

    beamSize = Param(
        Params._dummy(),
        "beamSize",
        "The Number of beams for beam search.",
        typeConverter=TypeConverters.toInt,
    )

    stopTokenIds = Param(
        Params._dummy(),
        "stopTokenIds",
        "Stop tokens to terminate the generation",
        typeConverter=TypeConverters.toListInt,
    )

    # --- Image / vision parameters --------------------------------------

    imageToken = Param(
        Params._dummy(),
        "imageToken",
        "Token id for image embeddings",
        typeConverter=TypeConverters.toInt,
    )

    numVisionTokens = Param(
        Params._dummy(),
        "numVisionTokens",
        "Number of vision tokens",
        typeConverter=TypeConverters.toInt,
    )

    maxImageSize = Param(
        Params._dummy(),
        "maxImageSize",
        "Maximum image size for the model",
        typeConverter=TypeConverters.toInt,
    )

    patchSize = Param(
        Params._dummy(),
        "patchSize",
        "Patch size for the model",
        typeConverter=TypeConverters.toInt,
    )

    paddingConstant = Param(
        Params._dummy(),
        "paddingConstant",
        "Padding constant for the model",
        typeConverter=TypeConverters.toInt,
    )

    doImageSplitting = Param(
        Params._dummy(),
        "doImageSplitting",
        "Whether to split the image",
        typeConverter=TypeConverters.toBoolean,
    )

    def setMaxSentenceSize(self, value):
        """Sets the maximum sentence length that the annotator will process.

        Parameters
        ----------
        value : int
            Maximum sentence length that the annotator will process
        """
        # NOTE(review): no Param named ``maxSentenceLength`` is declared in
        # this class body; presumably it is inherited from a base class or
        # mixin -- TODO confirm. The previous docstring stated a default of
        # 20, but ``__init__`` does not set a default for it.
        return self._set(maxSentenceLength=value)

    def setIgnoreTokenIds(self, value):
        """Sets the token ids which are ignored in the decoder's output.

        Parameters
        ----------
        value : List[int]
            The words to be filtered out
        """
        return self._set(ignoreTokenIds=value)

    def setStopTokenIds(self, value):
        """Sets the stop tokens that terminate the generation.

        Parameters
        ----------
        value : List[int]
            The tokens that terminate generation
        """
        return self._set(stopTokenIds=value)

    def setConfigProtoBytes(self, b):
        """Sets configProto from TensorFlow, serialized into a byte array.

        Parameters
        ----------
        b : List[int]
            ConfigProto from TensorFlow, serialized into a byte array
        """
        # NOTE(review): ``configProtoBytes`` is not declared as a Param in
        # this class body; presumably inherited -- TODO confirm.
        return self._set(configProtoBytes=b)

    def setMinOutputLength(self, value):
        """Sets the minimum length of the sequence to be generated.

        Parameters
        ----------
        value : int
            Minimum length of the sequence to be generated
        """
        return self._set(minOutputLength=value)

    def setMaxOutputLength(self, value):
        """Sets the maximum length of the output text.

        Parameters
        ----------
        value : int
            Maximum length of output text
        """
        return self._set(maxOutputLength=value)

    def setDoSample(self, value):
        """Sets whether to use sampling instead of greedy decoding.

        Parameters
        ----------
        value : bool
            Whether or not to use sampling; use greedy decoding otherwise
        """
        return self._set(doSample=value)

    def setTemperature(self, value):
        """Sets the value used to modulate the next token probabilities.

        Parameters
        ----------
        value : float
            The value used to modulate the next token probabilities
        """
        return self._set(temperature=value)

    def setTopK(self, value):
        """Sets the number of highest probability vocabulary tokens to keep
        for top-k-filtering.

        Parameters
        ----------
        value : int
            Number of highest probability vocabulary tokens to keep
        """
        return self._set(topK=value)

    def setTopP(self, value):
        """Sets the top cumulative probability for vocabulary tokens.

        If set to float < 1, only the most probable tokens with probabilities
        that add up to ``topP`` or higher are kept for generation.

        Parameters
        ----------
        value : float
            Cumulative probability for vocabulary tokens
        """
        return self._set(topP=value)

    def setRepetitionPenalty(self, value):
        """Sets the parameter for repetition penalty. 1.0 means no penalty.

        Parameters
        ----------
        value : float
            The repetition penalty

        References
        ----------
        See `Ctrl: A Conditional Transformer Language Model For Controllable
        Generation <https://arxiv.org/pdf/1909.05858.pdf>`__ for more details.
        """
        return self._set(repetitionPenalty=value)

    def setNoRepeatNgramSize(self, value):
        """Sets the size of n-grams that can only occur once.

        If set to int > 0, all ngrams of that size can only occur once.

        Parameters
        ----------
        value : int
            N-gram size can only occur once
        """
        return self._set(noRepeatNgramSize=value)

    def setBeamSize(self, value):
        """Sets the number of beams for beam search, by default `1`.

        Parameters
        ----------
        value : int
            Number of beam size for beam search
        """
        return self._set(beamSize=value)

    def setImageToken(self, value):
        """Sets the token ID for image embeddings.

        Parameters
        ----------
        value : int
            Token ID for image embeddings
        """
        return self._set(imageToken=value)

    def setNumVisionTokens(self, value):
        """Sets the number of vision tokens.

        Parameters
        ----------
        value : int
            Number of vision tokens
        """
        return self._set(numVisionTokens=value)

    def setMaxImageSize(self, value):
        """Sets the maximum image size for the model.

        Parameters
        ----------
        value : int
            Maximum image size
        """
        return self._set(maxImageSize=value)

    def setPatchSize(self, value):
        """Sets the patch size for the model.

        Parameters
        ----------
        value : int
            Patch size
        """
        return self._set(patchSize=value)

    def setPaddingConstant(self, value):
        """Sets the padding constant for the model.

        Parameters
        ----------
        value : int
            Padding constant
        """
        return self._set(paddingConstant=value)

    def setDoImageSplitting(self, value):
        """Sets whether to split the image.

        Parameters
        ----------
        value : bool
            Whether to split the image
        """
        return self._set(doImageSplitting=value)

    @keyword_only
    def __init__(self, classname="com.johnsnowlabs.nlp.annotators.cv.SmolVLMTransformer",
                 java_model=None):
        """Creates the annotator wrapper and registers its default values.

        Parameters
        ----------
        classname : str, optional
            Fully qualified name of the backing class, by default
            "com.johnsnowlabs.nlp.annotators.cv.SmolVLMTransformer"
        java_model : optional
            Existing Java model object to wrap, by default None
        """
        super().__init__(classname=classname, java_model=java_model)

        # Insertion order matches the original keyword order passed to
        # ``_setDefault``.
        defaults = {
            "batchSize": 1,
            "minOutputLength": 0,
            "maxOutputLength": 20,
            "doSample": False,
            "temperature": 0.6,
            "topK": -1,
            "topP": 0.9,
            "repetitionPenalty": 1.0,
            "noRepeatNgramSize": 3,
            "ignoreTokenIds": [],
            "beamSize": 1,
            "stopTokenIds": [49154],
            "imageToken": 49153,
            "numVisionTokens": 81,
            "maxImageSize": 384,
            "patchSize": 14,
            "paddingConstant": 0,
            "doImageSplitting": True,
        }
        self._setDefault(**defaults)

    @staticmethod
    def loadSavedModel(folder, spark_session, use_openvino=False):
        """Loads a locally saved model.

        Parameters
        ----------
        folder : str
            Folder of the saved model
        spark_session : pyspark.sql.SparkSession
            The current SparkSession
        use_openvino : bool, optional
            Whether to use OpenVINO for inference, by default False

        Returns
        -------
        SmolVLMTransformer
            The restored model
        """
        from sparknlp.internal import _SmolVLMTransformerLoader

        loader = _SmolVLMTransformerLoader(folder, spark_session._jsparkSession, use_openvino)
        return SmolVLMTransformer(java_model=loader._java_obj)

    @staticmethod
    def pretrained(name="smolvlm_instruct_int4", lang="en", remote_loc=None):
        """Downloads and loads a pretrained model.

        Parameters
        ----------
        name : str, optional
            Name of the pretrained model, by default
            "smolvlm_instruct_int4"
        lang : str, optional
            Language of the pretrained model, by default "en"
        remote_loc : str, optional
            Optional remote address of the resource, by default None. Will use
            Spark NLPs repositories otherwise.

        Returns
        -------
        SmolVLMTransformer
            The restored model
        """
        from sparknlp.pretrained import ResourceDownloader

        return ResourceDownloader.downloadModel(SmolVLMTransformer, name, lang, remote_loc)
@@ -0,0 +1,242 @@
1
+ # Copyright 2017-2022 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Contains classes concerning SwinForImageClassification."""
16
+
17
+ from sparknlp.common import *
18
+
19
+
20
class SwinForImageClassification(
        AnnotatorModel,
        HasBatchedAnnotateImage,
        HasImageFeatureProperties,
        HasRescaleFactor,
        HasEngine):
    """SwinForImageClassification is an image classifier based on Swin.

    The Swin Transformer was proposed in *Swin Transformer: Hierarchical Vision
    Transformer using Shifted Windows* by Ze Liu, Yutong Lin, Yue Cao, Han Hu,
    Yixuan Wei, Zheng Zhang, Stephen Lin and Baining Guo.

    It is basically a hierarchical Transformer whose representation is computed
    with shifted windows. The shifted windowing scheme brings greater efficiency
    by limiting self-attention computation to non-overlapping local windows
    while also allowing for cross-window connection.

    Pretrained models can be loaded with :meth:`.pretrained` of the companion
    object:

    .. code-block:: python

        imageClassifier = SwinForImageClassification.pretrained() \\
            .setInputCols(["image_assembler"]) \\
            .setOutputCol("class")


    The default model is ``"image_classifier_swin_base_patch4_window7_224"``,
    if no name is provided.

    For available pretrained models please see the
    `Models Hub <https://sparknlp.org/models?task=Image+Classification>`__.

    Models from the HuggingFace 🤗 Transformers library are also compatible with
    Spark NLP 🚀. To see which models are compatible and how to import them see
    https://github.com/JohnSnowLabs/spark-nlp/discussions/5669 and to see more
    extended examples, see
    `SwinForImageClassificationTest <https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/SwinForImageClassificationTest.scala>`__.

    ====================== ======================
    Input Annotation types Output Annotation type
    ====================== ======================
    ``IMAGE``              ``CATEGORY``
    ====================== ======================

    **Paper Abstract:**

    *This paper presents a new vision Transformer, called Swin Transformer, that
    capably serves as a general-purpose backbone for computer vision. Challenges
    in adapting Transformer from language to vision arise from differences
    between the two domains, such as large variations in the scale of visual
    entities and the high resolution of pixels in images compared to words in
    text. To address these differences, we propose a hierarchical Transformer
    whose representation is computed with Shifted windows. The shifted windowing
    scheme brings greater efficiency by limiting self-attention computation to
    non-overlapping local windows while also allowing for cross-window
    connection. This hierarchical architecture has the flexibility to model at
    various scales and has linear computational complexity with respect to image
    size. These qualities of Swin Transformer make it compatible with a broad
    range of vision tasks, including image classification (87.3 top-1 accuracy
    on ImageNet-1K) and dense prediction tasks such as object detection (58.7
    box AP and 51.1 mask AP on COCO test-dev) and semantic segmentation (53.5
    mIoU on ADE20K val). Its performance surpasses the previous state-of-the-art
    by a large margin of +2.7 box AP and +2.6 mask AP on COCO, and +3.2 mIoU on
    ADE20K, demonstrating the potential of Transformer-based models as vision
    backbones. The hierarchical design and the shifted window approach also
    prove beneficial for all-MLP architectures.*

    References
    ----------

    `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows
    <https://arxiv.org/pdf/2103.14030.pdf>`__

    Parameters
    ----------
    doResize
        Whether to resize the input to a certain size
    doNormalize
        Whether to normalize the input with mean and standard deviation
    featureExtractorType
        Name of model's architecture for feature extraction
    imageMean
        The sequence of means for each channel, to be used when normalizing
        images
    imageStd
        The sequence of standard deviations for each channel, to be used when
        normalizing images
    resample
        An optional resampling filter. This can be one of `PIL.Image.NEAREST`,
        `PIL.Image.BILINEAR` or `PIL.Image.BICUBIC`. Only has an effect if
        ``doResize`` is set to True.
    size
        Resize the input to the given size. If a tuple is provided, it should be
        (width, height). If only an integer is provided, then the input will be
        resized to (size, size). Only has an effect if ``doResize`` is set to
        True.
    doRescale
        Whether to rescale the image values by rescaleFactor
    rescaleFactor
        Factor to scale the image values
    configProtoBytes
        ConfigProto from tensorflow, serialized into byte array.

    Examples
    --------
    >>> import sparknlp
    >>> from sparknlp.base import *
    >>> from sparknlp.annotator import *
    >>> from pyspark.ml import Pipeline
    >>> imageDF = spark.read \\
    ...     .format("image") \\
    ...     .option("dropInvalid", value = True) \\
    ...     .load("src/test/resources/image/")
    >>> imageAssembler = ImageAssembler() \\
    ...     .setInputCol("image") \\
    ...     .setOutputCol("image_assembler")
    >>> imageClassifier = SwinForImageClassification \\
    ...     .pretrained() \\
    ...     .setInputCols(["image_assembler"]) \\
    ...     .setOutputCol("class")
    >>> pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
    >>> pipelineDF = pipeline.fit(imageDF).transform(imageDF)
    >>> pipelineDF \\
    ...     .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "class.result") \\
    ...     .show(truncate=False)
    +-----------------+----------------------------------------------------------+
    |image_name       |result                                                    |
    +-----------------+----------------------------------------------------------+
    |palace.JPEG      |[palace]                                                  |
    |egyptian_cat.jpeg|[tabby, tabby cat]                                        |
    |hippopotamus.JPEG|[hippopotamus, hippo, river horse, Hippopotamus amphibius]|
    |hen.JPEG         |[hen]                                                     |
    |ostrich.JPEG     |[ostrich, Struthio camelus]                               |
    |junco.JPEG       |[junco, snowbird]                                         |
    |bluetick.jpg     |[bluetick]                                                |
    |chihuahua.jpg    |[Chihuahua]                                               |
    |tractor.JPEG     |[tractor]                                                 |
    |ox.JPEG          |[ox]                                                      |
    +-----------------+----------------------------------------------------------+
    """

    name = "SwinForImageClassification"

    # This annotator consumes IMAGE annotations and emits CATEGORY annotations.
    inputAnnotatorTypes = [AnnotatorType.IMAGE]
    outputAnnotatorType = AnnotatorType.CATEGORY

    # Serialized TensorFlow ConfigProto, stored as a list of ints.
    configProtoBytes = Param(
        Params._dummy(),
        "configProtoBytes",
        "ConfigProto from tensorflow, serialized into byte array. Get with "
        "config_proto.SerializeToString()",
        typeConverter=TypeConverters.toListInt,
    )

    def getClasses(self):
        """
        Returns labels used to train this model
        """
        # Delegates to the Java-side ``getClasses`` method.
        labels = self._call_java("getClasses")
        return labels

    def setConfigProtoBytes(self, b):
        """Sets configProto from tensorflow, serialized into byte array.

        Parameters
        ----------
        b : List[int]
            ConfigProto from tensorflow, serialized into byte array
        """
        updated = self._set(configProtoBytes=b)
        return updated

    @keyword_only
    def __init__(self,
                 classname="com.johnsnowlabs.nlp.annotators.cv.SwinForImageClassification",
                 java_model=None):
        """Initialise the annotator and register its default parameter values.

        Parameters
        ----------
        classname : str, optional
            Fully qualified name of the backing class, forwarded to the parent
            initialiser
        java_model : optional
            Existing Java model object to wrap, forwarded to the parent
            initialiser; by default None
        """
        super().__init__(classname=classname, java_model=java_model)

        # Defaults are gathered in one mapping and registered in a single call.
        defaults = {
            "batchSize": 2,
            "doNormalize": True,
            "doRescale": True,
            "doResize": True,
            "imageMean": [0.485, 0.456, 0.406],
            "imageStd": [0.229, 0.224, 0.225],
            "resample": 3,
            "size": 224,
            "rescaleFactor": 1 / 255.0,
        }
        self._setDefault(**defaults)

    @staticmethod
    def loadSavedModel(folder, spark_session):
        """Restore a SwinForImageClassification from a locally saved model.

        Parameters
        ----------
        folder : str
            Folder of the saved model
        spark_session : pyspark.sql.SparkSession
            The current SparkSession

        Returns
        -------
        SwinForImageClassification
            The restored model
        """
        # Function-scope import of the loader wrapper from the internal module.
        from sparknlp.internal import _SwinForImageClassification

        # The loader receives the session's underlying `_jsparkSession` handle.
        loader = _SwinForImageClassification(folder, spark_session._jsparkSession)
        return SwinForImageClassification(java_model=loader._java_obj)

    @staticmethod
    def pretrained(name="image_classifier_swin_base_patch4_window7_224", lang="en",
                   remote_loc=None):
        """Download a pretrained model and return it as this annotator.

        Parameters
        ----------
        name : str, optional
            Name of the pretrained model, by default
            "image_classifier_swin_base_patch4_window7_224"
        lang : str, optional
            Language of the pretrained model, by default "en"
        remote_loc : str, optional
            Optional remote address of the resource, by default None. Will use
            Spark NLP's repositories otherwise.

        Returns
        -------
        SwinForImageClassification
            The restored model
        """
        # Function-scope import of the downloader.
        from sparknlp.pretrained import ResourceDownloader

        downloaded = ResourceDownloader.downloadModel(
            SwinForImageClassification, name, lang, remote_loc
        )
        return downloaded