spark-nlp 2.6.3rc1__py2.py3-none-any.whl → 6.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (329) hide show
  1. com/johnsnowlabs/ml/__init__.py +0 -0
  2. com/johnsnowlabs/ml/ai/__init__.py +10 -0
  3. com/johnsnowlabs/nlp/__init__.py +4 -2
  4. spark_nlp-6.2.1.dist-info/METADATA +362 -0
  5. spark_nlp-6.2.1.dist-info/RECORD +292 -0
  6. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/WHEEL +1 -1
  7. sparknlp/__init__.py +281 -27
  8. sparknlp/annotation.py +137 -6
  9. sparknlp/annotation_audio.py +61 -0
  10. sparknlp/annotation_image.py +82 -0
  11. sparknlp/annotator/__init__.py +93 -0
  12. sparknlp/annotator/audio/__init__.py +16 -0
  13. sparknlp/annotator/audio/hubert_for_ctc.py +188 -0
  14. sparknlp/annotator/audio/wav2vec2_for_ctc.py +161 -0
  15. sparknlp/annotator/audio/whisper_for_ctc.py +251 -0
  16. sparknlp/annotator/chunk2_doc.py +85 -0
  17. sparknlp/annotator/chunker.py +137 -0
  18. sparknlp/annotator/classifier_dl/__init__.py +61 -0
  19. sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
  20. sparknlp/annotator/classifier_dl/albert_for_question_answering.py +172 -0
  21. sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +201 -0
  22. sparknlp/annotator/classifier_dl/albert_for_token_classification.py +179 -0
  23. sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
  24. sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
  25. sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +161 -0
  26. sparknlp/annotator/classifier_dl/bert_for_question_answering.py +168 -0
  27. sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +202 -0
  28. sparknlp/annotator/classifier_dl/bert_for_token_classification.py +177 -0
  29. sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +212 -0
  30. sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +168 -0
  31. sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +205 -0
  32. sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +173 -0
  33. sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
  34. sparknlp/annotator/classifier_dl/classifier_dl.py +320 -0
  35. sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +168 -0
  36. sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +198 -0
  37. sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +175 -0
  38. sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +193 -0
  39. sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +168 -0
  40. sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +201 -0
  41. sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +175 -0
  42. sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +211 -0
  43. sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
  44. sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +168 -0
  45. sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +201 -0
  46. sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +176 -0
  47. sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +148 -0
  48. sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +188 -0
  49. sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
  50. sparknlp/annotator/classifier_dl/multi_classifier_dl.py +395 -0
  51. sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
  52. sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +168 -0
  53. sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +201 -0
  54. sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +189 -0
  55. sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +225 -0
  56. sparknlp/annotator/classifier_dl/sentiment_dl.py +378 -0
  57. sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +170 -0
  58. sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
  59. sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +168 -0
  60. sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +201 -0
  61. sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +173 -0
  62. sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +225 -0
  63. sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +201 -0
  64. sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +176 -0
  65. sparknlp/annotator/cleaners/__init__.py +15 -0
  66. sparknlp/annotator/cleaners/cleaner.py +202 -0
  67. sparknlp/annotator/cleaners/extractor.py +191 -0
  68. sparknlp/annotator/coref/__init__.py +1 -0
  69. sparknlp/annotator/coref/spanbert_coref.py +221 -0
  70. sparknlp/annotator/cv/__init__.py +29 -0
  71. sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
  72. sparknlp/annotator/cv/clip_for_zero_shot_classification.py +193 -0
  73. sparknlp/annotator/cv/convnext_for_image_classification.py +269 -0
  74. sparknlp/annotator/cv/florence2_transformer.py +180 -0
  75. sparknlp/annotator/cv/gemma3_for_multimodal.py +346 -0
  76. sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
  77. sparknlp/annotator/cv/janus_for_multimodal.py +351 -0
  78. sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
  79. sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
  80. sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
  81. sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
  82. sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
  83. sparknlp/annotator/cv/smolvlm_transformer.py +426 -0
  84. sparknlp/annotator/cv/swin_for_image_classification.py +242 -0
  85. sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +240 -0
  86. sparknlp/annotator/cv/vit_for_image_classification.py +217 -0
  87. sparknlp/annotator/dataframe_optimizer.py +216 -0
  88. sparknlp/annotator/date2_chunk.py +88 -0
  89. sparknlp/annotator/dependency/__init__.py +17 -0
  90. sparknlp/annotator/dependency/dependency_parser.py +294 -0
  91. sparknlp/annotator/dependency/typed_dependency_parser.py +318 -0
  92. sparknlp/annotator/document_character_text_splitter.py +228 -0
  93. sparknlp/annotator/document_normalizer.py +235 -0
  94. sparknlp/annotator/document_token_splitter.py +175 -0
  95. sparknlp/annotator/document_token_splitter_test.py +85 -0
  96. sparknlp/annotator/embeddings/__init__.py +45 -0
  97. sparknlp/annotator/embeddings/albert_embeddings.py +230 -0
  98. sparknlp/annotator/embeddings/auto_gguf_embeddings.py +539 -0
  99. sparknlp/annotator/embeddings/bert_embeddings.py +208 -0
  100. sparknlp/annotator/embeddings/bert_sentence_embeddings.py +224 -0
  101. sparknlp/annotator/embeddings/bge_embeddings.py +199 -0
  102. sparknlp/annotator/embeddings/camembert_embeddings.py +210 -0
  103. sparknlp/annotator/embeddings/chunk_embeddings.py +149 -0
  104. sparknlp/annotator/embeddings/deberta_embeddings.py +208 -0
  105. sparknlp/annotator/embeddings/distil_bert_embeddings.py +221 -0
  106. sparknlp/annotator/embeddings/doc2vec.py +352 -0
  107. sparknlp/annotator/embeddings/e5_embeddings.py +195 -0
  108. sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
  109. sparknlp/annotator/embeddings/elmo_embeddings.py +251 -0
  110. sparknlp/annotator/embeddings/instructor_embeddings.py +204 -0
  111. sparknlp/annotator/embeddings/longformer_embeddings.py +211 -0
  112. sparknlp/annotator/embeddings/minilm_embeddings.py +189 -0
  113. sparknlp/annotator/embeddings/mpnet_embeddings.py +192 -0
  114. sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
  115. sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
  116. sparknlp/annotator/embeddings/roberta_embeddings.py +225 -0
  117. sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +191 -0
  118. sparknlp/annotator/embeddings/sentence_embeddings.py +134 -0
  119. sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
  120. sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
  121. sparknlp/annotator/embeddings/universal_sentence_encoder.py +211 -0
  122. sparknlp/annotator/embeddings/word2vec.py +353 -0
  123. sparknlp/annotator/embeddings/word_embeddings.py +385 -0
  124. sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +225 -0
  125. sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +194 -0
  126. sparknlp/annotator/embeddings/xlnet_embeddings.py +227 -0
  127. sparknlp/annotator/er/__init__.py +16 -0
  128. sparknlp/annotator/er/entity_ruler.py +267 -0
  129. sparknlp/annotator/graph_extraction.py +368 -0
  130. sparknlp/annotator/keyword_extraction/__init__.py +16 -0
  131. sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +270 -0
  132. sparknlp/annotator/ld_dl/__init__.py +16 -0
  133. sparknlp/annotator/ld_dl/language_detector_dl.py +199 -0
  134. sparknlp/annotator/lemmatizer.py +250 -0
  135. sparknlp/annotator/matcher/__init__.py +20 -0
  136. sparknlp/annotator/matcher/big_text_matcher.py +272 -0
  137. sparknlp/annotator/matcher/date_matcher.py +303 -0
  138. sparknlp/annotator/matcher/multi_date_matcher.py +109 -0
  139. sparknlp/annotator/matcher/regex_matcher.py +221 -0
  140. sparknlp/annotator/matcher/text_matcher.py +290 -0
  141. sparknlp/annotator/n_gram_generator.py +141 -0
  142. sparknlp/annotator/ner/__init__.py +21 -0
  143. sparknlp/annotator/ner/ner_approach.py +94 -0
  144. sparknlp/annotator/ner/ner_converter.py +148 -0
  145. sparknlp/annotator/ner/ner_crf.py +397 -0
  146. sparknlp/annotator/ner/ner_dl.py +591 -0
  147. sparknlp/annotator/ner/ner_dl_graph_checker.py +293 -0
  148. sparknlp/annotator/ner/ner_overwriter.py +166 -0
  149. sparknlp/annotator/ner/zero_shot_ner_model.py +173 -0
  150. sparknlp/annotator/normalizer.py +230 -0
  151. sparknlp/annotator/openai/__init__.py +16 -0
  152. sparknlp/annotator/openai/openai_completion.py +349 -0
  153. sparknlp/annotator/openai/openai_embeddings.py +106 -0
  154. sparknlp/annotator/param/__init__.py +17 -0
  155. sparknlp/annotator/param/classifier_encoder.py +98 -0
  156. sparknlp/annotator/param/evaluation_dl_params.py +130 -0
  157. sparknlp/annotator/pos/__init__.py +16 -0
  158. sparknlp/annotator/pos/perceptron.py +263 -0
  159. sparknlp/annotator/sentence/__init__.py +17 -0
  160. sparknlp/annotator/sentence/sentence_detector.py +290 -0
  161. sparknlp/annotator/sentence/sentence_detector_dl.py +467 -0
  162. sparknlp/annotator/sentiment/__init__.py +17 -0
  163. sparknlp/annotator/sentiment/sentiment_detector.py +208 -0
  164. sparknlp/annotator/sentiment/vivekn_sentiment.py +242 -0
  165. sparknlp/annotator/seq2seq/__init__.py +35 -0
  166. sparknlp/annotator/seq2seq/auto_gguf_model.py +304 -0
  167. sparknlp/annotator/seq2seq/auto_gguf_reranker.py +334 -0
  168. sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +336 -0
  169. sparknlp/annotator/seq2seq/bart_transformer.py +420 -0
  170. sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
  171. sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
  172. sparknlp/annotator/seq2seq/gpt2_transformer.py +363 -0
  173. sparknlp/annotator/seq2seq/llama2_transformer.py +343 -0
  174. sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
  175. sparknlp/annotator/seq2seq/m2m100_transformer.py +392 -0
  176. sparknlp/annotator/seq2seq/marian_transformer.py +374 -0
  177. sparknlp/annotator/seq2seq/mistral_transformer.py +348 -0
  178. sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
  179. sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
  180. sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
  181. sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
  182. sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
  183. sparknlp/annotator/seq2seq/qwen_transformer.py +340 -0
  184. sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
  185. sparknlp/annotator/seq2seq/t5_transformer.py +425 -0
  186. sparknlp/annotator/similarity/__init__.py +0 -0
  187. sparknlp/annotator/similarity/document_similarity_ranker.py +379 -0
  188. sparknlp/annotator/spell_check/__init__.py +18 -0
  189. sparknlp/annotator/spell_check/context_spell_checker.py +911 -0
  190. sparknlp/annotator/spell_check/norvig_sweeting.py +358 -0
  191. sparknlp/annotator/spell_check/symmetric_delete.py +299 -0
  192. sparknlp/annotator/stemmer.py +79 -0
  193. sparknlp/annotator/stop_words_cleaner.py +190 -0
  194. sparknlp/annotator/tf_ner_dl_graph_builder.py +179 -0
  195. sparknlp/annotator/token/__init__.py +19 -0
  196. sparknlp/annotator/token/chunk_tokenizer.py +118 -0
  197. sparknlp/annotator/token/recursive_tokenizer.py +205 -0
  198. sparknlp/annotator/token/regex_tokenizer.py +208 -0
  199. sparknlp/annotator/token/tokenizer.py +561 -0
  200. sparknlp/annotator/token2_chunk.py +76 -0
  201. sparknlp/annotator/ws/__init__.py +16 -0
  202. sparknlp/annotator/ws/word_segmenter.py +429 -0
  203. sparknlp/base/__init__.py +30 -0
  204. sparknlp/base/audio_assembler.py +95 -0
  205. sparknlp/base/doc2_chunk.py +169 -0
  206. sparknlp/base/document_assembler.py +164 -0
  207. sparknlp/base/embeddings_finisher.py +201 -0
  208. sparknlp/base/finisher.py +217 -0
  209. sparknlp/base/gguf_ranking_finisher.py +234 -0
  210. sparknlp/base/graph_finisher.py +125 -0
  211. sparknlp/base/has_recursive_fit.py +24 -0
  212. sparknlp/base/has_recursive_transform.py +22 -0
  213. sparknlp/base/image_assembler.py +172 -0
  214. sparknlp/base/light_pipeline.py +429 -0
  215. sparknlp/base/multi_document_assembler.py +164 -0
  216. sparknlp/base/prompt_assembler.py +207 -0
  217. sparknlp/base/recursive_pipeline.py +107 -0
  218. sparknlp/base/table_assembler.py +145 -0
  219. sparknlp/base/token_assembler.py +124 -0
  220. sparknlp/common/__init__.py +26 -0
  221. sparknlp/common/annotator_approach.py +41 -0
  222. sparknlp/common/annotator_model.py +47 -0
  223. sparknlp/common/annotator_properties.py +114 -0
  224. sparknlp/common/annotator_type.py +38 -0
  225. sparknlp/common/completion_post_processing.py +37 -0
  226. sparknlp/common/coverage_result.py +22 -0
  227. sparknlp/common/match_strategy.py +33 -0
  228. sparknlp/common/properties.py +1298 -0
  229. sparknlp/common/read_as.py +33 -0
  230. sparknlp/common/recursive_annotator_approach.py +35 -0
  231. sparknlp/common/storage.py +149 -0
  232. sparknlp/common/utils.py +39 -0
  233. sparknlp/functions.py +315 -5
  234. sparknlp/internal/__init__.py +1199 -0
  235. sparknlp/internal/annotator_java_ml.py +32 -0
  236. sparknlp/internal/annotator_transformer.py +37 -0
  237. sparknlp/internal/extended_java_wrapper.py +63 -0
  238. sparknlp/internal/params_getters_setters.py +71 -0
  239. sparknlp/internal/recursive.py +70 -0
  240. sparknlp/logging/__init__.py +15 -0
  241. sparknlp/logging/comet.py +467 -0
  242. sparknlp/partition/__init__.py +16 -0
  243. sparknlp/partition/partition.py +244 -0
  244. sparknlp/partition/partition_properties.py +902 -0
  245. sparknlp/partition/partition_transformer.py +200 -0
  246. sparknlp/pretrained/__init__.py +17 -0
  247. sparknlp/pretrained/pretrained_pipeline.py +158 -0
  248. sparknlp/pretrained/resource_downloader.py +216 -0
  249. sparknlp/pretrained/utils.py +35 -0
  250. sparknlp/reader/__init__.py +15 -0
  251. sparknlp/reader/enums.py +19 -0
  252. sparknlp/reader/pdf_to_text.py +190 -0
  253. sparknlp/reader/reader2doc.py +124 -0
  254. sparknlp/reader/reader2image.py +136 -0
  255. sparknlp/reader/reader2table.py +44 -0
  256. sparknlp/reader/reader_assembler.py +159 -0
  257. sparknlp/reader/sparknlp_reader.py +461 -0
  258. sparknlp/training/__init__.py +20 -0
  259. sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  260. sparknlp/training/_tf_graph_builders/graph_builders.py +299 -0
  261. sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  262. sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +41 -0
  263. sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +78 -0
  264. sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +521 -0
  265. sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +62 -0
  266. sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +28 -0
  267. sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +36 -0
  268. sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +385 -0
  269. sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +183 -0
  270. sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +235 -0
  271. sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +665 -0
  272. sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +245 -0
  273. sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +4006 -0
  274. sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  275. sparknlp/training/_tf_graph_builders_1x/graph_builders.py +277 -0
  276. sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  277. sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +34 -0
  278. sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +78 -0
  279. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +532 -0
  280. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +62 -0
  281. sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +28 -0
  282. sparknlp/training/conll.py +150 -0
  283. sparknlp/training/conllu.py +103 -0
  284. sparknlp/training/pos.py +103 -0
  285. sparknlp/training/pub_tator.py +76 -0
  286. sparknlp/training/spacy_to_annotation.py +57 -0
  287. sparknlp/training/tfgraphs.py +5 -0
  288. sparknlp/upload_to_hub.py +149 -0
  289. sparknlp/util.py +51 -5
  290. com/__init__.pyc +0 -0
  291. com/__pycache__/__init__.cpython-36.pyc +0 -0
  292. com/johnsnowlabs/__init__.pyc +0 -0
  293. com/johnsnowlabs/__pycache__/__init__.cpython-36.pyc +0 -0
  294. com/johnsnowlabs/nlp/__init__.pyc +0 -0
  295. com/johnsnowlabs/nlp/__pycache__/__init__.cpython-36.pyc +0 -0
  296. spark_nlp-2.6.3rc1.dist-info/METADATA +0 -36
  297. spark_nlp-2.6.3rc1.dist-info/RECORD +0 -48
  298. sparknlp/__init__.pyc +0 -0
  299. sparknlp/__pycache__/__init__.cpython-36.pyc +0 -0
  300. sparknlp/__pycache__/annotation.cpython-36.pyc +0 -0
  301. sparknlp/__pycache__/annotator.cpython-36.pyc +0 -0
  302. sparknlp/__pycache__/base.cpython-36.pyc +0 -0
  303. sparknlp/__pycache__/common.cpython-36.pyc +0 -0
  304. sparknlp/__pycache__/embeddings.cpython-36.pyc +0 -0
  305. sparknlp/__pycache__/functions.cpython-36.pyc +0 -0
  306. sparknlp/__pycache__/internal.cpython-36.pyc +0 -0
  307. sparknlp/__pycache__/pretrained.cpython-36.pyc +0 -0
  308. sparknlp/__pycache__/storage.cpython-36.pyc +0 -0
  309. sparknlp/__pycache__/training.cpython-36.pyc +0 -0
  310. sparknlp/__pycache__/util.cpython-36.pyc +0 -0
  311. sparknlp/annotation.pyc +0 -0
  312. sparknlp/annotator.py +0 -3006
  313. sparknlp/annotator.pyc +0 -0
  314. sparknlp/base.py +0 -347
  315. sparknlp/base.pyc +0 -0
  316. sparknlp/common.py +0 -193
  317. sparknlp/common.pyc +0 -0
  318. sparknlp/embeddings.py +0 -40
  319. sparknlp/embeddings.pyc +0 -0
  320. sparknlp/internal.py +0 -288
  321. sparknlp/internal.pyc +0 -0
  322. sparknlp/pretrained.py +0 -123
  323. sparknlp/pretrained.pyc +0 -0
  324. sparknlp/storage.py +0 -32
  325. sparknlp/storage.pyc +0 -0
  326. sparknlp/training.py +0 -62
  327. sparknlp/training.pyc +0 -0
  328. sparknlp/util.pyc +0 -0
  329. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,221 @@
1
+ # Copyright 2017-2022 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """Contains classes for the SpanBertCorefModel."""
15
+
16
+ from sparknlp.common import *
17
+
18
+
19
+ class SpanBertCorefModel(AnnotatorModel,
20
+ HasEmbeddingsProperties,
21
+ HasCaseSensitiveProperties,
22
+ HasStorageRef,
23
+ HasEngine,
24
+ HasMaxSentenceLengthLimit):
25
+ """
26
+ A coreference resolution model based on SpanBert.
27
+
28
+ A coreference resolution model identifies expressions which refer to the same entity in a text. For example, given
29
+ a sentence "John told Mary he would like to borrow a book from her." the model will link "he" to "John" and "her"
30
+ to "Mary".
31
+
32
+ This model is based on SpanBert, which is fine-tuned on the OntoNotes 5.0 data set.
33
+
34
+ Pretrained models can be loaded with :meth:`.pretrained` of the companion object:
35
+
36
+ >>> corefResolution = SpanBertCorefModel.pretrained() \\
37
+ ... .setInputCols(["sentence", "token"]) \\
38
+ ... .setOutputCol("coref")
39
+
40
+ The default model is ``"spanbert_base_coref"``, if no name is provided. For available
41
+ pretrained models please see the `Models Hub
42
+ <https://sparknlp.org/models?q=coref>`__.
43
+
44
+ For extended examples of usage, see the
45
+ `Examples <https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/text/english/coreference-resolution/Coreference_Resolution_SpanBertCorefModel.ipynb>`__.
46
+
47
+ ====================== ======================
48
+ Input Annotation types Output Annotation type
49
+ ====================== ======================
50
+ ``DOCUMENT, TOKEN`` ``DEPENDENCY``
51
+ ====================== ======================
52
+
53
+ Parameters
54
+ ----------
55
+ maxSentenceLength
56
+ Maximum sentence length to process
57
+ maxSegmentLength
58
+ Maximum segment length
59
+ textGenre
60
+ Text genre. One of the following values:
61
+
62
+ | "bc", // Broadcast conversation, default
63
+ | "bn", // Broadcast news
64
+ | "nw", // News wire
65
+ | "pt", // Pivot text: Old Testament and New Testament text
66
+ | "tc", // Telephone conversation
67
+ | "wb" // Web data
68
+
69
+ Examples
70
+ --------
71
+ >>> import sparknlp
72
+ >>> from sparknlp.base import *
73
+ >>> from sparknlp.annotator import *
74
+ >>> from pyspark.ml import Pipeline
75
+ >>> documentAssembler = DocumentAssembler() \\
76
+ ... .setInputCol("text") \\
77
+ ... .setOutputCol("document")
78
+ >>> sentence = SentenceDetector() \\
79
+ ... .setInputCols(["document"]) \\
80
+ ... .setOutputCol("sentence")
81
+ >>> tokenizer = Tokenizer() \\
82
+ ... .setInputCols(["sentence"]) \\
83
+ ... .setOutputCol("token")
84
+ >>> corefResolution = SpanBertCorefModel() \\
85
+ ... .pretrained() \\
86
+ ... .setInputCols(["sentence", "token"]) \\
87
+ ... .setOutputCol("corefs")
88
+ >>> pipeline = Pipeline().setStages([
89
+ ... documentAssembler,
90
+ ... sentence,
91
+ ... tokenizer,
92
+ ... corefResolution
93
+ ... ])
94
+ >>> data = spark.createDataFrame([
95
+ ... ["John told Mary he would like to borrow a book from her."]
96
+ ... ]).toDF("text")
97
+ >>> results = pipeline.fit(data).transform(data)
98
+ >>> results \\
99
+ ... .selectExpr("explode(corefs) AS coref") \\
100
+ ... .selectExpr("coref.result as token", "coref.metadata") \\
101
+ ... .show(truncate=False)
102
+ +-----+------------------------------------------------------------------------------------+
103
+ |token|metadata |
104
+ +-----+------------------------------------------------------------------------------------+
105
+ |John |{head.sentence -> -1, head -> ROOT, head.begin -> -1, head.end -> -1, sentence -> 0}|
106
+ |he |{head.sentence -> 0, head -> John, head.begin -> 0, head.end -> 3, sentence -> 0} |
107
+ |Mary |{head.sentence -> -1, head -> ROOT, head.begin -> -1, head.end -> -1, sentence -> 0}|
108
+ |her |{head.sentence -> 0, head -> Mary, head.begin -> 10, head.end -> 13, sentence -> 0} |
109
+ +-----+------------------------------------------------------------------------------------+
110
+ """
111
+
112
+ name = "SpanBertCorefModel"
113
+
114
+ inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]
115
+
116
+ outputAnnotatorType = AnnotatorType.DEPENDENCY
117
+
118
+ maxSegmentLength = Param(Params._dummy(),
119
+ "maxSegmentLength",
120
+ "Max segment length",
121
+ typeConverter=TypeConverters.toInt)
122
+
123
+ textGenre = Param(Params._dummy(),
124
+ "textGenre",
125
+ "Text genre, one of ('bc', 'bn', 'mz', 'nw', 'pt','tc', 'wb')",
126
+ typeConverter=TypeConverters.toString)
127
+
128
+ configProtoBytes = Param(Params._dummy(),
129
+ "configProtoBytes",
130
+ "ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()",
131
+ TypeConverters.toListInt)
132
+
133
+ def setConfigProtoBytes(self, b):
134
+ """Sets configProto from tensorflow, serialized into byte array.
135
+
136
+ Parameters
137
+ ----------
138
+ b : List[int]
139
+ ConfigProto from tensorflow, serialized into byte array
140
+ """
141
+ return self._set(configProtoBytes=b)
142
+
143
+ def setMaxSegmentLength(self, value):
144
+ """Sets max segment length
145
+
146
+ Parameters
147
+ ----------
148
+ value : int
149
+ Max segment length
150
+ """
151
+ return self._set(maxSegmentLength=value)
152
+
153
+ def setTextGenre(self, value):
154
+ """ Sets the text genre, one of the following values:
155
+ | "bc" : Broadcast conversation, default
156
+ | "bn" : Broadcast news
157
+ | "nw" : News wire
158
+ | "pt" : Pivot text: Old Testament and New Testament text
159
+ | "tc" : Telephone conversation
160
+ | "wb" : Web data
161
+
162
+ Parameters
163
+ ----------
164
+ value : string
165
+ Text genre code, default is 'bc'
166
+ """
167
+ return self._set(textGenre=value)
168
+
169
+ @keyword_only
170
+ def __init__(self, classname="com.johnsnowlabs.nlp.annotators.coref.SpanBertCorefModel", java_model=None):
171
+ super(SpanBertCorefModel, self).__init__(
172
+ classname=classname,
173
+ java_model=java_model
174
+ )
175
+ self._setDefault(
176
+ maxSentenceLength=512,
177
+ caseSensitive=True,
178
+ textGenre="bc"
179
+ )
180
+
181
+ @staticmethod
182
+ def loadSavedModel(folder, spark_session):
183
+ """Loads a locally saved model.
184
+
185
+ Parameters
186
+ ----------
187
+ folder : str
188
+ Folder of the saved model
189
+ spark_session : pyspark.sql.SparkSession
190
+ The current SparkSession
191
+
192
+ Returns
193
+ -------
194
+ SpanBertCorefModel
195
+ The restored model
196
+ """
197
+ from sparknlp.internal import _SpanBertCorefLoader
198
+ jModel = _SpanBertCorefLoader(folder, spark_session._jsparkSession)._java_obj
199
+ return SpanBertCorefModel(java_model=jModel)
200
+
201
+ @staticmethod
202
+ def pretrained(name="spanbert_base_coref", lang="en", remote_loc=None):
203
+ """Downloads and loads a pretrained model.
204
+
205
+ Parameters
206
+ ----------
207
+ name : str, optional
208
+ Name of the pretrained model, by default "spanbert_base_coref"
209
+ lang : str, optional
210
+ Language of the pretrained model, by default "en"
211
+ remote_loc : str, optional
212
+ Optional remote address of the resource, by default None. Will use
213
+ Spark NLPs repositories otherwise.
214
+
215
+ Returns
216
+ -------
217
+ SpanBertCorefModel
218
+ The restored model
219
+ """
220
+ from sparknlp.pretrained import ResourceDownloader
221
+ return ResourceDownloader.downloadModel(SpanBertCorefModel, name, lang, remote_loc)
@@ -0,0 +1,29 @@
1
+ # Copyright 2017-2022 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from sparknlp.annotator.cv.vit_for_image_classification import *
15
+ from sparknlp.annotator.cv.swin_for_image_classification import *
16
+ from sparknlp.annotator.cv.convnext_for_image_classification import *
17
+ from sparknlp.annotator.cv.vision_encoder_decoder_for_image_captioning import *
18
+ from sparknlp.annotator.cv.clip_for_zero_shot_classification import *
19
+ from sparknlp.annotator.cv.blip_for_question_answering import *
20
+ from sparknlp.annotator.cv.janus_for_multimodal import *
21
+ from sparknlp.annotator.cv.mllama_for_multimodal import *
22
+ from sparknlp.annotator.cv.qwen2vl_transformer import *
23
+ from sparknlp.annotator.cv.llava_for_multimodal import *
24
+ from sparknlp.annotator.cv.phi3_vision_for_multimodal import *
25
+ from sparknlp.annotator.cv.smolvlm_transformer import *
26
+ from sparknlp.annotator.cv.paligemma_for_multimodal import *
27
+ from sparknlp.annotator.cv.gemma3_for_multimodal import *
28
+ from sparknlp.annotator.cv.internvl_for_multimodal import *
29
+ from sparknlp.annotator.cv.florence2_transformer import *
@@ -0,0 +1,172 @@
1
+ # Copyright 2017-2024 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from sparknlp.common import *
16
+
17
class BLIPForQuestionAnswering(AnnotatorModel,
                               HasBatchedAnnotateImage,
                               HasImageFeatureProperties,
                               HasEngine,
                               HasCandidateLabelsProperties,
                               HasRescaleFactor):
    """BLIPForQuestionAnswering can load BLIP models for visual question answering.

    The model consists of a vision encoder, a text encoder as well as a text
    decoder. The vision encoder will encode the input image, the text encoder
    will encode the input question together with the encoding of the image, and
    the text decoder will output the answer to the question.

    Pretrained models can be loaded with :meth:`.pretrained` of the companion
    object:

    >>> visualQAClassifier = BLIPForQuestionAnswering.pretrained() \\
    ...     .setInputCols(["image_assembler"]) \\
    ...     .setOutputCol("answer")

    The default model is ``"blip_vqa_base"``, if no name is provided.

    For available pretrained models please see the `Models Hub
    <https://sparknlp.org/models?task=Question+Answering>`__.

    To see which models are compatible and how to import them see
    `Import Transformers into Spark NLP 🚀
    <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.

    ====================== ======================
    Input Annotation types Output Annotation type
    ====================== ======================
    ``IMAGE``              ``DOCUMENT``
    ====================== ======================

    Parameters
    ----------
    batchSize
        Batch size. Large values allows faster processing but requires more
        memory, by default 2
    configProtoBytes
        ConfigProto from tensorflow, serialized into byte array.
    maxSentenceLength
        Max sentence length to process, by default 50

    Examples
    --------
    >>> import sparknlp
    >>> from sparknlp.base import *
    >>> from sparknlp.annotator import *
    >>> from pyspark.ml import Pipeline
    >>> image_df = SparkSessionForTest.spark.read.format("image").load(path=images_path)
    >>> test_df = image_df.withColumn("text", lit("What's this picture about?"))
    >>> imageAssembler = ImageAssembler() \\
    ...     .setInputCol("image") \\
    ...     .setOutputCol("image_assembler")
    >>> visualQAClassifier = BLIPForQuestionAnswering.pretrained() \\
    ...     .setInputCols("image_assembler") \\
    ...     .setOutputCol("answer") \\
    ...     .setSize(384)
    >>> pipeline = Pipeline().setStages([
    ...     imageAssembler,
    ...     visualQAClassifier
    ... ])
    >>> result = pipeline.fit(test_df).transform(test_df)
    >>> result.select("image_assembler.origin", "answer.result").show(truncate=False)
    +--------------------------------------+------+
    |origin                                |result|
    +--------------------------------------+------+
    |[file:///content/images/cat_image.jpg]|[cats]|
    +--------------------------------------+------+
    """

    name = "BLIPForQuestionAnswering"

    inputAnnotatorTypes = [AnnotatorType.IMAGE]

    outputAnnotatorType = AnnotatorType.DOCUMENT

    configProtoBytes = Param(Params._dummy(),
                             "configProtoBytes",
                             "ConfigProto from tensorflow, serialized into byte array. Get with "
                             "config_proto.SerializeToString()",
                             TypeConverters.toListInt)

    maxSentenceLength = Param(Params._dummy(),
                              "maxSentenceLength",
                              "Maximum sentence length that the annotator will process. Above this, the sentence is skipped",
                              typeConverter=TypeConverters.toInt)

    def setMaxSentenceLength(self, value):
        """Sets Maximum sentence length that the annotator will process, by
        default 50.

        Parameters
        ----------
        value : int
            Maximum sentence length that the annotator will process
        """
        return self._set(maxSentenceLength=value)

    def setMaxSentenceSize(self, value):
        """Sets Maximum sentence length that the annotator will process, by
        default 50.

        .. deprecated::
            Misnamed setter kept for backward compatibility; use
            :meth:`setMaxSentenceLength` instead.

        Parameters
        ----------
        value : int
            Maximum sentence length that the annotator will process
        """
        return self.setMaxSentenceLength(value)

    @keyword_only
    def __init__(self, classname="com.johnsnowlabs.nlp.annotators.cv.BLIPForQuestionAnswering",
                 java_model=None):
        super(BLIPForQuestionAnswering, self).__init__(
            classname=classname,
            java_model=java_model
        )
        self._setDefault(
            batchSize=2,
            size=384,
            maxSentenceLength=50
        )

    @staticmethod
    def loadSavedModel(folder, spark_session):
        """Loads a locally saved model.

        Parameters
        ----------
        folder : str
            Folder of the saved model
        spark_session : pyspark.sql.SparkSession
            The current SparkSession

        Returns
        -------
        BLIPForQuestionAnswering
            The restored model
        """
        from sparknlp.internal import _BLIPForQuestionAnswering
        jModel = _BLIPForQuestionAnswering(folder, spark_session._jsparkSession)._java_obj
        return BLIPForQuestionAnswering(java_model=jModel)

    @staticmethod
    def pretrained(name="blip_vqa_base", lang="en", remote_loc=None):
        """Downloads and loads a pretrained model.

        Parameters
        ----------
        name : str, optional
            Name of the pretrained model, by default
            "blip_vqa_base"
        lang : str, optional
            Language of the pretrained model, by default "en"
        remote_loc : str, optional
            Optional remote address of the resource, by default None. Will use
            Spark NLP's repositories otherwise.

        Returns
        -------
        BLIPForQuestionAnswering
            The restored model
        """
        from sparknlp.pretrained import ResourceDownloader
        return ResourceDownloader.downloadModel(BLIPForQuestionAnswering, name, lang, remote_loc)
@@ -0,0 +1,193 @@
1
+ # Copyright 2017-2022 John Snow Labs
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Contains classes concerning CLIPForZeroShotClassification."""
16
+
17
+ from sparknlp.common import *
18
+
19
+
20
class CLIPForZeroShotClassification(AnnotatorModel,
                                    HasBatchedAnnotateImage,
                                    HasImageFeatureProperties,
                                    HasEngine,
                                    HasCandidateLabelsProperties,
                                    HasRescaleFactor):
    """Zero Shot Image Classifier based on CLIP.

    CLIP (Contrastive Language-Image Pre-Training) is a neural network that was trained on image
    and text pairs. It has the ability to predict images without training on any hard-coded
    labels. This makes it very flexible, as labels can be provided during inference. This is
    similar to the zero-shot capabilities of the GPT-2 and 3 models.

    Pretrained models can be loaded with ``pretrained`` of the companion object:


    .. code-block:: python

        imageClassifier = CLIPForZeroShotClassification.pretrained() \\
            .setInputCols(["image_assembler"]) \\
            .setOutputCol("label")


    The default model is ``"zero_shot_classifier_clip_vit_base_patch32"``, if no name is provided.

    For available pretrained models please see the
    `Models Hub <https://sparknlp.org/models?task=Zero-Shot+Classification>`__.

    Models from the HuggingFace 🤗 Transformers library are also compatible with Spark NLP 🚀. To
    see which models are compatible and how to import them see
    https://github.com/JohnSnowLabs/spark-nlp/discussions/5669 and to see more extended
    examples, see
    `CLIPForZeroShotClassificationTestSpec <https://github.com/JohnSnowLabs/spark-nlp/blob/master/src/test/scala/com/johnsnowlabs/nlp/annotators/cv/CLIPForZeroShotClassificationTestSpec.scala>`__.

    ====================== ======================
    Input Annotation types Output Annotation type
    ====================== ======================
    ``IMAGE``              ``CATEGORY``
    ====================== ======================

    Parameters
    ----------
    batchSize
        Batch size, by default `2`.
    candidateLabels
        Array of labels for classification

    Examples
    --------
    >>> import sparknlp
    >>> from sparknlp.base import *
    >>> from sparknlp.annotator import *
    >>> from pyspark.ml import Pipeline
    >>> imageDF = spark.read \\
    ...     .format("image") \\
    ...     .option("dropInvalid", value = True) \\
    ...     .load("src/test/resources/image/")
    >>> imageAssembler = ImageAssembler() \\
    ...     .setInputCol("image") \\
    ...     .setOutputCol("image_assembler")
    >>> candidateLabels = [
    ...     "a photo of a bird",
    ...     "a photo of a cat",
    ...     "a photo of a dog",
    ...     "a photo of a hen",
    ...     "a photo of a hippo",
    ...     "a photo of a room",
    ...     "a photo of a tractor",
    ...     "a photo of an ostrich",
    ...     "a photo of an ox"]
    >>> imageClassifier = CLIPForZeroShotClassification \\
    ...     .pretrained() \\
    ...     .setInputCols(["image_assembler"]) \\
    ...     .setOutputCol("label") \\
    ...     .setCandidateLabels(candidateLabels)
    >>> pipeline = Pipeline().setStages([imageAssembler, imageClassifier])
    >>> pipelineDF = pipeline.fit(imageDF).transform(imageDF)
    >>> pipelineDF \\
    ...     .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "label.result") \\
    ...     .show(truncate=False)
    +-----------------+-----------------------+
    |image_name       |result                 |
    +-----------------+-----------------------+
    |palace.JPEG      |[a photo of a room]    |
    |egyptian_cat.jpeg|[a photo of a cat]     |
    |hippopotamus.JPEG|[a photo of a hippo]   |
    |hen.JPEG         |[a photo of a hen]     |
    |ostrich.JPEG     |[a photo of an ostrich]|
    |junco.JPEG       |[a photo of a bird]    |
    |bluetick.jpg     |[a photo of a dog]     |
    |chihuahua.jpg    |[a photo of a dog]     |
    |tractor.JPEG     |[a photo of a tractor] |
    |ox.JPEG          |[a photo of an ox]     |
    +-----------------+-----------------------+
    """
    name = "CLIPForZeroShotClassification"

    inputAnnotatorTypes = [AnnotatorType.IMAGE]

    outputAnnotatorType = AnnotatorType.CATEGORY

    configProtoBytes = Param(Params._dummy(),
                             "configProtoBytes",
                             "ConfigProto from tensorflow, serialized into byte array. Get with "
                             "config_proto.SerializeToString()",
                             TypeConverters.toListInt)

    def getCandidateLabels(self):
        """
        Returns labels used to train this model
        """
        return self._call_java("getCandidateLabels")

    @keyword_only
    def __init__(self, classname="com.johnsnowlabs.nlp.annotators.cv.CLIPForZeroShotClassification",
                 java_model=None):
        super(CLIPForZeroShotClassification, self).__init__(
            classname=classname,
            java_model=java_model
        )
        # Image preprocessing defaults match the original CLIP feature extractor
        # (224x224 input, bicubic resample=2, CLIP-specific normalization stats).
        self._setDefault(
            batchSize=2,
            doNormalize=True,
            doRescale=True,
            doResize=True,
            imageMean=[0.48145466, 0.4578275, 0.40821073],
            imageStd=[0.26862954, 0.26130258, 0.27577711],
            resample=2,
            rescaleFactor=1 / 255.0,
            size=224
        )

    @staticmethod
    def loadSavedModel(folder, spark_session):
        """Loads a locally saved model.

        Parameters
        ----------
        folder : str
            Folder of the saved model
        spark_session : pyspark.sql.SparkSession
            The current SparkSession

        Returns
        -------
        CLIPForZeroShotClassification
            The restored model
        """
        from sparknlp.internal import _CLIPForZeroShotClassification
        jModel = _CLIPForZeroShotClassification(folder, spark_session._jsparkSession)._java_obj
        return CLIPForZeroShotClassification(java_model=jModel)

    @staticmethod
    def pretrained(name="zero_shot_classifier_clip_vit_base_patch32", lang="en", remote_loc=None):
        """Downloads and loads a pretrained model.

        Parameters
        ----------
        name : str, optional
            Name of the pretrained model, by default
            "zero_shot_classifier_clip_vit_base_patch32"
        lang : str, optional
            Language of the pretrained model, by default "en"
        remote_loc : str, optional
            Optional remote address of the resource, by default None. Will use
            Spark NLP's repositories otherwise.

        Returns
        -------
        CLIPForZeroShotClassification
            The restored model
        """
        from sparknlp.pretrained import ResourceDownloader
        return ResourceDownloader.downloadModel(CLIPForZeroShotClassification, name, lang, remote_loc)