spark-nlp 2.6.3rc1__py2.py3-none-any.whl → 6.2.1__py2.py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Files changed (329)
  1. com/johnsnowlabs/ml/__init__.py +0 -0
  2. com/johnsnowlabs/ml/ai/__init__.py +10 -0
  3. com/johnsnowlabs/nlp/__init__.py +4 -2
  4. spark_nlp-6.2.1.dist-info/METADATA +362 -0
  5. spark_nlp-6.2.1.dist-info/RECORD +292 -0
  6. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/WHEEL +1 -1
  7. sparknlp/__init__.py +281 -27
  8. sparknlp/annotation.py +137 -6
  9. sparknlp/annotation_audio.py +61 -0
  10. sparknlp/annotation_image.py +82 -0
  11. sparknlp/annotator/__init__.py +93 -0
  12. sparknlp/annotator/audio/__init__.py +16 -0
  13. sparknlp/annotator/audio/hubert_for_ctc.py +188 -0
  14. sparknlp/annotator/audio/wav2vec2_for_ctc.py +161 -0
  15. sparknlp/annotator/audio/whisper_for_ctc.py +251 -0
  16. sparknlp/annotator/chunk2_doc.py +85 -0
  17. sparknlp/annotator/chunker.py +137 -0
  18. sparknlp/annotator/classifier_dl/__init__.py +61 -0
  19. sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
  20. sparknlp/annotator/classifier_dl/albert_for_question_answering.py +172 -0
  21. sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +201 -0
  22. sparknlp/annotator/classifier_dl/albert_for_token_classification.py +179 -0
  23. sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
  24. sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
  25. sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +161 -0
  26. sparknlp/annotator/classifier_dl/bert_for_question_answering.py +168 -0
  27. sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +202 -0
  28. sparknlp/annotator/classifier_dl/bert_for_token_classification.py +177 -0
  29. sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +212 -0
  30. sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +168 -0
  31. sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +205 -0
  32. sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +173 -0
  33. sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
  34. sparknlp/annotator/classifier_dl/classifier_dl.py +320 -0
  35. sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +168 -0
  36. sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +198 -0
  37. sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +175 -0
  38. sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +193 -0
  39. sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +168 -0
  40. sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +201 -0
  41. sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +175 -0
  42. sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +211 -0
  43. sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
  44. sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +168 -0
  45. sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +201 -0
  46. sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +176 -0
  47. sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +148 -0
  48. sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +188 -0
  49. sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
  50. sparknlp/annotator/classifier_dl/multi_classifier_dl.py +395 -0
  51. sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
  52. sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +168 -0
  53. sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +201 -0
  54. sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +189 -0
  55. sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +225 -0
  56. sparknlp/annotator/classifier_dl/sentiment_dl.py +378 -0
  57. sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +170 -0
  58. sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
  59. sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +168 -0
  60. sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +201 -0
  61. sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +173 -0
  62. sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +225 -0
  63. sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +201 -0
  64. sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +176 -0
  65. sparknlp/annotator/cleaners/__init__.py +15 -0
  66. sparknlp/annotator/cleaners/cleaner.py +202 -0
  67. sparknlp/annotator/cleaners/extractor.py +191 -0
  68. sparknlp/annotator/coref/__init__.py +1 -0
  69. sparknlp/annotator/coref/spanbert_coref.py +221 -0
  70. sparknlp/annotator/cv/__init__.py +29 -0
  71. sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
  72. sparknlp/annotator/cv/clip_for_zero_shot_classification.py +193 -0
  73. sparknlp/annotator/cv/convnext_for_image_classification.py +269 -0
  74. sparknlp/annotator/cv/florence2_transformer.py +180 -0
  75. sparknlp/annotator/cv/gemma3_for_multimodal.py +346 -0
  76. sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
  77. sparknlp/annotator/cv/janus_for_multimodal.py +351 -0
  78. sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
  79. sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
  80. sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
  81. sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
  82. sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
  83. sparknlp/annotator/cv/smolvlm_transformer.py +426 -0
  84. sparknlp/annotator/cv/swin_for_image_classification.py +242 -0
  85. sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +240 -0
  86. sparknlp/annotator/cv/vit_for_image_classification.py +217 -0
  87. sparknlp/annotator/dataframe_optimizer.py +216 -0
  88. sparknlp/annotator/date2_chunk.py +88 -0
  89. sparknlp/annotator/dependency/__init__.py +17 -0
  90. sparknlp/annotator/dependency/dependency_parser.py +294 -0
  91. sparknlp/annotator/dependency/typed_dependency_parser.py +318 -0
  92. sparknlp/annotator/document_character_text_splitter.py +228 -0
  93. sparknlp/annotator/document_normalizer.py +235 -0
  94. sparknlp/annotator/document_token_splitter.py +175 -0
  95. sparknlp/annotator/document_token_splitter_test.py +85 -0
  96. sparknlp/annotator/embeddings/__init__.py +45 -0
  97. sparknlp/annotator/embeddings/albert_embeddings.py +230 -0
  98. sparknlp/annotator/embeddings/auto_gguf_embeddings.py +539 -0
  99. sparknlp/annotator/embeddings/bert_embeddings.py +208 -0
  100. sparknlp/annotator/embeddings/bert_sentence_embeddings.py +224 -0
  101. sparknlp/annotator/embeddings/bge_embeddings.py +199 -0
  102. sparknlp/annotator/embeddings/camembert_embeddings.py +210 -0
  103. sparknlp/annotator/embeddings/chunk_embeddings.py +149 -0
  104. sparknlp/annotator/embeddings/deberta_embeddings.py +208 -0
  105. sparknlp/annotator/embeddings/distil_bert_embeddings.py +221 -0
  106. sparknlp/annotator/embeddings/doc2vec.py +352 -0
  107. sparknlp/annotator/embeddings/e5_embeddings.py +195 -0
  108. sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
  109. sparknlp/annotator/embeddings/elmo_embeddings.py +251 -0
  110. sparknlp/annotator/embeddings/instructor_embeddings.py +204 -0
  111. sparknlp/annotator/embeddings/longformer_embeddings.py +211 -0
  112. sparknlp/annotator/embeddings/minilm_embeddings.py +189 -0
  113. sparknlp/annotator/embeddings/mpnet_embeddings.py +192 -0
  114. sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
  115. sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
  116. sparknlp/annotator/embeddings/roberta_embeddings.py +225 -0
  117. sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +191 -0
  118. sparknlp/annotator/embeddings/sentence_embeddings.py +134 -0
  119. sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
  120. sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
  121. sparknlp/annotator/embeddings/universal_sentence_encoder.py +211 -0
  122. sparknlp/annotator/embeddings/word2vec.py +353 -0
  123. sparknlp/annotator/embeddings/word_embeddings.py +385 -0
  124. sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +225 -0
  125. sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +194 -0
  126. sparknlp/annotator/embeddings/xlnet_embeddings.py +227 -0
  127. sparknlp/annotator/er/__init__.py +16 -0
  128. sparknlp/annotator/er/entity_ruler.py +267 -0
  129. sparknlp/annotator/graph_extraction.py +368 -0
  130. sparknlp/annotator/keyword_extraction/__init__.py +16 -0
  131. sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +270 -0
  132. sparknlp/annotator/ld_dl/__init__.py +16 -0
  133. sparknlp/annotator/ld_dl/language_detector_dl.py +199 -0
  134. sparknlp/annotator/lemmatizer.py +250 -0
  135. sparknlp/annotator/matcher/__init__.py +20 -0
  136. sparknlp/annotator/matcher/big_text_matcher.py +272 -0
  137. sparknlp/annotator/matcher/date_matcher.py +303 -0
  138. sparknlp/annotator/matcher/multi_date_matcher.py +109 -0
  139. sparknlp/annotator/matcher/regex_matcher.py +221 -0
  140. sparknlp/annotator/matcher/text_matcher.py +290 -0
  141. sparknlp/annotator/n_gram_generator.py +141 -0
  142. sparknlp/annotator/ner/__init__.py +21 -0
  143. sparknlp/annotator/ner/ner_approach.py +94 -0
  144. sparknlp/annotator/ner/ner_converter.py +148 -0
  145. sparknlp/annotator/ner/ner_crf.py +397 -0
  146. sparknlp/annotator/ner/ner_dl.py +591 -0
  147. sparknlp/annotator/ner/ner_dl_graph_checker.py +293 -0
  148. sparknlp/annotator/ner/ner_overwriter.py +166 -0
  149. sparknlp/annotator/ner/zero_shot_ner_model.py +173 -0
  150. sparknlp/annotator/normalizer.py +230 -0
  151. sparknlp/annotator/openai/__init__.py +16 -0
  152. sparknlp/annotator/openai/openai_completion.py +349 -0
  153. sparknlp/annotator/openai/openai_embeddings.py +106 -0
  154. sparknlp/annotator/param/__init__.py +17 -0
  155. sparknlp/annotator/param/classifier_encoder.py +98 -0
  156. sparknlp/annotator/param/evaluation_dl_params.py +130 -0
  157. sparknlp/annotator/pos/__init__.py +16 -0
  158. sparknlp/annotator/pos/perceptron.py +263 -0
  159. sparknlp/annotator/sentence/__init__.py +17 -0
  160. sparknlp/annotator/sentence/sentence_detector.py +290 -0
  161. sparknlp/annotator/sentence/sentence_detector_dl.py +467 -0
  162. sparknlp/annotator/sentiment/__init__.py +17 -0
  163. sparknlp/annotator/sentiment/sentiment_detector.py +208 -0
  164. sparknlp/annotator/sentiment/vivekn_sentiment.py +242 -0
  165. sparknlp/annotator/seq2seq/__init__.py +35 -0
  166. sparknlp/annotator/seq2seq/auto_gguf_model.py +304 -0
  167. sparknlp/annotator/seq2seq/auto_gguf_reranker.py +334 -0
  168. sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +336 -0
  169. sparknlp/annotator/seq2seq/bart_transformer.py +420 -0
  170. sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
  171. sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
  172. sparknlp/annotator/seq2seq/gpt2_transformer.py +363 -0
  173. sparknlp/annotator/seq2seq/llama2_transformer.py +343 -0
  174. sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
  175. sparknlp/annotator/seq2seq/m2m100_transformer.py +392 -0
  176. sparknlp/annotator/seq2seq/marian_transformer.py +374 -0
  177. sparknlp/annotator/seq2seq/mistral_transformer.py +348 -0
  178. sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
  179. sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
  180. sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
  181. sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
  182. sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
  183. sparknlp/annotator/seq2seq/qwen_transformer.py +340 -0
  184. sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
  185. sparknlp/annotator/seq2seq/t5_transformer.py +425 -0
  186. sparknlp/annotator/similarity/__init__.py +0 -0
  187. sparknlp/annotator/similarity/document_similarity_ranker.py +379 -0
  188. sparknlp/annotator/spell_check/__init__.py +18 -0
  189. sparknlp/annotator/spell_check/context_spell_checker.py +911 -0
  190. sparknlp/annotator/spell_check/norvig_sweeting.py +358 -0
  191. sparknlp/annotator/spell_check/symmetric_delete.py +299 -0
  192. sparknlp/annotator/stemmer.py +79 -0
  193. sparknlp/annotator/stop_words_cleaner.py +190 -0
  194. sparknlp/annotator/tf_ner_dl_graph_builder.py +179 -0
  195. sparknlp/annotator/token/__init__.py +19 -0
  196. sparknlp/annotator/token/chunk_tokenizer.py +118 -0
  197. sparknlp/annotator/token/recursive_tokenizer.py +205 -0
  198. sparknlp/annotator/token/regex_tokenizer.py +208 -0
  199. sparknlp/annotator/token/tokenizer.py +561 -0
  200. sparknlp/annotator/token2_chunk.py +76 -0
  201. sparknlp/annotator/ws/__init__.py +16 -0
  202. sparknlp/annotator/ws/word_segmenter.py +429 -0
  203. sparknlp/base/__init__.py +30 -0
  204. sparknlp/base/audio_assembler.py +95 -0
  205. sparknlp/base/doc2_chunk.py +169 -0
  206. sparknlp/base/document_assembler.py +164 -0
  207. sparknlp/base/embeddings_finisher.py +201 -0
  208. sparknlp/base/finisher.py +217 -0
  209. sparknlp/base/gguf_ranking_finisher.py +234 -0
  210. sparknlp/base/graph_finisher.py +125 -0
  211. sparknlp/base/has_recursive_fit.py +24 -0
  212. sparknlp/base/has_recursive_transform.py +22 -0
  213. sparknlp/base/image_assembler.py +172 -0
  214. sparknlp/base/light_pipeline.py +429 -0
  215. sparknlp/base/multi_document_assembler.py +164 -0
  216. sparknlp/base/prompt_assembler.py +207 -0
  217. sparknlp/base/recursive_pipeline.py +107 -0
  218. sparknlp/base/table_assembler.py +145 -0
  219. sparknlp/base/token_assembler.py +124 -0
  220. sparknlp/common/__init__.py +26 -0
  221. sparknlp/common/annotator_approach.py +41 -0
  222. sparknlp/common/annotator_model.py +47 -0
  223. sparknlp/common/annotator_properties.py +114 -0
  224. sparknlp/common/annotator_type.py +38 -0
  225. sparknlp/common/completion_post_processing.py +37 -0
  226. sparknlp/common/coverage_result.py +22 -0
  227. sparknlp/common/match_strategy.py +33 -0
  228. sparknlp/common/properties.py +1298 -0
  229. sparknlp/common/read_as.py +33 -0
  230. sparknlp/common/recursive_annotator_approach.py +35 -0
  231. sparknlp/common/storage.py +149 -0
  232. sparknlp/common/utils.py +39 -0
  233. sparknlp/functions.py +315 -5
  234. sparknlp/internal/__init__.py +1199 -0
  235. sparknlp/internal/annotator_java_ml.py +32 -0
  236. sparknlp/internal/annotator_transformer.py +37 -0
  237. sparknlp/internal/extended_java_wrapper.py +63 -0
  238. sparknlp/internal/params_getters_setters.py +71 -0
  239. sparknlp/internal/recursive.py +70 -0
  240. sparknlp/logging/__init__.py +15 -0
  241. sparknlp/logging/comet.py +467 -0
  242. sparknlp/partition/__init__.py +16 -0
  243. sparknlp/partition/partition.py +244 -0
  244. sparknlp/partition/partition_properties.py +902 -0
  245. sparknlp/partition/partition_transformer.py +200 -0
  246. sparknlp/pretrained/__init__.py +17 -0
  247. sparknlp/pretrained/pretrained_pipeline.py +158 -0
  248. sparknlp/pretrained/resource_downloader.py +216 -0
  249. sparknlp/pretrained/utils.py +35 -0
  250. sparknlp/reader/__init__.py +15 -0
  251. sparknlp/reader/enums.py +19 -0
  252. sparknlp/reader/pdf_to_text.py +190 -0
  253. sparknlp/reader/reader2doc.py +124 -0
  254. sparknlp/reader/reader2image.py +136 -0
  255. sparknlp/reader/reader2table.py +44 -0
  256. sparknlp/reader/reader_assembler.py +159 -0
  257. sparknlp/reader/sparknlp_reader.py +461 -0
  258. sparknlp/training/__init__.py +20 -0
  259. sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  260. sparknlp/training/_tf_graph_builders/graph_builders.py +299 -0
  261. sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  262. sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +41 -0
  263. sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +78 -0
  264. sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +521 -0
  265. sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +62 -0
  266. sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +28 -0
  267. sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +36 -0
  268. sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +385 -0
  269. sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +183 -0
  270. sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +235 -0
  271. sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +665 -0
  272. sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +245 -0
  273. sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +4006 -0
  274. sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  275. sparknlp/training/_tf_graph_builders_1x/graph_builders.py +277 -0
  276. sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  277. sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +34 -0
  278. sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +78 -0
  279. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +532 -0
  280. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +62 -0
  281. sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +28 -0
  282. sparknlp/training/conll.py +150 -0
  283. sparknlp/training/conllu.py +103 -0
  284. sparknlp/training/pos.py +103 -0
  285. sparknlp/training/pub_tator.py +76 -0
  286. sparknlp/training/spacy_to_annotation.py +57 -0
  287. sparknlp/training/tfgraphs.py +5 -0
  288. sparknlp/upload_to_hub.py +149 -0
  289. sparknlp/util.py +51 -5
  290. com/__init__.pyc +0 -0
  291. com/__pycache__/__init__.cpython-36.pyc +0 -0
  292. com/johnsnowlabs/__init__.pyc +0 -0
  293. com/johnsnowlabs/__pycache__/__init__.cpython-36.pyc +0 -0
  294. com/johnsnowlabs/nlp/__init__.pyc +0 -0
  295. com/johnsnowlabs/nlp/__pycache__/__init__.cpython-36.pyc +0 -0
  296. spark_nlp-2.6.3rc1.dist-info/METADATA +0 -36
  297. spark_nlp-2.6.3rc1.dist-info/RECORD +0 -48
  298. sparknlp/__init__.pyc +0 -0
  299. sparknlp/__pycache__/__init__.cpython-36.pyc +0 -0
  300. sparknlp/__pycache__/annotation.cpython-36.pyc +0 -0
  301. sparknlp/__pycache__/annotator.cpython-36.pyc +0 -0
  302. sparknlp/__pycache__/base.cpython-36.pyc +0 -0
  303. sparknlp/__pycache__/common.cpython-36.pyc +0 -0
  304. sparknlp/__pycache__/embeddings.cpython-36.pyc +0 -0
  305. sparknlp/__pycache__/functions.cpython-36.pyc +0 -0
  306. sparknlp/__pycache__/internal.cpython-36.pyc +0 -0
  307. sparknlp/__pycache__/pretrained.cpython-36.pyc +0 -0
  308. sparknlp/__pycache__/storage.cpython-36.pyc +0 -0
  309. sparknlp/__pycache__/training.cpython-36.pyc +0 -0
  310. sparknlp/__pycache__/util.cpython-36.pyc +0 -0
  311. sparknlp/annotation.pyc +0 -0
  312. sparknlp/annotator.py +0 -3006
  313. sparknlp/annotator.pyc +0 -0
  314. sparknlp/base.py +0 -347
  315. sparknlp/base.pyc +0 -0
  316. sparknlp/common.py +0 -193
  317. sparknlp/common.pyc +0 -0
  318. sparknlp/embeddings.py +0 -40
  319. sparknlp/embeddings.pyc +0 -0
  320. sparknlp/internal.py +0 -288
  321. sparknlp/internal.pyc +0 -0
  322. sparknlp/pretrained.py +0 -123
  323. sparknlp/pretrained.pyc +0 -0
  324. sparknlp/storage.py +0 -32
  325. sparknlp/storage.pyc +0 -0
  326. sparknlp/training.py +0 -62
  327. sparknlp/training.pyc +0 -0
  328. sparknlp/util.pyc +0 -0
  329. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/top_level.txt +0 -0
sparknlp/common/read_as.py ADDED
@@ -0,0 +1,33 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains utility classes for reading resources."""
+
+
+ class ReadAs(object):
+     """Object that contains constants for how to read Spark Resources.
+
+     Possible values are:
+
+     ================= =======================================
+     Value             Description
+     ================= =======================================
+     ``ReadAs.TEXT``   Read the resource as text.
+     ``ReadAs.SPARK``  Read the resource as a Spark DataFrame.
+     ``ReadAs.BINARY`` Read the resource as a binary file.
+     ================= =======================================
+     """
+     TEXT = "TEXT"
+     SPARK = "SPARK"
+     BINARY = "BINARY"
+
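
As a brief, hedged usage sketch of where these constants end up (the dictionary file path here is hypothetical): annotators that load external resources accept one of the ReadAs values, for example Lemmatizer.setDictionary.

    from sparknlp.annotator import Lemmatizer
    from sparknlp.common import ReadAs

    # "lemmas.txt" is a hypothetical local dictionary file; ReadAs.TEXT
    # tells the loader to treat it as plain text rather than a Spark
    # DataFrame or a binary blob.
    lemmatizer = Lemmatizer() \
        .setInputCols(["token"]) \
        .setOutputCol("lemma") \
        .setDictionary("lemmas.txt", key_delimiter="->", value_delimiter="\t",
                       read_as=ReadAs.TEXT)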
sparknlp/common/recursive_annotator_approach.py ADDED
@@ -0,0 +1,35 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains base classes for recursive AnnotatorApproaches."""
+
+ from pyspark import keyword_only
+ from pyspark.ml.util import JavaMLWritable
+
+ import sparknlp.internal as _internal
+ from sparknlp.common import AnnotatorProperties
+
+
+ class RecursiveAnnotatorApproach(_internal.RecursiveEstimator, JavaMLWritable, _internal.AnnotatorJavaMLReadable,
+                                  AnnotatorProperties,
+                                  _internal.ParamsGettersSetters):
+     @keyword_only
+     def __init__(self, classname):
+         _internal.ParamsGettersSetters.__init__(self)
+         self.__class__._java_class_name = classname
+         self._java_obj = self._new_java_obj(classname, self.uid)
+         self._setDefault(lazyAnnotator=False)
+
+     def _create_model(self, java_model):
+         raise NotImplementedError('Please implement _create_model in %s' % self)
+
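
For orientation, a hypothetical subclass sketch (the JVM class name is invented and would fail to resolve without a matching Scala class): concrete approaches pass their Scala class name to the constructor and override _create_model to wrap the fitted Java model.

    from sparknlp.common import RecursiveAnnotatorApproach
    from sparknlp.common.annotator_model import AnnotatorModel

    # Hypothetical: "com.example.MyApproach" is not a real Spark NLP class;
    # this only illustrates the two hooks a concrete approach fills in.
    class MyApproach(RecursiveAnnotatorApproach):
        def __init__(self):
            super(MyApproach, self).__init__(classname="com.example.MyApproach")

        def _create_model(self, java_model):
            # Wrap the fitted JVM model in its Python-side counterpart
            # (a real subclass would return its own AnnotatorModel type).
            return AnnotatorModel(classname=None, java_model=java_model)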
sparknlp/common/storage.py ADDED
@@ -0,0 +1,149 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains utility classes for handling storage."""
+
+ from pyspark.ml.param import Param, Params, TypeConverters
+
+ from sparknlp.common.utils import ExternalResource
+ from sparknlp.common.properties import HasCaseSensitiveProperties
+ import sparknlp.internal as _internal
+
+
+ class HasStorageRef:
+     storageRef = Param(Params._dummy(), "storageRef",
+                        "unique reference name for identification",
+                        TypeConverters.toString)
+
+     def setStorageRef(self, value):
+         """Sets unique reference name for identification.
+
+         Parameters
+         ----------
+         value : str
+             Unique reference name for identification
+         """
+         return self._set(storageRef=value)
+
+     def getStorageRef(self):
+         """Gets unique reference name for identification.
+
+         Returns
+         -------
+         str
+             Unique reference name for identification
+         """
+         return self.getOrDefault("storageRef")
+
+
+ class HasStorageOptions:
+     includeStorage = Param(Params._dummy(),
+                            "includeStorage",
+                            "whether to include indexed storage in trained model",
+                            typeConverter=TypeConverters.toBoolean)
+
+     enableInMemoryStorage = Param(Params._dummy(),
+                                   "enableInMemoryStorage",
+                                   "whether to load whole indexed storage in memory (in-memory lookup)",
+                                   typeConverter=TypeConverters.toBoolean)
+
+     def setIncludeStorage(self, value):
+         """Sets whether to include indexed storage in trained model.
+
+         Parameters
+         ----------
+         value : bool
+             Whether to include indexed storage in trained model
+         """
+         return self._set(includeStorage=value)
+
+     def getIncludeStorage(self):
+         """Gets whether to include indexed storage in trained model.
+
+         Returns
+         -------
+         bool
+             Whether to include indexed storage in trained model
+         """
+         return self.getOrDefault("includeStorage")
+
+     def setEnableInMemoryStorage(self, value):
+         """Sets whether to load whole indexed storage in memory (in-memory lookup)
+
+         Parameters
+         ----------
+         value : bool
+             Whether to load whole indexed storage in memory (in-memory lookup)
+         """
+         return self._set(enableInMemoryStorage=value)
+
+     def getEnableInMemoryStorage(self):
+         return self.getOrDefault("enableInMemoryStorage")
+
+
+ class HasStorageModel(HasStorageRef, HasCaseSensitiveProperties, HasStorageOptions):
+
+     def saveStorage(self, path, spark):
+         """Saves the current model to storage.
+
+         Parameters
+         ----------
+         path : str
+             Path for saving the model.
+         spark : :class:`pyspark.sql.SparkSession`
+             The current SparkSession
+         """
+         self._transfer_params_to_java()
+         self._java_obj.saveStorage(path, spark._jsparkSession, False)
+
+     @staticmethod
+     def loadStorage(path, spark, storage_ref):
+         raise NotImplementedError("AnnotatorModel with HasStorageModel did not implement 'loadStorage'")
+
+     @staticmethod
+     def loadStorages(path, spark, storage_ref, databases):
+         for database in databases:
+             _internal._StorageHelper(path, spark, database, storage_ref, within_storage=False)
+
+
+ class HasStorage(HasStorageRef, HasCaseSensitiveProperties, HasStorageOptions):
+     storagePath = Param(Params._dummy(),
+                         "storagePath",
+                         "path to file",
+                         typeConverter=TypeConverters.identity)
+
+     def setStoragePath(self, path, read_as):
+         """Sets path to file.
+
+         Parameters
+         ----------
+         path : str
+             Path to file
+         read_as : str
+             How to interpret the file
+
+         Notes
+         -----
+         See :class:`ReadAs <sparknlp.common.ReadAs>` for reading options.
+         """
+         return self._set(storagePath=ExternalResource(path, read_as, {}))
+
+     def getStoragePath(self):
+         """Gets path to file.
+
+         Returns
+         -------
+         str
+             path to file
+         """
+         return self.getOrDefault("storagePath")
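
A hedged sketch of how these mixins combine in practice (the embeddings file path is hypothetical): WordEmbeddings exposes setStoragePath via HasStorage, while the HasStorageOptions flags decide whether the index ships inside the saved model and whether lookups are served from memory.

    from sparknlp.annotator import WordEmbeddings
    from sparknlp.common import ReadAs

    embeddings = (
        WordEmbeddings()
        .setStoragePath("glove.6B.100d.txt", ReadAs.TEXT)  # hypothetical path
        .setDimension(100)
        .setStorageRef("glove_100d")      # HasStorageRef: stable identifier
        .setIncludeStorage(False)         # keep the index out of the saved model
        .setEnableInMemoryStorage(True)   # serve lookups from memory
        .setInputCols(["document", "token"])
        .setOutputCol("embeddings")
    )
    # After fitting, the resulting model (a HasStorageModel) can persist just
    # its index: model.saveStorage("/tmp/glove_idx", spark)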
sparknlp/common/utils.py ADDED
@@ -0,0 +1,39 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains utilities for annotators."""
+
+ from sparknlp.common.read_as import ReadAs
+ import sparknlp.internal as _internal
+
+
+ def ExternalResource(path, read_as=ReadAs.TEXT, options={}):
+     """Returns a representation fo an External Resource.
+
+     How the resource is read can be set with `read_as`.
+
+     Parameters
+     ----------
+     path : str
+         Path to the resource
+     read_as : str, optional
+         How to read the resource, by default ReadAs.TEXT
+     options : dict, optional
+         Options to read the resource, by default {}
+     """
+     return _internal._ExternalResource(path, read_as, options).apply()
+
+
+ def RegexRule(rule, identifier):
+     return _internal._RegexRule(rule, identifier).apply()
+
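
Note that `options` defaults to a shared mutable dict, so callers are safer passing their own. A hedged usage sketch (the file path and option keys are illustrative, and building the resource needs an active Spark NLP session since it calls into the JVM):

    from sparknlp.common.utils import ExternalResource
    from sparknlp.common.read_as import ReadAs

    # Illustrative only: wraps a hypothetical dictionary file plus reader
    # options so a Scala-side annotator can consume it.
    resource = ExternalResource(
        "lemmas.txt",
        read_as=ReadAs.TEXT,
        options={"format": "text", "keyDelimiter": "->", "valueDelimiter": "\t"},
    )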
sparknlp/functions.py CHANGED
@@ -1,28 +1,295 @@
- from pyspark.sql.functions import udf
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Contains helper functions to assist in transforming Annotation results.
+ """
+
+ from pyspark.sql.functions import udf, array
  from pyspark.sql.types import *
  from pyspark.sql import DataFrame
  from sparknlp.annotation import Annotation


  def map_annotations(f, output_type: DataType):
+     """Creates a Spark UDF to map over an Annotator's results.
+
+     Parameters
+     ----------
+     f : function
+         The function to be applied over the results
+     output_type : :class:`pyspark.sql.types.DataType`
+         Output type of the data
+
+     Returns
+     -------
+     :func:`pyspark.sql.functions.udf`
+         Spark UserDefinedFunction (udf)
+
+     Examples
+     --------
+     >>> from sparknlp.pretrained import PretrainedPipeline
+     >>> explain_document_pipeline = PretrainedPipeline("explain_document_dl")
+     >>> data = spark.createDataFrame([["U.N. official Ekeus heads for Baghdad."]]).toDF("text")
+     >>> result = explain_document_pipeline.transform(data)
+
+     The array type must be provided in order to tell Spark the expected output
+     type of our column. We are using an Annotation array here.
+
+     >>> from sparknlp.functions import *
+     >>> def nnp_tokens(annotations: List[Row]):
+     ...     return list(
+     ...         filter(lambda annotation: annotation.result == 'NNP', annotations)
+     ...     )
+     >>> result.select(
+     ...     map_annotations(nnp_tokens, Annotation.arrayType())('pos').alias("nnp")
+     ... ).selectExpr("explode(nnp) as nnp").show(truncate=False)
+     +-----------------------------------------+
+     |nnp                                      |
+     +-----------------------------------------+
+     |[pos, 0, 2, NNP, [word -> U.N], []]      |
+     |[pos, 14, 18, NNP, [word -> Epeus], []]  |
+     |[pos, 30, 36, NNP, [word -> Baghdad], []]|
+     +-----------------------------------------+
+     """
      return udf(
-         lambda content: f(content),
+         lambda content: [ Annotation.toRow(a) for a in f([Annotation.fromRow(r) for r in content])],
          output_type
      )

+ def map_annotations_array(f, output_type: DataType):
+     """Creates a Spark UDF to map over an Annotator's array results.
+
+     Parameters
+     ----------
+     f : function
+         The function to be applied over the results
+     output_type : :class:`pyspark.sql.types.DataType`
+         Output type of the data
+
+     Returns
+     -------
+     :func:`pyspark.sql.functions.udf`
+         Spark UserDefinedFunction (udf)
+     """
+     return udf(
+         lambda cols: [Annotation.toRow(item) for item in f([Annotation.fromRow(r) for col in cols for r in col])],
+         output_type
+     )

  def map_annotations_strict(f):
+     """Creates a Spark UDF to map over an Annotator's results, for which the
+     return type is explicitly defined as a `Annotation.dataType()`.
+
+     Parameters
+     ----------
+     f : function
+         The function to be applied over the results
+
+     Returns
+     -------
+     :func:`pyspark.sql.functions.udf`
+         Spark UserDefinedFunction (udf)
+
+     Examples
+     --------
+     >>> from sparknlp.pretrained import PretrainedPipeline
+     >>> explain_document_pipeline = PretrainedPipeline("explain_document_dl")
+     >>> data = spark.createDataFrame([["U.N. official Ekeus heads for Baghdad."]]).toDF("text")
+     >>> result = explain_document_pipeline.transform(data)
+     >>> def nnp_tokens(annotations):
+     ...     return list(
+     ...         filter(lambda annotation: annotation.result == 'NNP', annotations)
+     ...     )
+     >>> result.select(
+     ...     map_annotations_strict(nnp_tokens)('pos').alias("nnp")
+     ... ).selectExpr("explode(nnp) as nnp").show(truncate=False)
+     +-----------------------------------------+
+     |nnp                                      |
+     +-----------------------------------------+
+     |[pos, 0, 2, NNP, [word -> U.N], []]      |
+     |[pos, 14, 18, NNP, [word -> Epeus], []]  |
+     |[pos, 30, 36, NNP, [word -> Baghdad], []]|
+     +-----------------------------------------+
+     """
      return udf(
-         lambda content: f(content),
+         lambda content: [ Annotation.toRow(a) for a in f([Annotation.fromRow(r) for r in content])],
          ArrayType(Annotation.dataType())
      )


- def map_annotations_col(dataframe: DataFrame, f, column, output_column, output_type):
-     return dataframe.withColumn(output_column, map_annotations(f, output_type)(column))
+ def map_annotations_col(dataframe: DataFrame, f, column: str, output_column: str, annotatyon_type: str,
+                         output_type: DataType = Annotation.arrayType()):
+     """Creates a Spark UDF to map over a column of Annotation results.
+
+     Parameters
+     ----------
+     dataframe : DataFrame
+         Input DataFrame
+     f : function
+         Function to apply to the column
+     column : str
+         Name of the input column
+     output_column : str
+         Name of the output column
+     annotatyon_type : str
+         Annotator type
+     output_type : DataType, optional
+         Output type, by default Annotation.arrayType()
+
+     Returns
+     -------
+     :class:`pyspark.sql.DataFrame`
+         Transformed DataFrame
+
+     Examples
+     --------
+     >>> from sparknlp.pretrained import PretrainedPipeline
+     >>> from sparknlp.functions import *
+     >>> explain_document_pipeline = PretrainedPipeline("explain_document_dl")
+     >>> data = spark.createDataFrame([["U.N. official Ekeus heads for Baghdad."]]).toDF("text")
+     >>> result = explain_document_pipeline.transform(data)
+     >>> chunks_df = map_annotations_col(
+     ...     result,
+     ...     lambda x: [
+     ...         Annotation("chunk", a.begin, a.end, a.result, a.metadata, a.embeddings)
+     ...         for a in x
+     ...     ],
+     ...     "pos",
+     ...     "pos_chunk",
+     ...     "chunk",
+     ... )
+     >>> chunks_df.selectExpr("explode(pos_chunk)").show()
+     +--------------------+
+     |                 col|
+     +--------------------+
+     |[chunk, 0, 2, NNP...|
+     |[chunk, 3, 3, ., ...|
+     |[chunk, 5, 12, JJ...|
+     |[chunk, 14, 18, N...|
+     |[chunk, 20, 24, V...|
+     |[chunk, 26, 28, I...|
+     |[chunk, 30, 36, N...|
+     |[chunk, 37, 37, ....|
+     +--------------------+
+     """
+     return dataframe.withColumn(output_column, map_annotations(f, output_type)(column).alias(output_column, metadata={
+         'annotatorType': annotatyon_type}))
+
+ def map_annotations_cols(dataframe: DataFrame, f, columns: list, output_column: str, annotatyon_type: str,
+                          output_type: DataType = Annotation.arrayType()):
+     """Creates a Spark UDF to map over multiple columns of Annotation results.
+
+     Parameters
+     ----------
+     dataframe : DataFrame
+         Input DataFrame
+     f : function
+         Function to apply to the column
+     columns : list
+         Name of the input column
+     output_column : str
+         Name of the output column
+     annotatyon_type : str
+         Annotator type
+     output_type : DataType, optional
+         Output type, by default Annotation.arrayType()
+
+     Returns
+     -------
+     :class:`pyspark.sql.DataFrame`
+         Transformed DataFrame
+
+     Examples
+     --------
+     >>> from sparknlp.pretrained import PretrainedPipeline
+     >>> from sparknlp.functions import *
+     >>> explain_document_pipeline = PretrainedPipeline("explain_document_dl")
+     >>> data = spark.createDataFrame([["U.N. official Ekeus heads for Baghdad."]]).toDF("text")
+     >>> result = explain_document_pipeline.transform(data)
+     >>> chunks_df = map_annotations_cols(
+     ...     result,
+     ...     lambda x: [
+     ...         Annotation("tag", a.begin, a.end, a.result, a.metadata, a.embeddings)
+     ...         for a in x
+     ...     ],
+     ...     ["pos", "ner"],
+     ...     "tags",
+     ...     "chunk"
+     ... )
+     >>> chunks_df.selectExpr("explode(tags)").show(truncate=False)
+     +-------------------------------------------+
+     |col                                        |
+     +-------------------------------------------+
+     |[tag, 0, 2, NNP, [word -> U.N], []]        |
+     |[tag, 3, 3, ., [word -> .], []]            |
+     |[tag, 5, 12, JJ, [word -> official], []]   |
+     |[tag, 14, 18, NNP, [word -> Epeus], []]    |
+     |[tag, 20, 24, VBZ, [word -> heads], []]    |
+     |[tag, 26, 28, IN, [word -> for], []]       |
+     |[tag, 30, 36, NNP, [word -> Baghdad], []]  |
+     |[tag, 37, 37, ., [word -> .], []]          |
+     |[tag, 0, 2, B-ORG, [word -> U.N], []]      |
+     |[tag, 3, 3, O, [word -> .], []]            |
+     |[tag, 5, 12, O, [word -> official], []]    |
+     |[tag, 14, 18, B-PER, [word -> Ekeus], []]  |
+     |[tag, 20, 24, O, [word -> heads], []]      |
+     |[tag, 26, 28, O, [word -> for], []]        |
+     |[tag, 30, 36, B-LOC, [word -> Baghdad], []]|
+     |[tag, 37, 37, O, [word -> .], []]          |
+     +-------------------------------------------+
+     """
+     return dataframe.withColumn(output_column, map_annotations_array(f, output_type)(array(*columns)).alias(output_column, metadata={
+         'annotatorType': annotatyon_type}))


  def filter_by_annotations_col(dataframe, f, column):
+     """Applies a filter over a column of Annotations.
+
+     Parameters
+     ----------
+     dataframe : DataFrame
+         Input DataFrame
+     f : function
+         Filter function
+     column : str
+         Name of the column
+
+     Returns
+     -------
+     :class:`pyspark.sql.DataFrame`
+         Filtered DataFrame
+
+     Examples
+     --------
+     >>> from sparknlp.pretrained import PretrainedPipeline
+     >>> from sparknlp.functions import *
+     >>> explain_document_pipeline = PretrainedPipeline("explain_document_dl")
+     >>> data = spark.createDataFrame([["U.N. official Ekeus heads for Baghdad."]]).toDF("text")
+     >>> result = explain_document_pipeline.transform(data)
+     >>> def filter_pos(annotation: Annotation):
+     ...     return annotation.result == "NNP"
+     >>> filter_by_annotations_col(
+     ...     explode_annotations_col(result, "pos", "pos"), filter_pos, "pos"
+     ... ).select("pos").show(truncate=False)
+     +-----------------------------------------+
+     |pos                                      |
+     +-----------------------------------------+
+     |[pos, 0, 2, NNP, [word -> U.N], []]      |
+     |[pos, 14, 18, NNP, [word -> Epeus], []]  |
+     |[pos, 30, 36, NNP, [word -> Baghdad], []]|
+     +-----------------------------------------+
+     """
      this_udf = udf(
          lambda content: f(content),
          BooleanType()
@@ -31,5 +298,48 @@ def filter_by_annotations_col(dataframe, f, column):


  def explode_annotations_col(dataframe: DataFrame, column, output_column):
+     """Explodes an Annotation column, putting each result onto a separate row.
+
+     Parameters
+     ----------
+     dataframe : DataFrame
+         The Spark DataFrame containing output Annotations
+     column : str
+         Name of the column
+     output_column : str
+         Name of the output column
+
+     Returns
+     -------
+     :class:`pyspark.sql.DataFrame`
+         Transformed DataFrame
+
+     Examples
+     --------
+     >>> from sparknlp.pretrained import PretrainedPipeline
+     >>> from sparknlp.functions import *
+     >>> explain_document_pipeline = PretrainedPipeline("explain_document_dl")
+     >>> data = spark.createDataFrame([["U.N. official Ekeus heads for Baghdad."]]).toDF("text")
+     >>> result = explain_document_pipeline.transform(data)
+     >>> result.select("pos.result").show(truncate=False)
+     +----------------------------------+
+     |result                            |
+     +----------------------------------+
+     |[NNP, ., JJ, NNP, VBZ, IN, NNP, .]|
+     +----------------------------------+
+     >>> explode_annotations_col(result, "pos", "pos").select("pos.result").show()
+     +------+
+     |result|
+     +------+
+     |   NNP|
+     |     .|
+     |    JJ|
+     |   NNP|
+     |   VBZ|
+     |    IN|
+     |   NNP|
+     |     .|
+     +------+
+     """
      from pyspark.sql.functions import explode
      return dataframe.withColumn(output_column, explode(column))
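
The reworked functions now round-trip between Spark Rows and Annotation objects, so user callbacks receive and return real Annotation instances instead of raw Rows. A hedged sketch tying the pieces together (assumes an active Spark session `spark` and a downloadable pretrained pipeline, exactly as in the docstring examples above):

    from sparknlp.pretrained import PretrainedPipeline
    from sparknlp.functions import map_annotations
    from sparknlp.annotation import Annotation

    pipeline = PretrainedPipeline("explain_document_dl")
    result = pipeline.transform(
        spark.createDataFrame([["U.N. official Ekeus heads for Baghdad."]]).toDF("text"))

    # Uppercase every token; map_annotations converts each Row to an
    # Annotation before calling this function, and back to a Row afterwards.
    def upper_tokens(annotations):
        return [Annotation("token", a.begin, a.end, a.result.upper(),
                           a.metadata, a.embeddings)
                for a in annotations]

    result.select(
        map_annotations(upper_tokens, Annotation.arrayType())("token").alias("upper")
    ).show(truncate=False)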