spark-nlp 2.6.3rc1__py2.py3-none-any.whl → 6.2.1__py2.py3-none-any.whl

This diff shows the changes between package versions as they were released to their public registry, and is provided for informational purposes only.
Files changed (329)
  1. com/johnsnowlabs/ml/__init__.py +0 -0
  2. com/johnsnowlabs/ml/ai/__init__.py +10 -0
  3. com/johnsnowlabs/nlp/__init__.py +4 -2
  4. spark_nlp-6.2.1.dist-info/METADATA +362 -0
  5. spark_nlp-6.2.1.dist-info/RECORD +292 -0
  6. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/WHEEL +1 -1
  7. sparknlp/__init__.py +281 -27
  8. sparknlp/annotation.py +137 -6
  9. sparknlp/annotation_audio.py +61 -0
  10. sparknlp/annotation_image.py +82 -0
  11. sparknlp/annotator/__init__.py +93 -0
  12. sparknlp/annotator/audio/__init__.py +16 -0
  13. sparknlp/annotator/audio/hubert_for_ctc.py +188 -0
  14. sparknlp/annotator/audio/wav2vec2_for_ctc.py +161 -0
  15. sparknlp/annotator/audio/whisper_for_ctc.py +251 -0
  16. sparknlp/annotator/chunk2_doc.py +85 -0
  17. sparknlp/annotator/chunker.py +137 -0
  18. sparknlp/annotator/classifier_dl/__init__.py +61 -0
  19. sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
  20. sparknlp/annotator/classifier_dl/albert_for_question_answering.py +172 -0
  21. sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +201 -0
  22. sparknlp/annotator/classifier_dl/albert_for_token_classification.py +179 -0
  23. sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
  24. sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
  25. sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +161 -0
  26. sparknlp/annotator/classifier_dl/bert_for_question_answering.py +168 -0
  27. sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +202 -0
  28. sparknlp/annotator/classifier_dl/bert_for_token_classification.py +177 -0
  29. sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +212 -0
  30. sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +168 -0
  31. sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +205 -0
  32. sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +173 -0
  33. sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
  34. sparknlp/annotator/classifier_dl/classifier_dl.py +320 -0
  35. sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +168 -0
  36. sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +198 -0
  37. sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +175 -0
  38. sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +193 -0
  39. sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +168 -0
  40. sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +201 -0
  41. sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +175 -0
  42. sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +211 -0
  43. sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
  44. sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +168 -0
  45. sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +201 -0
  46. sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +176 -0
  47. sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +148 -0
  48. sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +188 -0
  49. sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
  50. sparknlp/annotator/classifier_dl/multi_classifier_dl.py +395 -0
  51. sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
  52. sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +168 -0
  53. sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +201 -0
  54. sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +189 -0
  55. sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +225 -0
  56. sparknlp/annotator/classifier_dl/sentiment_dl.py +378 -0
  57. sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +170 -0
  58. sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
  59. sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +168 -0
  60. sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +201 -0
  61. sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +173 -0
  62. sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +225 -0
  63. sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +201 -0
  64. sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +176 -0
  65. sparknlp/annotator/cleaners/__init__.py +15 -0
  66. sparknlp/annotator/cleaners/cleaner.py +202 -0
  67. sparknlp/annotator/cleaners/extractor.py +191 -0
  68. sparknlp/annotator/coref/__init__.py +1 -0
  69. sparknlp/annotator/coref/spanbert_coref.py +221 -0
  70. sparknlp/annotator/cv/__init__.py +29 -0
  71. sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
  72. sparknlp/annotator/cv/clip_for_zero_shot_classification.py +193 -0
  73. sparknlp/annotator/cv/convnext_for_image_classification.py +269 -0
  74. sparknlp/annotator/cv/florence2_transformer.py +180 -0
  75. sparknlp/annotator/cv/gemma3_for_multimodal.py +346 -0
  76. sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
  77. sparknlp/annotator/cv/janus_for_multimodal.py +351 -0
  78. sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
  79. sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
  80. sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
  81. sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
  82. sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
  83. sparknlp/annotator/cv/smolvlm_transformer.py +426 -0
  84. sparknlp/annotator/cv/swin_for_image_classification.py +242 -0
  85. sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +240 -0
  86. sparknlp/annotator/cv/vit_for_image_classification.py +217 -0
  87. sparknlp/annotator/dataframe_optimizer.py +216 -0
  88. sparknlp/annotator/date2_chunk.py +88 -0
  89. sparknlp/annotator/dependency/__init__.py +17 -0
  90. sparknlp/annotator/dependency/dependency_parser.py +294 -0
  91. sparknlp/annotator/dependency/typed_dependency_parser.py +318 -0
  92. sparknlp/annotator/document_character_text_splitter.py +228 -0
  93. sparknlp/annotator/document_normalizer.py +235 -0
  94. sparknlp/annotator/document_token_splitter.py +175 -0
  95. sparknlp/annotator/document_token_splitter_test.py +85 -0
  96. sparknlp/annotator/embeddings/__init__.py +45 -0
  97. sparknlp/annotator/embeddings/albert_embeddings.py +230 -0
  98. sparknlp/annotator/embeddings/auto_gguf_embeddings.py +539 -0
  99. sparknlp/annotator/embeddings/bert_embeddings.py +208 -0
  100. sparknlp/annotator/embeddings/bert_sentence_embeddings.py +224 -0
  101. sparknlp/annotator/embeddings/bge_embeddings.py +199 -0
  102. sparknlp/annotator/embeddings/camembert_embeddings.py +210 -0
  103. sparknlp/annotator/embeddings/chunk_embeddings.py +149 -0
  104. sparknlp/annotator/embeddings/deberta_embeddings.py +208 -0
  105. sparknlp/annotator/embeddings/distil_bert_embeddings.py +221 -0
  106. sparknlp/annotator/embeddings/doc2vec.py +352 -0
  107. sparknlp/annotator/embeddings/e5_embeddings.py +195 -0
  108. sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
  109. sparknlp/annotator/embeddings/elmo_embeddings.py +251 -0
  110. sparknlp/annotator/embeddings/instructor_embeddings.py +204 -0
  111. sparknlp/annotator/embeddings/longformer_embeddings.py +211 -0
  112. sparknlp/annotator/embeddings/minilm_embeddings.py +189 -0
  113. sparknlp/annotator/embeddings/mpnet_embeddings.py +192 -0
  114. sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
  115. sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
  116. sparknlp/annotator/embeddings/roberta_embeddings.py +225 -0
  117. sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +191 -0
  118. sparknlp/annotator/embeddings/sentence_embeddings.py +134 -0
  119. sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
  120. sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
  121. sparknlp/annotator/embeddings/universal_sentence_encoder.py +211 -0
  122. sparknlp/annotator/embeddings/word2vec.py +353 -0
  123. sparknlp/annotator/embeddings/word_embeddings.py +385 -0
  124. sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +225 -0
  125. sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +194 -0
  126. sparknlp/annotator/embeddings/xlnet_embeddings.py +227 -0
  127. sparknlp/annotator/er/__init__.py +16 -0
  128. sparknlp/annotator/er/entity_ruler.py +267 -0
  129. sparknlp/annotator/graph_extraction.py +368 -0
  130. sparknlp/annotator/keyword_extraction/__init__.py +16 -0
  131. sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +270 -0
  132. sparknlp/annotator/ld_dl/__init__.py +16 -0
  133. sparknlp/annotator/ld_dl/language_detector_dl.py +199 -0
  134. sparknlp/annotator/lemmatizer.py +250 -0
  135. sparknlp/annotator/matcher/__init__.py +20 -0
  136. sparknlp/annotator/matcher/big_text_matcher.py +272 -0
  137. sparknlp/annotator/matcher/date_matcher.py +303 -0
  138. sparknlp/annotator/matcher/multi_date_matcher.py +109 -0
  139. sparknlp/annotator/matcher/regex_matcher.py +221 -0
  140. sparknlp/annotator/matcher/text_matcher.py +290 -0
  141. sparknlp/annotator/n_gram_generator.py +141 -0
  142. sparknlp/annotator/ner/__init__.py +21 -0
  143. sparknlp/annotator/ner/ner_approach.py +94 -0
  144. sparknlp/annotator/ner/ner_converter.py +148 -0
  145. sparknlp/annotator/ner/ner_crf.py +397 -0
  146. sparknlp/annotator/ner/ner_dl.py +591 -0
  147. sparknlp/annotator/ner/ner_dl_graph_checker.py +293 -0
  148. sparknlp/annotator/ner/ner_overwriter.py +166 -0
  149. sparknlp/annotator/ner/zero_shot_ner_model.py +173 -0
  150. sparknlp/annotator/normalizer.py +230 -0
  151. sparknlp/annotator/openai/__init__.py +16 -0
  152. sparknlp/annotator/openai/openai_completion.py +349 -0
  153. sparknlp/annotator/openai/openai_embeddings.py +106 -0
  154. sparknlp/annotator/param/__init__.py +17 -0
  155. sparknlp/annotator/param/classifier_encoder.py +98 -0
  156. sparknlp/annotator/param/evaluation_dl_params.py +130 -0
  157. sparknlp/annotator/pos/__init__.py +16 -0
  158. sparknlp/annotator/pos/perceptron.py +263 -0
  159. sparknlp/annotator/sentence/__init__.py +17 -0
  160. sparknlp/annotator/sentence/sentence_detector.py +290 -0
  161. sparknlp/annotator/sentence/sentence_detector_dl.py +467 -0
  162. sparknlp/annotator/sentiment/__init__.py +17 -0
  163. sparknlp/annotator/sentiment/sentiment_detector.py +208 -0
  164. sparknlp/annotator/sentiment/vivekn_sentiment.py +242 -0
  165. sparknlp/annotator/seq2seq/__init__.py +35 -0
  166. sparknlp/annotator/seq2seq/auto_gguf_model.py +304 -0
  167. sparknlp/annotator/seq2seq/auto_gguf_reranker.py +334 -0
  168. sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +336 -0
  169. sparknlp/annotator/seq2seq/bart_transformer.py +420 -0
  170. sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
  171. sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
  172. sparknlp/annotator/seq2seq/gpt2_transformer.py +363 -0
  173. sparknlp/annotator/seq2seq/llama2_transformer.py +343 -0
  174. sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
  175. sparknlp/annotator/seq2seq/m2m100_transformer.py +392 -0
  176. sparknlp/annotator/seq2seq/marian_transformer.py +374 -0
  177. sparknlp/annotator/seq2seq/mistral_transformer.py +348 -0
  178. sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
  179. sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
  180. sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
  181. sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
  182. sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
  183. sparknlp/annotator/seq2seq/qwen_transformer.py +340 -0
  184. sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
  185. sparknlp/annotator/seq2seq/t5_transformer.py +425 -0
  186. sparknlp/annotator/similarity/__init__.py +0 -0
  187. sparknlp/annotator/similarity/document_similarity_ranker.py +379 -0
  188. sparknlp/annotator/spell_check/__init__.py +18 -0
  189. sparknlp/annotator/spell_check/context_spell_checker.py +911 -0
  190. sparknlp/annotator/spell_check/norvig_sweeting.py +358 -0
  191. sparknlp/annotator/spell_check/symmetric_delete.py +299 -0
  192. sparknlp/annotator/stemmer.py +79 -0
  193. sparknlp/annotator/stop_words_cleaner.py +190 -0
  194. sparknlp/annotator/tf_ner_dl_graph_builder.py +179 -0
  195. sparknlp/annotator/token/__init__.py +19 -0
  196. sparknlp/annotator/token/chunk_tokenizer.py +118 -0
  197. sparknlp/annotator/token/recursive_tokenizer.py +205 -0
  198. sparknlp/annotator/token/regex_tokenizer.py +208 -0
  199. sparknlp/annotator/token/tokenizer.py +561 -0
  200. sparknlp/annotator/token2_chunk.py +76 -0
  201. sparknlp/annotator/ws/__init__.py +16 -0
  202. sparknlp/annotator/ws/word_segmenter.py +429 -0
  203. sparknlp/base/__init__.py +30 -0
  204. sparknlp/base/audio_assembler.py +95 -0
  205. sparknlp/base/doc2_chunk.py +169 -0
  206. sparknlp/base/document_assembler.py +164 -0
  207. sparknlp/base/embeddings_finisher.py +201 -0
  208. sparknlp/base/finisher.py +217 -0
  209. sparknlp/base/gguf_ranking_finisher.py +234 -0
  210. sparknlp/base/graph_finisher.py +125 -0
  211. sparknlp/base/has_recursive_fit.py +24 -0
  212. sparknlp/base/has_recursive_transform.py +22 -0
  213. sparknlp/base/image_assembler.py +172 -0
  214. sparknlp/base/light_pipeline.py +429 -0
  215. sparknlp/base/multi_document_assembler.py +164 -0
  216. sparknlp/base/prompt_assembler.py +207 -0
  217. sparknlp/base/recursive_pipeline.py +107 -0
  218. sparknlp/base/table_assembler.py +145 -0
  219. sparknlp/base/token_assembler.py +124 -0
  220. sparknlp/common/__init__.py +26 -0
  221. sparknlp/common/annotator_approach.py +41 -0
  222. sparknlp/common/annotator_model.py +47 -0
  223. sparknlp/common/annotator_properties.py +114 -0
  224. sparknlp/common/annotator_type.py +38 -0
  225. sparknlp/common/completion_post_processing.py +37 -0
  226. sparknlp/common/coverage_result.py +22 -0
  227. sparknlp/common/match_strategy.py +33 -0
  228. sparknlp/common/properties.py +1298 -0
  229. sparknlp/common/read_as.py +33 -0
  230. sparknlp/common/recursive_annotator_approach.py +35 -0
  231. sparknlp/common/storage.py +149 -0
  232. sparknlp/common/utils.py +39 -0
  233. sparknlp/functions.py +315 -5
  234. sparknlp/internal/__init__.py +1199 -0
  235. sparknlp/internal/annotator_java_ml.py +32 -0
  236. sparknlp/internal/annotator_transformer.py +37 -0
  237. sparknlp/internal/extended_java_wrapper.py +63 -0
  238. sparknlp/internal/params_getters_setters.py +71 -0
  239. sparknlp/internal/recursive.py +70 -0
  240. sparknlp/logging/__init__.py +15 -0
  241. sparknlp/logging/comet.py +467 -0
  242. sparknlp/partition/__init__.py +16 -0
  243. sparknlp/partition/partition.py +244 -0
  244. sparknlp/partition/partition_properties.py +902 -0
  245. sparknlp/partition/partition_transformer.py +200 -0
  246. sparknlp/pretrained/__init__.py +17 -0
  247. sparknlp/pretrained/pretrained_pipeline.py +158 -0
  248. sparknlp/pretrained/resource_downloader.py +216 -0
  249. sparknlp/pretrained/utils.py +35 -0
  250. sparknlp/reader/__init__.py +15 -0
  251. sparknlp/reader/enums.py +19 -0
  252. sparknlp/reader/pdf_to_text.py +190 -0
  253. sparknlp/reader/reader2doc.py +124 -0
  254. sparknlp/reader/reader2image.py +136 -0
  255. sparknlp/reader/reader2table.py +44 -0
  256. sparknlp/reader/reader_assembler.py +159 -0
  257. sparknlp/reader/sparknlp_reader.py +461 -0
  258. sparknlp/training/__init__.py +20 -0
  259. sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  260. sparknlp/training/_tf_graph_builders/graph_builders.py +299 -0
  261. sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  262. sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +41 -0
  263. sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +78 -0
  264. sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +521 -0
  265. sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +62 -0
  266. sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +28 -0
  267. sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +36 -0
  268. sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +385 -0
  269. sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +183 -0
  270. sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +235 -0
  271. sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +665 -0
  272. sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +245 -0
  273. sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +4006 -0
  274. sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  275. sparknlp/training/_tf_graph_builders_1x/graph_builders.py +277 -0
  276. sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  277. sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +34 -0
  278. sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +78 -0
  279. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +532 -0
  280. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +62 -0
  281. sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +28 -0
  282. sparknlp/training/conll.py +150 -0
  283. sparknlp/training/conllu.py +103 -0
  284. sparknlp/training/pos.py +103 -0
  285. sparknlp/training/pub_tator.py +76 -0
  286. sparknlp/training/spacy_to_annotation.py +57 -0
  287. sparknlp/training/tfgraphs.py +5 -0
  288. sparknlp/upload_to_hub.py +149 -0
  289. sparknlp/util.py +51 -5
  290. com/__init__.pyc +0 -0
  291. com/__pycache__/__init__.cpython-36.pyc +0 -0
  292. com/johnsnowlabs/__init__.pyc +0 -0
  293. com/johnsnowlabs/__pycache__/__init__.cpython-36.pyc +0 -0
  294. com/johnsnowlabs/nlp/__init__.pyc +0 -0
  295. com/johnsnowlabs/nlp/__pycache__/__init__.cpython-36.pyc +0 -0
  296. spark_nlp-2.6.3rc1.dist-info/METADATA +0 -36
  297. spark_nlp-2.6.3rc1.dist-info/RECORD +0 -48
  298. sparknlp/__init__.pyc +0 -0
  299. sparknlp/__pycache__/__init__.cpython-36.pyc +0 -0
  300. sparknlp/__pycache__/annotation.cpython-36.pyc +0 -0
  301. sparknlp/__pycache__/annotator.cpython-36.pyc +0 -0
  302. sparknlp/__pycache__/base.cpython-36.pyc +0 -0
  303. sparknlp/__pycache__/common.cpython-36.pyc +0 -0
  304. sparknlp/__pycache__/embeddings.cpython-36.pyc +0 -0
  305. sparknlp/__pycache__/functions.cpython-36.pyc +0 -0
  306. sparknlp/__pycache__/internal.cpython-36.pyc +0 -0
  307. sparknlp/__pycache__/pretrained.cpython-36.pyc +0 -0
  308. sparknlp/__pycache__/storage.cpython-36.pyc +0 -0
  309. sparknlp/__pycache__/training.cpython-36.pyc +0 -0
  310. sparknlp/__pycache__/util.cpython-36.pyc +0 -0
  311. sparknlp/annotation.pyc +0 -0
  312. sparknlp/annotator.py +0 -3006
  313. sparknlp/annotator.pyc +0 -0
  314. sparknlp/base.py +0 -347
  315. sparknlp/base.pyc +0 -0
  316. sparknlp/common.py +0 -193
  317. sparknlp/common.pyc +0 -0
  318. sparknlp/embeddings.py +0 -40
  319. sparknlp/embeddings.pyc +0 -0
  320. sparknlp/internal.py +0 -288
  321. sparknlp/internal.pyc +0 -0
  322. sparknlp/pretrained.py +0 -123
  323. sparknlp/pretrained.pyc +0 -0
  324. sparknlp/storage.py +0 -32
  325. sparknlp/storage.pyc +0 -0
  326. sparknlp/training.py +0 -62
  327. sparknlp/training.pyc +0 -0
  328. sparknlp/util.pyc +0 -0
  329. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/top_level.txt +0 -0
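Since the upgrade spans several major versions (2.6.3rc1 to 6.2.1), a quick sanity check after installing is worthwhile. A minimal sketch, assuming a working Java and PySpark environment (`sparknlp.start()` and `sparknlp.version()` are part of the package's public API):

    pip install --upgrade spark-nlp==6.2.1

>>> import sparknlp
>>> spark = sparknlp.start()  # starts a Spark session with the matching Spark NLP jar
>>> sparknlp.version()
'6.2.1'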
sparknlp/annotator/er/entity_ruler.py (new file)
@@ -0,0 +1,267 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains classes for the EntityRuler."""
+
+ from sparknlp.common import *
+
+
+ class EntityRulerApproach(AnnotatorApproach, HasStorage):
+     """Fits an Annotator to match exact strings or regex patterns provided in a
+     file against a Document and assigns them a named entity. The definitions
+     can contain any number of named entities.
+
+     There are multiple ways and formats to set the extraction resource. It is
+     possible to set it either as a "JSON", "JSONL" or "CSV" file. A path to the
+     file needs to be provided to ``setPatternsResource``. The file format needs
+     to be set as the "format" field in the ``option`` parameter map and
+     depending on the file type, additional parameters might need to be set.
+
+     If the file is in a JSON format, then the rule definitions need to be given
+     in a list with the fields "id", "label" and "patterns"::
+
+         [
+             {
+                 "id": "person-regex",
+                 "label": "PERSON",
+                 "patterns": ["\\w+\\s\\w+", "\\w+-\\w+"]
+             },
+             {
+                 "id": "locations-words",
+                 "label": "LOCATION",
+                 "patterns": ["Winterfell"]
+             }
+         ]
+
+     The same fields also apply to a file in the JSONL format::
+
+         {"id": "names-with-j", "label": "PERSON", "patterns": ["Jon", "John", "John Snow"]}
+         {"id": "names-with-s", "label": "PERSON", "patterns": ["Stark", "Snow"]}
+         {"id": "names-with-e", "label": "PERSON", "patterns": ["Eddard", "Eddard Stark"]}
+
+     In order to use a CSV file, an additional parameter "delimiter" needs to be
+     set. In this case, the delimiter might be set by using
+     ``.setPatternsResource("patterns.csv", ReadAs.TEXT, {"format": "csv", "delimiter": "|"})``::
+
+         PERSON|Jon
+         PERSON|John
+         PERSON|John Snow
+         LOCATION|Winterfell
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``DOCUMENT, TOKEN``    ``CHUNK``
+     ====================== ======================
+
+     Parameters
+     ----------
+     patternsResource
+         Resource in JSON or CSV format to map entities to patterns
+     useStorage
+         Whether to use RocksDB storage to serialize patterns
+
+     Examples
+     --------
+     >>> import sparknlp
+     >>> from sparknlp.base import *
+     >>> from sparknlp.annotator import *
+     >>> from sparknlp.common import *
+     >>> from pyspark.ml import Pipeline
+
+     In this example, the entities file has the form of::
+
+         PERSON|Jon
+         PERSON|John
+         PERSON|John Snow
+         LOCATION|Winterfell
+
+     where each line represents an entity and the associated string delimited by "|".
+
+     >>> documentAssembler = DocumentAssembler() \\
+     ...     .setInputCol("text") \\
+     ...     .setOutputCol("document")
+     >>> tokenizer = Tokenizer() \\
+     ...     .setInputCols(["document"]) \\
+     ...     .setOutputCol("token")
+     >>> entityRuler = EntityRulerApproach() \\
+     ...     .setInputCols(["document", "token"]) \\
+     ...     .setOutputCol("entities") \\
+     ...     .setPatternsResource(
+     ...         "patterns.csv",
+     ...         ReadAs.TEXT,
+     ...         {"format": "csv", "delimiter": "\\\\|"}
+     ...     )
+     >>> pipeline = Pipeline().setStages([
+     ...     documentAssembler,
+     ...     tokenizer,
+     ...     entityRuler
+     ... ])
+     >>> data = spark.createDataFrame([["Jon Snow wants to be lord of Winterfell."]]).toDF("text")
+     >>> result = pipeline.fit(data).transform(data)
+     >>> result.selectExpr("explode(entities)").show(truncate=False)
+     +--------------------------------------------------------------------+
+     |col                                                                 |
+     +--------------------------------------------------------------------+
+     |[chunk, 0, 2, Jon, [entity -> PERSON, sentence -> 0], []]           |
+     |[chunk, 29, 38, Winterfell, [entity -> LOCATION, sentence -> 0], []]|
+     +--------------------------------------------------------------------+
+     """
+     name = "EntityRulerApproach"
+
+     inputAnnotatorTypes = [AnnotatorType.DOCUMENT]
+     optionalInputAnnotatorTypes = [AnnotatorType.TOKEN]
+
+     outputAnnotatorType = AnnotatorType.CHUNK
+
+     patternsResource = Param(Params._dummy(),
+                              "patternsResource",
+                              "Resource in JSON or CSV format to map entities to patterns",
+                              typeConverter=TypeConverters.identity)
+
+     useStorage = Param(Params._dummy(),
+                        "useStorage",
+                        "Whether to use RocksDB storage to serialize patterns",
+                        typeConverter=TypeConverters.toBoolean)
+
+     sentenceMatch = Param(Params._dummy(),
+                           "sentenceMatch",
+                           "Whether to find match at sentence level. True: sentence level. False: token level",
+                           typeConverter=TypeConverters.toBoolean)
+
+     alphabet = Param(Params._dummy(),
+                      "alphabet",
+                      "Alphabet resource path to plain text file with all characters in a given alphabet",
+                      typeConverter=TypeConverters.identity)
+
+     @keyword_only
+     def __init__(self):
+         super(EntityRulerApproach, self).__init__(
+             classname="com.johnsnowlabs.nlp.annotators.er.EntityRulerApproach")
+
+     def setPatternsResource(self, path, read_as=ReadAs.TEXT, options={"format": "JSON"}):
+         """Sets Resource in JSON or CSV format to map entities to patterns.
+
+         Parameters
+         ----------
+         path : str
+             Path to the resource
+         read_as : str, optional
+             How to interpret the resource, by default ReadAs.TEXT
+         options : dict, optional
+             Options for parsing the resource, by default {"format": "JSON"}
+         """
+         return self._set(patternsResource=ExternalResource(path, read_as, options))
+
+     def setUseStorage(self, value):
+         """Sets whether to use RocksDB storage to serialize patterns.
+
+         Parameters
+         ----------
+         value : bool
+             Whether to use RocksDB storage to serialize patterns.
+         """
+         return self._set(useStorage=value)
+
+     def setSentenceMatch(self, value):
+         """Sets whether to find match at sentence level.
+
+         Parameters
+         ----------
+         value : bool
+             True: sentence level. False: token level
+         """
+         return self._set(sentenceMatch=value)
+
+     def setAlphabetResource(self, path):
+         """Alphabet Resource (a simple plain text with all language characters)
+
+         Parameters
+         ----------
+         path : str
+             Path to the resource
+         """
+         return self._set(alphabet=ExternalResource(path, read_as=ReadAs.TEXT, options={}))
+
+     def _create_model(self, java_model):
+         return EntityRulerModel(java_model=java_model)
+
+
+ class EntityRulerModel(AnnotatorModel, HasStorageModel):
+     """Instantiated model of the EntityRulerApproach.
+     For usage and examples see the documentation of the main class.
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``DOCUMENT, TOKEN``    ``CHUNK``
+     ====================== ======================
+     """
+     name = "EntityRulerModel"
+     database = ['ENTITY_PATTERNS']
+
+     inputAnnotatorTypes = [AnnotatorType.DOCUMENT]
+     optionalInputAnnotatorTypes = [AnnotatorType.TOKEN]
+
+     outputAnnotatorType = AnnotatorType.CHUNK
+
+     autoMode = Param(
+         Params._dummy(),
+         "autoMode",
+         "Enable built-in regex presets that combine related entity patterns (e.g., 'communication_entities', 'network_entities', 'media_entities', etc.).",
+         typeConverter=TypeConverters.toString
+     )
+
+     extractEntities = Param(
+         Params._dummy(),
+         "extractEntities",
+         "List of entity types to extract. If not set, all entities in the active autoMode or from regexPatterns are used.",
+         typeConverter=TypeConverters.toListString
+     )
+
+     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.er.EntityRulerModel", java_model=None):
+         super(EntityRulerModel, self).__init__(
+             classname=classname,
+             java_model=java_model
+         )
+
+     @staticmethod
+     def pretrained(name, lang="en", remote_loc=None):
+         from sparknlp.pretrained import ResourceDownloader
+         return ResourceDownloader.downloadModel(EntityRulerModel, name, lang, remote_loc)
+
+     @staticmethod
+     def loadStorage(path, spark, storage_ref):
+         HasStorageModel.loadStorages(path, spark, storage_ref, EntityRulerModel.database)
+
+
+     def setAutoMode(self, value):
+         """Sets the auto mode for predefined regex entity groups.
+
+         Parameters
+         ----------
+         value : str
+             Name of the auto mode to activate (e.g., 'communication_entities', 'network_entities', etc.)
+         """
+         return self._set(autoMode=value)
+
+
+     def setExtractEntities(self, value):
+         """Sets specific entities to extract, filtering only those defined in regexPatterns or autoMode.
+
+         Parameters
+         ----------
+         value : list[str]
+             List of entity names to extract, e.g., ['EMAIL_ADDRESS_PATTERN', 'IPV4_PATTERN'].
+         """
+         return self._set(extractEntities=value)
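For the JSONL pattern format described in the docstring above, usage might look like the following sketch. The `patterns.jsonl` path is illustrative, and passing `{"format": "jsonl"}` assumes the resource reader accepts the lowercase format name, mirroring the CSV example:

>>> entityRuler = EntityRulerApproach() \
...     .setInputCols(["document", "token"]) \
...     .setOutputCol("entities") \
...     .setPatternsResource(
...         "patterns.jsonl",
...         ReadAs.TEXT,
...         {"format": "jsonl"}
...     )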
sparknlp/annotator/graph_extraction.py (new file)
@@ -0,0 +1,368 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains classes for GraphExtraction."""
+ from sparknlp.common import *
+
+
+ class GraphExtraction(AnnotatorModel):
+     """Extracts a dependency graph between entities.
+
+     The GraphExtraction class takes e.g. extracted entities from a
+     :class:`.NerDLModel` and creates a dependency tree which describes how the
+     entities relate to each other. For this, a triple store format is used. Nodes
+     represent the entities and the edges represent the relations between those
+     entities. The graph can then be used to find relevant relationships between
+     words.
+
+     Both the :class:`.DependencyParserModel` and
+     :class:`.TypedDependencyParserModel` need to be
+     present in the pipeline. There are two ways to set them:
+
+     #. Both Annotators are present in the pipeline already. The dependencies are
+        taken implicitly from these two Annotators.
+     #. Setting :meth:`.setMergeEntities` to ``True`` will download the
+        default pretrained models for those two Annotators automatically. The
+        specific models can also be set with :meth:`.setDependencyParserModel`
+        and :meth:`.setTypedDependencyParserModel`:
+
+         >>> graph_extraction = GraphExtraction() \\
+         ...     .setInputCols(["document", "token", "ner"]) \\
+         ...     .setOutputCol("graph") \\
+         ...     .setRelationshipTypes(["prefer-LOC"]) \\
+         ...     .setMergeEntities(True)
+         >>> #.setDependencyParserModel(["dependency_conllu", "en", "public/models"])
+         >>> #.setTypedDependencyParserModel(["dependency_typed_conllu", "en", "public/models"])
+
+     ================================= ======================
+     Input Annotation types            Output Annotation type
+     ================================= ======================
+     ``DOCUMENT, TOKEN, NAMED_ENTITY`` ``NODE``
+     ================================= ======================
+
+     Parameters
+     ----------
+     relationshipTypes
+         Paths to find between a pair of token and entity
+     entityTypes
+         Paths to find between a pair of entities
+     explodeEntities
+         When set to true find paths between entities
+     rootTokens
+         Tokens to be considered as roots to start traversing the paths. Use it
+         along with explodeEntities
+     maxSentenceSize
+         Maximum sentence size that the annotator will process, by default 1000.
+         Above this, the sentence is skipped
+     minSentenceSize
+         Minimum sentence size that the annotator will process, by default 2.
+         Below this, the sentence is skipped.
+     mergeEntities
+         Merge same neighboring entities as a single token
+     includeEdges
+         Whether to include edges when building paths
+     delimiter
+         Delimiter symbol used for path output
+     posModel
+         Coordinates (name, lang, remoteLoc) to a pretrained POS model
+     dependencyParserModel
+         Coordinates (name, lang, remoteLoc) to a pretrained Dependency Parser
+         model
+     typedDependencyParserModel
+         Coordinates (name, lang, remoteLoc) to a pretrained Typed Dependency
+         Parser model
+
+     Examples
+     --------
+     >>> import sparknlp
+     >>> from sparknlp.base import *
+     >>> from sparknlp.annotator import *
+     >>> from pyspark.ml import Pipeline
+     >>> documentAssembler = DocumentAssembler() \\
+     ...     .setInputCol("text") \\
+     ...     .setOutputCol("document")
+     >>> sentence = SentenceDetector() \\
+     ...     .setInputCols(["document"]) \\
+     ...     .setOutputCol("sentence")
+     >>> tokenizer = Tokenizer() \\
+     ...     .setInputCols(["sentence"]) \\
+     ...     .setOutputCol("token")
+     >>> embeddings = WordEmbeddingsModel.pretrained() \\
+     ...     .setInputCols(["sentence", "token"]) \\
+     ...     .setOutputCol("embeddings")
+     >>> nerTagger = NerDLModel.pretrained() \\
+     ...     .setInputCols(["sentence", "token", "embeddings"]) \\
+     ...     .setOutputCol("ner")
+     >>> posTagger = PerceptronModel.pretrained() \\
+     ...     .setInputCols(["sentence", "token"]) \\
+     ...     .setOutputCol("pos")
+     >>> dependencyParser = DependencyParserModel.pretrained() \\
+     ...     .setInputCols(["sentence", "pos", "token"]) \\
+     ...     .setOutputCol("dependency")
+     >>> typedDependencyParser = TypedDependencyParserModel.pretrained() \\
+     ...     .setInputCols(["dependency", "pos", "token"]) \\
+     ...     .setOutputCol("dependency_type")
+     >>> graph_extraction = GraphExtraction() \\
+     ...     .setInputCols(["document", "token", "ner"]) \\
+     ...     .setOutputCol("graph") \\
+     ...     .setRelationshipTypes(["prefer-LOC"])
+     >>> pipeline = Pipeline().setStages([
+     ...     documentAssembler,
+     ...     sentence,
+     ...     tokenizer,
+     ...     embeddings,
+     ...     nerTagger,
+     ...     posTagger,
+     ...     dependencyParser,
+     ...     typedDependencyParser,
+     ...     graph_extraction
+     ... ])
+     >>> data = spark.createDataFrame([["You and John prefer the morning flight through Denver"]]).toDF("text")
+     >>> result = pipeline.fit(data).transform(data)
+     >>> result.select("graph").show(truncate=False)
+     +-----------------------------------------------------------------------------------------------------------------+
+     |graph                                                                                                            |
+     +-----------------------------------------------------------------------------------------------------------------+
+     |[[node, 13, 18, prefer, [relationship -> prefer,LOC, path1 -> prefer,nsubj,morning,flat,flight,flat,Denver], []]]|
+     +-----------------------------------------------------------------------------------------------------------------+
+
+     See Also
+     --------
+     GraphFinisher : to output the paths in a more generic format, like RDF
+     """
+     name = "GraphExtraction"
+
+     inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN, AnnotatorType.NAMED_ENTITY]
+
+     optionalInputAnnotatorTypes = [AnnotatorType.DEPENDENCY, AnnotatorType.LABELED_DEPENDENCY]
+
+     outputAnnotatorType = AnnotatorType.NODE
+
+     relationshipTypes = Param(Params._dummy(),
+                               "relationshipTypes",
+                               "Find paths between a pair of token and entity",
+                               typeConverter=TypeConverters.toListString)
+
+     entityTypes = Param(Params._dummy(),
+                         "entityTypes",
+                         "Find paths between a pair of entities",
+                         typeConverter=TypeConverters.toListString)
+
+     explodeEntities = Param(Params._dummy(),
+                             "explodeEntities",
+                             "When set to true find paths between entities",
+                             typeConverter=TypeConverters.toBoolean)
+
+     rootTokens = Param(Params._dummy(),
+                        "rootTokens",
+                        "Tokens to be considered as roots to start traversing the paths. Use it along with explodeEntities",
+                        typeConverter=TypeConverters.toListString)
+
+     maxSentenceSize = Param(Params._dummy(),
+                             "maxSentenceSize",
+                             "Maximum sentence size that the annotator will process. Above this, the sentence is skipped",
+                             typeConverter=TypeConverters.toInt)
+
+     minSentenceSize = Param(Params._dummy(),
+                             "minSentenceSize",
+                             "Minimum sentence size that the annotator will process. Below this, the sentence is skipped",
+                             typeConverter=TypeConverters.toInt)
+
+     mergeEntities = Param(Params._dummy(),
+                           "mergeEntities",
+                           "Merge same neighboring entities as a single token",
+                           typeConverter=TypeConverters.toBoolean)
+
+     mergeEntitiesIOBFormat = Param(Params._dummy(),
+                                    "mergeEntitiesIOBFormat",
+                                    "IOB format to apply when merging entities",
+                                    typeConverter=TypeConverters.toString)
+
+     includeEdges = Param(Params._dummy(),
+                          "includeEdges",
+                          "Whether to include edges when building paths",
+                          typeConverter=TypeConverters.toBoolean)
+
+     delimiter = Param(Params._dummy(),
+                       "delimiter",
+                       "Delimiter symbol used for path output",
+                       typeConverter=TypeConverters.toString)
+
+     posModel = Param(Params._dummy(),
+                      "posModel",
+                      "Coordinates (name, lang, remoteLoc) to a pretrained POS model",
+                      typeConverter=TypeConverters.toListString)
+
+     dependencyParserModel = Param(Params._dummy(),
+                                   "dependencyParserModel",
+                                   "Coordinates (name, lang, remoteLoc) to a pretrained Dependency Parser model",
+                                   typeConverter=TypeConverters.toListString)
+
+     typedDependencyParserModel = Param(Params._dummy(),
+                                        "typedDependencyParserModel",
+                                        "Coordinates (name, lang, remoteLoc) to a pretrained Typed Dependency Parser model",
+                                        typeConverter=TypeConverters.toListString)
+
+     def setRelationshipTypes(self, value):
+         """Sets paths to find between a pair of token and entity.
+
+         Parameters
+         ----------
+         value : List[str]
+             Paths to find between a pair of token and entity
+         """
+         return self._set(relationshipTypes=value)
+
+     def setEntityTypes(self, value):
+         """Sets paths to find between a pair of entities.
+
+         Parameters
+         ----------
+         value : List[str]
+             Paths to find between a pair of entities
+         """
+         return self._set(entityTypes=value)
+
+     def setExplodeEntities(self, value):
+         """Sets whether to find paths between entities.
+
+         Parameters
+         ----------
+         value : bool
+             Whether to find paths between entities.
+         """
+         return self._set(explodeEntities=value)
+
+     def setRootTokens(self, value):
+         """Sets tokens to be considered as the root to start traversing the paths.
+
+         Use it along with explodeEntities.
+
+         Parameters
+         ----------
+         value : List[str]
+             Tokens to be considered as roots to start traversing the paths.
+         """
+         return self._set(rootTokens=value)
+
+     def setMaxSentenceSize(self, value):
+         """Sets Maximum sentence size that the annotator will process, by
+         default 1000.
+
+         Above this, the sentence is skipped.
+
+         Parameters
+         ----------
+         value : int
+             Maximum sentence size that the annotator will process
+         """
+         return self._set(maxSentenceSize=value)
+
+     def setMinSentenceSize(self, value):
+         """Sets Minimum sentence size that the annotator will process, by
+         default 2.
+
+         Below this, the sentence is skipped.
+
+         Parameters
+         ----------
+         value : int
+             Minimum sentence size that the annotator will process
+         """
+         return self._set(minSentenceSize=value)
+
+     def setMergeEntities(self, value):
+         """Sets whether to merge same neighboring entities as a single token.
+
+         Parameters
+         ----------
+         value : bool
+             Whether to merge same neighboring entities as a single token.
+         """
+         return self._set(mergeEntities=value)
+
+     def setMergeEntitiesIOBFormat(self, value):
+         """Sets IOB format to apply when merging entities.
+
+         Parameters
+         ----------
+         value : str
+             IOB format to apply when merging entities. Values IOB or IOB2
+         """
+         return self._set(mergeEntitiesIOBFormat=value)
+
+     def setIncludeEdges(self, value):
+         """Sets whether to include edges when building paths.
+
+         Parameters
+         ----------
+         value : bool
+             Whether to include edges when building paths
+         """
+         return self._set(includeEdges=value)
+
+     def setDelimiter(self, value):
+         """Sets delimiter symbol used for path output.
+
+         Parameters
+         ----------
+         value : str
+             Delimiter symbol used for path output
+         """
+         return self._set(delimiter=value)
+
+     def setPosModel(self, value):
+         """Sets Coordinates (name, lang, remoteLoc) to a pretrained POS model.
+
+         Parameters
+         ----------
+         value : List[str]
+             Coordinates (name, lang, remoteLoc) to a pretrained POS model
+         """
+         return self._set(posModel=value)
+
+     def setDependencyParserModel(self, value):
+         """Sets Coordinates (name, lang, remoteLoc) to a pretrained Dependency
+         Parser model.
+
+         Parameters
+         ----------
+         value : List[str]
+             Coordinates (name, lang, remoteLoc) to a pretrained Dependency
+             Parser model
+         """
+         return self._set(dependencyParserModel=value)
+
+     def setTypedDependencyParserModel(self, value):
+         """Sets Coordinates (name, lang, remoteLoc) to a pretrained Typed
+         Dependency Parser model.
+
+         Parameters
+         ----------
+         value : List[str]
+             Coordinates (name, lang, remoteLoc) to a pretrained Typed Dependency
+             Parser model
+         """
+         return self._set(typedDependencyParserModel=value)
+
+     @keyword_only
+     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.GraphExtraction", java_model=None):
+         super(GraphExtraction, self).__init__(
+             classname=classname,
+             java_model=java_model
+         )
+         self._setDefault(
+             maxSentenceSize=1000,
+             minSentenceSize=2
+         )
+
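Following up on the See Also note above, here is a sketch of pairing GraphExtraction with GraphFinisher to turn the NODE output into a flattened, RDF-like path column. With ``setMergeEntities(True)`` the pretrained POS and dependency parser models are downloaded automatically, so the explicit parser stages from the example can be dropped; the column names are illustrative:

>>> graph_extraction = GraphExtraction() \
...     .setInputCols(["document", "token", "ner"]) \
...     .setOutputCol("graph") \
...     .setMergeEntities(True)
>>> graph_finisher = GraphFinisher() \
...     .setInputCol("graph") \
...     .setOutputCol("graph_paths")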
sparknlp/annotator/keyword_extraction/__init__.py (new file)
@@ -0,0 +1,16 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Module of annotators for keyword extraction."""
+ from sparknlp.annotator.keyword_extraction.yake_keyword_extraction import *
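The wildcard import above exposes YakeKeywordExtraction, which scores candidate keywords from TOKEN annotations. A minimal sketch of configuring it (the parameter values are illustrative, not defaults taken from this diff):

>>> keywords = YakeKeywordExtraction() \
...     .setInputCols(["token"]) \
...     .setOutputCol("keywords") \
...     .setMinNGrams(1) \
...     .setMaxNGrams(3) \
...     .setNKeywords(10)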