spark-nlp 2.6.3rc1__py2.py3-none-any.whl → 6.2.1__py2.py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (329)
  1. com/johnsnowlabs/ml/__init__.py +0 -0
  2. com/johnsnowlabs/ml/ai/__init__.py +10 -0
  3. com/johnsnowlabs/nlp/__init__.py +4 -2
  4. spark_nlp-6.2.1.dist-info/METADATA +362 -0
  5. spark_nlp-6.2.1.dist-info/RECORD +292 -0
  6. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/WHEEL +1 -1
  7. sparknlp/__init__.py +281 -27
  8. sparknlp/annotation.py +137 -6
  9. sparknlp/annotation_audio.py +61 -0
  10. sparknlp/annotation_image.py +82 -0
  11. sparknlp/annotator/__init__.py +93 -0
  12. sparknlp/annotator/audio/__init__.py +16 -0
  13. sparknlp/annotator/audio/hubert_for_ctc.py +188 -0
  14. sparknlp/annotator/audio/wav2vec2_for_ctc.py +161 -0
  15. sparknlp/annotator/audio/whisper_for_ctc.py +251 -0
  16. sparknlp/annotator/chunk2_doc.py +85 -0
  17. sparknlp/annotator/chunker.py +137 -0
  18. sparknlp/annotator/classifier_dl/__init__.py +61 -0
  19. sparknlp/annotator/classifier_dl/albert_for_multiple_choice.py +161 -0
  20. sparknlp/annotator/classifier_dl/albert_for_question_answering.py +172 -0
  21. sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py +201 -0
  22. sparknlp/annotator/classifier_dl/albert_for_token_classification.py +179 -0
  23. sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
  24. sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py +225 -0
  25. sparknlp/annotator/classifier_dl/bert_for_multiple_choice.py +161 -0
  26. sparknlp/annotator/classifier_dl/bert_for_question_answering.py +168 -0
  27. sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py +202 -0
  28. sparknlp/annotator/classifier_dl/bert_for_token_classification.py +177 -0
  29. sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py +212 -0
  30. sparknlp/annotator/classifier_dl/camembert_for_question_answering.py +168 -0
  31. sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py +205 -0
  32. sparknlp/annotator/classifier_dl/camembert_for_token_classification.py +173 -0
  33. sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
  34. sparknlp/annotator/classifier_dl/classifier_dl.py +320 -0
  35. sparknlp/annotator/classifier_dl/deberta_for_question_answering.py +168 -0
  36. sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py +198 -0
  37. sparknlp/annotator/classifier_dl/deberta_for_token_classification.py +175 -0
  38. sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +193 -0
  39. sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py +168 -0
  40. sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py +201 -0
  41. sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py +175 -0
  42. sparknlp/annotator/classifier_dl/distil_bert_for_zero_shot_classification.py +211 -0
  43. sparknlp/annotator/classifier_dl/distilbert_for_multiple_choice.py +161 -0
  44. sparknlp/annotator/classifier_dl/longformer_for_question_answering.py +168 -0
  45. sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py +201 -0
  46. sparknlp/annotator/classifier_dl/longformer_for_token_classification.py +176 -0
  47. sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py +148 -0
  48. sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py +188 -0
  49. sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
  50. sparknlp/annotator/classifier_dl/multi_classifier_dl.py +395 -0
  51. sparknlp/annotator/classifier_dl/roberta_for_multiple_choice.py +161 -0
  52. sparknlp/annotator/classifier_dl/roberta_for_question_answering.py +168 -0
  53. sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py +201 -0
  54. sparknlp/annotator/classifier_dl/roberta_for_token_classification.py +189 -0
  55. sparknlp/annotator/classifier_dl/roberta_for_zero_shot_classification.py +225 -0
  56. sparknlp/annotator/classifier_dl/sentiment_dl.py +378 -0
  57. sparknlp/annotator/classifier_dl/tapas_for_question_answering.py +170 -0
  58. sparknlp/annotator/classifier_dl/xlm_roberta_for_multiple_choice.py +149 -0
  59. sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py +168 -0
  60. sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py +201 -0
  61. sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +173 -0
  62. sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py +225 -0
  63. sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py +201 -0
  64. sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py +176 -0
  65. sparknlp/annotator/cleaners/__init__.py +15 -0
  66. sparknlp/annotator/cleaners/cleaner.py +202 -0
  67. sparknlp/annotator/cleaners/extractor.py +191 -0
  68. sparknlp/annotator/coref/__init__.py +1 -0
  69. sparknlp/annotator/coref/spanbert_coref.py +221 -0
  70. sparknlp/annotator/cv/__init__.py +29 -0
  71. sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
  72. sparknlp/annotator/cv/clip_for_zero_shot_classification.py +193 -0
  73. sparknlp/annotator/cv/convnext_for_image_classification.py +269 -0
  74. sparknlp/annotator/cv/florence2_transformer.py +180 -0
  75. sparknlp/annotator/cv/gemma3_for_multimodal.py +346 -0
  76. sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
  77. sparknlp/annotator/cv/janus_for_multimodal.py +351 -0
  78. sparknlp/annotator/cv/llava_for_multimodal.py +328 -0
  79. sparknlp/annotator/cv/mllama_for_multimodal.py +340 -0
  80. sparknlp/annotator/cv/paligemma_for_multimodal.py +308 -0
  81. sparknlp/annotator/cv/phi3_vision_for_multimodal.py +328 -0
  82. sparknlp/annotator/cv/qwen2vl_transformer.py +332 -0
  83. sparknlp/annotator/cv/smolvlm_transformer.py +426 -0
  84. sparknlp/annotator/cv/swin_for_image_classification.py +242 -0
  85. sparknlp/annotator/cv/vision_encoder_decoder_for_image_captioning.py +240 -0
  86. sparknlp/annotator/cv/vit_for_image_classification.py +217 -0
  87. sparknlp/annotator/dataframe_optimizer.py +216 -0
  88. sparknlp/annotator/date2_chunk.py +88 -0
  89. sparknlp/annotator/dependency/__init__.py +17 -0
  90. sparknlp/annotator/dependency/dependency_parser.py +294 -0
  91. sparknlp/annotator/dependency/typed_dependency_parser.py +318 -0
  92. sparknlp/annotator/document_character_text_splitter.py +228 -0
  93. sparknlp/annotator/document_normalizer.py +235 -0
  94. sparknlp/annotator/document_token_splitter.py +175 -0
  95. sparknlp/annotator/document_token_splitter_test.py +85 -0
  96. sparknlp/annotator/embeddings/__init__.py +45 -0
  97. sparknlp/annotator/embeddings/albert_embeddings.py +230 -0
  98. sparknlp/annotator/embeddings/auto_gguf_embeddings.py +539 -0
  99. sparknlp/annotator/embeddings/bert_embeddings.py +208 -0
  100. sparknlp/annotator/embeddings/bert_sentence_embeddings.py +224 -0
  101. sparknlp/annotator/embeddings/bge_embeddings.py +199 -0
  102. sparknlp/annotator/embeddings/camembert_embeddings.py +210 -0
  103. sparknlp/annotator/embeddings/chunk_embeddings.py +149 -0
  104. sparknlp/annotator/embeddings/deberta_embeddings.py +208 -0
  105. sparknlp/annotator/embeddings/distil_bert_embeddings.py +221 -0
  106. sparknlp/annotator/embeddings/doc2vec.py +352 -0
  107. sparknlp/annotator/embeddings/e5_embeddings.py +195 -0
  108. sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
  109. sparknlp/annotator/embeddings/elmo_embeddings.py +251 -0
  110. sparknlp/annotator/embeddings/instructor_embeddings.py +204 -0
  111. sparknlp/annotator/embeddings/longformer_embeddings.py +211 -0
  112. sparknlp/annotator/embeddings/minilm_embeddings.py +189 -0
  113. sparknlp/annotator/embeddings/mpnet_embeddings.py +192 -0
  114. sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
  115. sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
  116. sparknlp/annotator/embeddings/roberta_embeddings.py +225 -0
  117. sparknlp/annotator/embeddings/roberta_sentence_embeddings.py +191 -0
  118. sparknlp/annotator/embeddings/sentence_embeddings.py +134 -0
  119. sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
  120. sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
  121. sparknlp/annotator/embeddings/universal_sentence_encoder.py +211 -0
  122. sparknlp/annotator/embeddings/word2vec.py +353 -0
  123. sparknlp/annotator/embeddings/word_embeddings.py +385 -0
  124. sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +225 -0
  125. sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py +194 -0
  126. sparknlp/annotator/embeddings/xlnet_embeddings.py +227 -0
  127. sparknlp/annotator/er/__init__.py +16 -0
  128. sparknlp/annotator/er/entity_ruler.py +267 -0
  129. sparknlp/annotator/graph_extraction.py +368 -0
  130. sparknlp/annotator/keyword_extraction/__init__.py +16 -0
  131. sparknlp/annotator/keyword_extraction/yake_keyword_extraction.py +270 -0
  132. sparknlp/annotator/ld_dl/__init__.py +16 -0
  133. sparknlp/annotator/ld_dl/language_detector_dl.py +199 -0
  134. sparknlp/annotator/lemmatizer.py +250 -0
  135. sparknlp/annotator/matcher/__init__.py +20 -0
  136. sparknlp/annotator/matcher/big_text_matcher.py +272 -0
  137. sparknlp/annotator/matcher/date_matcher.py +303 -0
  138. sparknlp/annotator/matcher/multi_date_matcher.py +109 -0
  139. sparknlp/annotator/matcher/regex_matcher.py +221 -0
  140. sparknlp/annotator/matcher/text_matcher.py +290 -0
  141. sparknlp/annotator/n_gram_generator.py +141 -0
  142. sparknlp/annotator/ner/__init__.py +21 -0
  143. sparknlp/annotator/ner/ner_approach.py +94 -0
  144. sparknlp/annotator/ner/ner_converter.py +148 -0
  145. sparknlp/annotator/ner/ner_crf.py +397 -0
  146. sparknlp/annotator/ner/ner_dl.py +591 -0
  147. sparknlp/annotator/ner/ner_dl_graph_checker.py +293 -0
  148. sparknlp/annotator/ner/ner_overwriter.py +166 -0
  149. sparknlp/annotator/ner/zero_shot_ner_model.py +173 -0
  150. sparknlp/annotator/normalizer.py +230 -0
  151. sparknlp/annotator/openai/__init__.py +16 -0
  152. sparknlp/annotator/openai/openai_completion.py +349 -0
  153. sparknlp/annotator/openai/openai_embeddings.py +106 -0
  154. sparknlp/annotator/param/__init__.py +17 -0
  155. sparknlp/annotator/param/classifier_encoder.py +98 -0
  156. sparknlp/annotator/param/evaluation_dl_params.py +130 -0
  157. sparknlp/annotator/pos/__init__.py +16 -0
  158. sparknlp/annotator/pos/perceptron.py +263 -0
  159. sparknlp/annotator/sentence/__init__.py +17 -0
  160. sparknlp/annotator/sentence/sentence_detector.py +290 -0
  161. sparknlp/annotator/sentence/sentence_detector_dl.py +467 -0
  162. sparknlp/annotator/sentiment/__init__.py +17 -0
  163. sparknlp/annotator/sentiment/sentiment_detector.py +208 -0
  164. sparknlp/annotator/sentiment/vivekn_sentiment.py +242 -0
  165. sparknlp/annotator/seq2seq/__init__.py +35 -0
  166. sparknlp/annotator/seq2seq/auto_gguf_model.py +304 -0
  167. sparknlp/annotator/seq2seq/auto_gguf_reranker.py +334 -0
  168. sparknlp/annotator/seq2seq/auto_gguf_vision_model.py +336 -0
  169. sparknlp/annotator/seq2seq/bart_transformer.py +420 -0
  170. sparknlp/annotator/seq2seq/cohere_transformer.py +357 -0
  171. sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
  172. sparknlp/annotator/seq2seq/gpt2_transformer.py +363 -0
  173. sparknlp/annotator/seq2seq/llama2_transformer.py +343 -0
  174. sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
  175. sparknlp/annotator/seq2seq/m2m100_transformer.py +392 -0
  176. sparknlp/annotator/seq2seq/marian_transformer.py +374 -0
  177. sparknlp/annotator/seq2seq/mistral_transformer.py +348 -0
  178. sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
  179. sparknlp/annotator/seq2seq/olmo_transformer.py +326 -0
  180. sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
  181. sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
  182. sparknlp/annotator/seq2seq/phi4_transformer.py +387 -0
  183. sparknlp/annotator/seq2seq/qwen_transformer.py +340 -0
  184. sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
  185. sparknlp/annotator/seq2seq/t5_transformer.py +425 -0
  186. sparknlp/annotator/similarity/__init__.py +0 -0
  187. sparknlp/annotator/similarity/document_similarity_ranker.py +379 -0
  188. sparknlp/annotator/spell_check/__init__.py +18 -0
  189. sparknlp/annotator/spell_check/context_spell_checker.py +911 -0
  190. sparknlp/annotator/spell_check/norvig_sweeting.py +358 -0
  191. sparknlp/annotator/spell_check/symmetric_delete.py +299 -0
  192. sparknlp/annotator/stemmer.py +79 -0
  193. sparknlp/annotator/stop_words_cleaner.py +190 -0
  194. sparknlp/annotator/tf_ner_dl_graph_builder.py +179 -0
  195. sparknlp/annotator/token/__init__.py +19 -0
  196. sparknlp/annotator/token/chunk_tokenizer.py +118 -0
  197. sparknlp/annotator/token/recursive_tokenizer.py +205 -0
  198. sparknlp/annotator/token/regex_tokenizer.py +208 -0
  199. sparknlp/annotator/token/tokenizer.py +561 -0
  200. sparknlp/annotator/token2_chunk.py +76 -0
  201. sparknlp/annotator/ws/__init__.py +16 -0
  202. sparknlp/annotator/ws/word_segmenter.py +429 -0
  203. sparknlp/base/__init__.py +30 -0
  204. sparknlp/base/audio_assembler.py +95 -0
  205. sparknlp/base/doc2_chunk.py +169 -0
  206. sparknlp/base/document_assembler.py +164 -0
  207. sparknlp/base/embeddings_finisher.py +201 -0
  208. sparknlp/base/finisher.py +217 -0
  209. sparknlp/base/gguf_ranking_finisher.py +234 -0
  210. sparknlp/base/graph_finisher.py +125 -0
  211. sparknlp/base/has_recursive_fit.py +24 -0
  212. sparknlp/base/has_recursive_transform.py +22 -0
  213. sparknlp/base/image_assembler.py +172 -0
  214. sparknlp/base/light_pipeline.py +429 -0
  215. sparknlp/base/multi_document_assembler.py +164 -0
  216. sparknlp/base/prompt_assembler.py +207 -0
  217. sparknlp/base/recursive_pipeline.py +107 -0
  218. sparknlp/base/table_assembler.py +145 -0
  219. sparknlp/base/token_assembler.py +124 -0
  220. sparknlp/common/__init__.py +26 -0
  221. sparknlp/common/annotator_approach.py +41 -0
  222. sparknlp/common/annotator_model.py +47 -0
  223. sparknlp/common/annotator_properties.py +114 -0
  224. sparknlp/common/annotator_type.py +38 -0
  225. sparknlp/common/completion_post_processing.py +37 -0
  226. sparknlp/common/coverage_result.py +22 -0
  227. sparknlp/common/match_strategy.py +33 -0
  228. sparknlp/common/properties.py +1298 -0
  229. sparknlp/common/read_as.py +33 -0
  230. sparknlp/common/recursive_annotator_approach.py +35 -0
  231. sparknlp/common/storage.py +149 -0
  232. sparknlp/common/utils.py +39 -0
  233. sparknlp/functions.py +315 -5
  234. sparknlp/internal/__init__.py +1199 -0
  235. sparknlp/internal/annotator_java_ml.py +32 -0
  236. sparknlp/internal/annotator_transformer.py +37 -0
  237. sparknlp/internal/extended_java_wrapper.py +63 -0
  238. sparknlp/internal/params_getters_setters.py +71 -0
  239. sparknlp/internal/recursive.py +70 -0
  240. sparknlp/logging/__init__.py +15 -0
  241. sparknlp/logging/comet.py +467 -0
  242. sparknlp/partition/__init__.py +16 -0
  243. sparknlp/partition/partition.py +244 -0
  244. sparknlp/partition/partition_properties.py +902 -0
  245. sparknlp/partition/partition_transformer.py +200 -0
  246. sparknlp/pretrained/__init__.py +17 -0
  247. sparknlp/pretrained/pretrained_pipeline.py +158 -0
  248. sparknlp/pretrained/resource_downloader.py +216 -0
  249. sparknlp/pretrained/utils.py +35 -0
  250. sparknlp/reader/__init__.py +15 -0
  251. sparknlp/reader/enums.py +19 -0
  252. sparknlp/reader/pdf_to_text.py +190 -0
  253. sparknlp/reader/reader2doc.py +124 -0
  254. sparknlp/reader/reader2image.py +136 -0
  255. sparknlp/reader/reader2table.py +44 -0
  256. sparknlp/reader/reader_assembler.py +159 -0
  257. sparknlp/reader/sparknlp_reader.py +461 -0
  258. sparknlp/training/__init__.py +20 -0
  259. sparknlp/training/_tf_graph_builders/__init__.py +0 -0
  260. sparknlp/training/_tf_graph_builders/graph_builders.py +299 -0
  261. sparknlp/training/_tf_graph_builders/ner_dl/__init__.py +0 -0
  262. sparknlp/training/_tf_graph_builders/ner_dl/create_graph.py +41 -0
  263. sparknlp/training/_tf_graph_builders/ner_dl/dataset_encoder.py +78 -0
  264. sparknlp/training/_tf_graph_builders/ner_dl/ner_model.py +521 -0
  265. sparknlp/training/_tf_graph_builders/ner_dl/ner_model_saver.py +62 -0
  266. sparknlp/training/_tf_graph_builders/ner_dl/sentence_grouper.py +28 -0
  267. sparknlp/training/_tf_graph_builders/tf2contrib/__init__.py +36 -0
  268. sparknlp/training/_tf_graph_builders/tf2contrib/core_rnn_cell.py +385 -0
  269. sparknlp/training/_tf_graph_builders/tf2contrib/fused_rnn_cell.py +183 -0
  270. sparknlp/training/_tf_graph_builders/tf2contrib/gru_ops.py +235 -0
  271. sparknlp/training/_tf_graph_builders/tf2contrib/lstm_ops.py +665 -0
  272. sparknlp/training/_tf_graph_builders/tf2contrib/rnn.py +245 -0
  273. sparknlp/training/_tf_graph_builders/tf2contrib/rnn_cell.py +4006 -0
  274. sparknlp/training/_tf_graph_builders_1x/__init__.py +0 -0
  275. sparknlp/training/_tf_graph_builders_1x/graph_builders.py +277 -0
  276. sparknlp/training/_tf_graph_builders_1x/ner_dl/__init__.py +0 -0
  277. sparknlp/training/_tf_graph_builders_1x/ner_dl/create_graph.py +34 -0
  278. sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py +78 -0
  279. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py +532 -0
  280. sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py +62 -0
  281. sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py +28 -0
  282. sparknlp/training/conll.py +150 -0
  283. sparknlp/training/conllu.py +103 -0
  284. sparknlp/training/pos.py +103 -0
  285. sparknlp/training/pub_tator.py +76 -0
  286. sparknlp/training/spacy_to_annotation.py +57 -0
  287. sparknlp/training/tfgraphs.py +5 -0
  288. sparknlp/upload_to_hub.py +149 -0
  289. sparknlp/util.py +51 -5
  290. com/__init__.pyc +0 -0
  291. com/__pycache__/__init__.cpython-36.pyc +0 -0
  292. com/johnsnowlabs/__init__.pyc +0 -0
  293. com/johnsnowlabs/__pycache__/__init__.cpython-36.pyc +0 -0
  294. com/johnsnowlabs/nlp/__init__.pyc +0 -0
  295. com/johnsnowlabs/nlp/__pycache__/__init__.cpython-36.pyc +0 -0
  296. spark_nlp-2.6.3rc1.dist-info/METADATA +0 -36
  297. spark_nlp-2.6.3rc1.dist-info/RECORD +0 -48
  298. sparknlp/__init__.pyc +0 -0
  299. sparknlp/__pycache__/__init__.cpython-36.pyc +0 -0
  300. sparknlp/__pycache__/annotation.cpython-36.pyc +0 -0
  301. sparknlp/__pycache__/annotator.cpython-36.pyc +0 -0
  302. sparknlp/__pycache__/base.cpython-36.pyc +0 -0
  303. sparknlp/__pycache__/common.cpython-36.pyc +0 -0
  304. sparknlp/__pycache__/embeddings.cpython-36.pyc +0 -0
  305. sparknlp/__pycache__/functions.cpython-36.pyc +0 -0
  306. sparknlp/__pycache__/internal.cpython-36.pyc +0 -0
  307. sparknlp/__pycache__/pretrained.cpython-36.pyc +0 -0
  308. sparknlp/__pycache__/storage.cpython-36.pyc +0 -0
  309. sparknlp/__pycache__/training.cpython-36.pyc +0 -0
  310. sparknlp/__pycache__/util.cpython-36.pyc +0 -0
  311. sparknlp/annotation.pyc +0 -0
  312. sparknlp/annotator.py +0 -3006
  313. sparknlp/annotator.pyc +0 -0
  314. sparknlp/base.py +0 -347
  315. sparknlp/base.pyc +0 -0
  316. sparknlp/common.py +0 -193
  317. sparknlp/common.pyc +0 -0
  318. sparknlp/embeddings.py +0 -40
  319. sparknlp/embeddings.pyc +0 -0
  320. sparknlp/internal.py +0 -288
  321. sparknlp/internal.pyc +0 -0
  322. sparknlp/pretrained.py +0 -123
  323. sparknlp/pretrained.pyc +0 -0
  324. sparknlp/storage.py +0 -32
  325. sparknlp/storage.pyc +0 -0
  326. sparknlp/training.py +0 -62
  327. sparknlp/training.pyc +0 -0
  328. sparknlp/util.pyc +0 -0
  329. {spark_nlp-2.6.3rc1.dist-info → spark_nlp-6.2.1.dist-info}/top_level.txt +0 -0
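Given the size of the version jump, it can be worth confirming which version actually lands on the cluster after upgrading; a quick check (a sketch, assuming the PyPI package is installed alongside a matching Spark NLP jar):

    import sparknlp

    # Start (or attach to) a Spark session with the Spark NLP jar loaded.
    spark = sparknlp.start()
    print(sparknlp.version())  # Python package version, e.g. "6.2.1"
    print(spark.version)       # underlying Apache Spark version

The hunks below reproduce a selection of the newly added files in full.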
sparknlp/annotator/stemmer.py
@@ -0,0 +1,79 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains classes for the Stemmer."""
+ from sparknlp.common import *
+
+
+ class Stemmer(AnnotatorModel):
+     """Returns hard-stems out of words with the objective of retrieving the
+     meaningful part of the word.
+
+     For extended examples of usage, see the `Examples
+     <https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/text/english/stemmer/Word_Stemming_with_Stemmer.ipynb>`__.
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``TOKEN``              ``TOKEN``
+     ====================== ======================
+
+     Parameters
+     ----------
+     None
+
+     Examples
+     --------
+     >>> import sparknlp
+     >>> from sparknlp.base import *
+     >>> from sparknlp.annotator import *
+     >>> from pyspark.ml import Pipeline
+     >>> documentAssembler = DocumentAssembler() \\
+     ...     .setInputCol("text") \\
+     ...     .setOutputCol("document")
+     >>> tokenizer = Tokenizer() \\
+     ...     .setInputCols(["document"]) \\
+     ...     .setOutputCol("token")
+     >>> stemmer = Stemmer() \\
+     ...     .setInputCols(["token"]) \\
+     ...     .setOutputCol("stem")
+     >>> pipeline = Pipeline().setStages([
+     ...     documentAssembler,
+     ...     tokenizer,
+     ...     stemmer
+     ... ])
+     >>> data = spark.createDataFrame([["Peter Pipers employees are picking pecks of pickled peppers."]]) \\
+     ...     .toDF("text")
+     >>> result = pipeline.fit(data).transform(data)
+     >>> result.selectExpr("stem.result").show(truncate=False)
+     +-------------------------------------------------------------+
+     |result                                                       |
+     +-------------------------------------------------------------+
+     |[peter, piper, employe, ar, pick, peck, of, pickl, pepper, .]|
+     +-------------------------------------------------------------+
+     """
+
+     inputAnnotatorTypes = [AnnotatorType.TOKEN]
+
+     outputAnnotatorType = AnnotatorType.TOKEN
+
+     language = Param(Params._dummy(), "language", "stemmer algorithm", typeConverter=TypeConverters.toString)
+
+     name = "Stemmer"
+
+     @keyword_only
+     def __init__(self):
+         super(Stemmer, self).__init__(classname="com.johnsnowlabs.nlp.annotators.Stemmer")
+         self._setDefault(
+             language="english"
+         )
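Since the docstring pipeline has no trainable stages, it can be fit on an empty DataFrame and wrapped in a LightPipeline for low-latency, driver-side annotation of plain strings; a minimal sketch, assuming a Spark session started via sparknlp.start():

    import sparknlp
    from sparknlp.base import DocumentAssembler, LightPipeline
    from sparknlp.annotator import Tokenizer, Stemmer
    from pyspark.ml import Pipeline

    spark = sparknlp.start()

    # Same stages as the docstring example above.
    documentAssembler = DocumentAssembler().setInputCol("text").setOutputCol("document")
    tokenizer = Tokenizer().setInputCols(["document"]).setOutputCol("token")
    stemmer = Stemmer().setInputCols(["token"]).setOutputCol("stem")
    pipeline = Pipeline().setStages([documentAssembler, tokenizer, stemmer])

    # Fit on an empty frame (nothing to train), then annotate a raw string.
    empty = spark.createDataFrame([[""]]).toDF("text")
    light = LightPipeline(pipeline.fit(empty))
    print(light.annotate("Peter Pipers employees are picking pecks of pickled peppers.")["stem"])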
sparknlp/annotator/stop_words_cleaner.py
@@ -0,0 +1,190 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains classes for the StopWordsCleaner."""
+ from sparknlp.common import *
+
+
+ class StopWordsCleaner(AnnotatorModel):
+     """This annotator takes a sequence of strings (e.g. the output of a
+     Tokenizer, Normalizer, Lemmatizer, and Stemmer) and drops all the stop words
+     from the input sequences.
+
+     By default, it uses stop words from MLlib's `StopWordsRemover
+     <https://spark.apache.org/docs/latest/ml-features#stopwordsremover>`__. Stop
+     words can also be defined by explicitly setting them with
+     :meth:`.setStopWords` or loaded from pretrained models using ``pretrained``
+     of its companion object.
+
+
+     >>> stopWords = StopWordsCleaner.pretrained() \\
+     ...     .setInputCols(["token"]) \\
+     ...     .setOutputCol("cleanTokens")
+
+     This will load the default pretrained model ``"stopwords_en"``.
+
+     For available pretrained models please see the `Models Hub
+     <https://sparknlp.org/models?task=Stop+Words+Removal>`__.
+
+     For extended examples of usage, see the `Examples
+     <https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/annotation/text/english/stop-words/StopWordsCleaner.ipynb>`__.
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``TOKEN``              ``TOKEN``
+     ====================== ======================
+
+     Parameters
+     ----------
+     stopWords
+         The words to be filtered out, by default English stop words from Spark ML
+     caseSensitive
+         Whether to consider case, by default False
+     locale
+         Locale of the input. Ignored when case sensitive, by default the locale of
+         the JVM
+
+     Examples
+     --------
+     >>> import sparknlp
+     >>> from sparknlp.base import *
+     >>> from sparknlp.annotator import *
+     >>> from pyspark.ml import Pipeline
+     >>> documentAssembler = DocumentAssembler() \\
+     ...     .setInputCol("text") \\
+     ...     .setOutputCol("document")
+     >>> sentenceDetector = SentenceDetector() \\
+     ...     .setInputCols(["document"]) \\
+     ...     .setOutputCol("sentence")
+     >>> tokenizer = Tokenizer() \\
+     ...     .setInputCols(["sentence"]) \\
+     ...     .setOutputCol("token")
+     >>> stopWords = StopWordsCleaner() \\
+     ...     .setInputCols(["token"]) \\
+     ...     .setOutputCol("cleanTokens") \\
+     ...     .setCaseSensitive(False)
+     >>> pipeline = Pipeline().setStages([
+     ...     documentAssembler,
+     ...     sentenceDetector,
+     ...     tokenizer,
+     ...     stopWords
+     ... ])
+     >>> data = spark.createDataFrame([
+     ...     ["This is my first sentence. This is my second."],
+     ...     ["This is my third sentence. This is my fourth."]
+     ... ]).toDF("text")
+     >>> result = pipeline.fit(data).transform(data)
+     >>> result.selectExpr("cleanTokens.result").show(truncate=False)
+     +-------------------------------+
+     |result                         |
+     +-------------------------------+
+     |[first, sentence, ., second, .]|
+     |[third, sentence, ., fourth, .]|
+     +-------------------------------+
+     """
+
+     name = "StopWordsCleaner"
+
+     inputAnnotatorTypes = [AnnotatorType.TOKEN]
+
+     outputAnnotatorType = AnnotatorType.TOKEN
+
+     @keyword_only
+     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.StopWordsCleaner", java_model=None):
+         super(StopWordsCleaner, self).__init__(
+             classname=classname,
+             java_model=java_model
+         )
+         self._setDefault(
+             stopWords=StopWordsCleaner.loadDefaultStopWords("english"),
+             caseSensitive=False,
+             locale=self._java_obj.getLocale()
+         )
+
+     stopWords = Param(Params._dummy(), "stopWords", "The words to be filtered out",
+                       typeConverter=TypeConverters.toListString)
+     caseSensitive = Param(Params._dummy(), "caseSensitive", "whether to do a case sensitive " +
+                           "comparison over the stop words", typeConverter=TypeConverters.toBoolean)
+     locale = Param(Params._dummy(), "locale", "locale of the input. ignored when case sensitive " +
+                    "is true", typeConverter=TypeConverters.toString)
+
+     def setStopWords(self, value):
+         """Sets the words to be filtered out, by default English stop words
+         from Spark ML.
+
+         Parameters
+         ----------
+         value : List[str]
+             The words to be filtered out
+         """
+         return self._set(stopWords=value)
+
+     def setCaseSensitive(self, value):
+         """Sets whether to do a case sensitive comparison, by default False.
+
+         Parameters
+         ----------
+         value : bool
+             Whether to do a case sensitive comparison
+         """
+         return self._set(caseSensitive=value)
+
+     def setLocale(self, value):
+         """Sets the locale of the input. Ignored when case sensitive, by
+         default the locale of the JVM.
+
+         Parameters
+         ----------
+         value : str
+             Locale of the input
+         """
+         return self._set(locale=value)
+
+     def loadDefaultStopWords(language="english"):
+         """Loads the default stop words for the given language.
+
+         Supported languages: danish, dutch, english, finnish, french, german,
+         hungarian, italian, norwegian, portuguese, russian, spanish, swedish,
+         turkish
+
+         Parameters
+         ----------
+         language : str, optional
+             Language stopwords to load, by default "english"
+         """
+         from pyspark.ml.wrapper import _jvm
+         stopWordsObj = _jvm().org.apache.spark.ml.feature.StopWordsRemover
+         return list(stopWordsObj.loadDefaultStopWords(language))
+
+     @staticmethod
+     def pretrained(name="stopwords_en", lang="en", remote_loc=None):
+         """Downloads and loads a pretrained model.
+
+         Parameters
+         ----------
+         name : str, optional
+             Name of the pretrained model, by default "stopwords_en"
+         lang : str, optional
+             Language of the pretrained model, by default "en"
+         remote_loc : str, optional
+             Optional remote address of the resource, by default None. Will use
+             Spark NLP's repositories otherwise.
+
+         Returns
+         -------
+         StopWordsCleaner
+             The restored model
+         """
+         from sparknlp.pretrained import ResourceDownloader
+         return ResourceDownloader.downloadModel(StopWordsCleaner, name, lang, remote_loc)
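The loadDefaultStopWords and setStopWords methods shown above compose naturally when the default list needs domain-specific additions; a short sketch (extra_words is a hypothetical example list, and the "token" column is assumed to come from a Tokenizer as in the docstring pipeline):

    from sparknlp.annotator import StopWordsCleaner

    # Extend Spark ML's default English stop-word list before filtering.
    extra_words = ["sentence", "fourth"]  # hypothetical domain-specific additions
    custom = StopWordsCleaner.loadDefaultStopWords("english") + extra_words

    stopWords = StopWordsCleaner() \
        .setInputCols(["token"]) \
        .setOutputCol("cleanTokens") \
        .setStopWords(custom) \
        .setCaseSensitive(False)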
sparknlp/annotator/tf_ner_dl_graph_builder.py
@@ -0,0 +1,179 @@
+ from pyspark.ml import Model, Estimator
+ from pyspark.ml.util import DefaultParamsWritable, DefaultParamsReadable
+ from sparknlp.common import *
+
+
+ class TFNerDLGraphBuilderModel(Model, DefaultParamsWritable, DefaultParamsReadable):
+     def _transform(self, dataset):
+         return dataset
+
+
+ class TFNerDLGraphBuilder(Estimator, DefaultParamsWritable, DefaultParamsReadable):
+
+     inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN, AnnotatorType.WORD_EMBEDDINGS]
+
+     labelColumn = Param(Params._dummy(),
+                         "labelColumn",
+                         "Labels",
+                         typeConverter=TypeConverters.toString)
+
+     inputCols = Param(Params._dummy(),
+                       "inputCols",
+                       "Input columns",
+                       typeConverter=TypeConverters.toListString)
+
+     graphFolder = Param(Params._dummy(), "graphFolder", "Folder path that contains external graph files",
+                         TypeConverters.toString)
+
+     graphFile = Param(Params._dummy(), "graphFile", "Graph file name. If empty, a default name is generated.",
+                       TypeConverters.toString)
+
+     hiddenUnitsNumber = Param(Params._dummy(),
+                               "hiddenUnitsNumber",
+                               "Number of hidden units",
+                               typeConverter=TypeConverters.toInt)
+
+     def setHiddenUnitsNumber(self, value):
+         """Sets the number of hidden units for AssertionDLApproach and MedicalNerApproach.
+
+         Parameters
+         ----------
+         value : int
+             Number of hidden units for AssertionDLApproach and MedicalNerApproach
+         """
+         return self._set(hiddenUnitsNumber=value)
+
+     def getHiddenUnitsNumber(self):
+         """Gets the number of hidden units for AssertionDLApproach and MedicalNerApproach."""
+         return self.getOrDefault(self.hiddenUnitsNumber)
+
+     def setLabelColumn(self, value):
+         """Sets the name of the column for data labels.
+
+         Parameters
+         ----------
+         value : str
+             Column for data labels
+         """
+         return self._set(labelColumn=value)
+
+     def getLabelColumn(self):
+         """Gets the name of the label column."""
+         return self.getOrDefault(self.labelColumn)
+
+     def setInputCols(self, *value):
+         """Sets column names of input annotations.
+
+         Parameters
+         ----------
+         *value : List[str]
+             Input columns for the annotator
+         """
+         if type(value[0]) == str or type(value[0]) == list:
+             self.inputColsValidation(value)
+             if len(value) == 1 and type(value[0]) == list:
+                 return self._set(inputCols=value[0])
+             else:
+                 return self._set(inputCols=list(value))
+         else:
+             raise TypeError("InputCols datatype not supported. It must be either str or list")
+
+     def inputColsValidation(self, value):
+         actual_columns = len(value)
+         if type(value[0]) == list:
+             actual_columns = len(value[0])
+
+         expected_columns = len(self.inputAnnotatorTypes)
+
+         if actual_columns != expected_columns:
+             raise TypeError(
+                 f"setInputCols in {self.uid} expecting {expected_columns} columns. "
+                 f"Provided column amount: {actual_columns}. "
+                 f"Which should be columns from the following annotators: {self.inputAnnotatorTypes}")
+
+     def getInputCols(self):
+         """Gets current column names of input annotations."""
+         return self.getOrDefault(self.inputCols)
+
+     def setGraphFolder(self, value):
+         """Sets the folder path that contains external graph files.
+
+         Parameters
+         ----------
+         value : str
+             Folder path that contains external graph files.
+         """
+         return self._set(graphFolder=value)
+
+     def getGraphFolder(self):
+         """Gets the graph folder."""
+         return self.getOrDefault(self.graphFolder)
+
+     def setGraphFile(self, value):
+         """Sets the graph file name.
+
+         Parameters
+         ----------
+         value : str
+             Graph file name. If set to "auto", then the graph builder will use the model specific default graph
+             file name.
+         """
+         return self._set(graphFile=value)
+
+     def getGraphFile(self):
+         """Gets the graph file name."""
+         return self.getOrDefault(self.graphFile)
+
+     def _fit(self, dataset):
+         from ..training.tfgraphs import tf_graph, tf_graph_1x
+
+         build_params = {}
+
+         from sparknlp.internal import _NerDLGraphBuilder
+
+         params_java = _NerDLGraphBuilder(
+             dataset,
+             self.getInputCols(),
+             self.getLabelColumn())._java_obj
+         params = list(map(int, params_java.toString().replace("(", "").replace(")", "").split(",")))
+         build_params["ntags"] = params[0]
+         build_params["embeddings_dim"] = params[1]
+         build_params["nchars"] = params[2]
+         if self.getHiddenUnitsNumber() is not None:
+             build_params["lstm_size"] = self.getHiddenUnitsNumber()
+
+         graph_file = "auto"
+         if self.getGraphFile() is not None:
+             graph_file = self.getGraphFile()
+
+         graph_folder = ""
+         if self.getGraphFolder() is not None:
+             graph_folder = self.getGraphFolder()
+
+         print("Ner DL Graph Builder configuration:")
+         print("Graph folder: {}".format(graph_folder))
+         print("Graph file name: {}".format(graph_file))
+         print("Build params: ", end="")
+         print(build_params)
+
+         try:
+             tf_graph.build("ner_dl", build_params=build_params, model_location=self.getGraphFolder(),
+                            model_filename=graph_file)
+         except Exception:
+             print("Can't build the TensorFlow graph with the TF 2 graph factory, attempting the TF 1.15 factory")
+             try:
+                 tf_graph_1x.build("ner_dl", build_params=build_params, model_location=self.getGraphFolder())
+             except Exception:
+                 raise Exception("The TensorFlow graphs can't be built.")
+
+         return TFNerDLGraphBuilderModel()
+
+     def __init__(self):
+         super(TFNerDLGraphBuilder, self).__init__()
+         self._setDefault(
+             labelColumn=None,
+             inputCols=None,
+             graphFolder=None,
+             graphFile=None,
+             hiddenUnitsNumber=None
+         )
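Because TFNerDLGraphBuilder is an Estimator, the graph is generated when Pipeline.fit reaches it, so it has to sit before the NER stage that consumes the graph, with both pointing at the same folder. A rough sketch of that pairing (the CoNLL path and the "graphs/" folder are placeholders, a Spark session named spark and a TensorFlow graph-build environment are assumed):

    from pyspark.ml import Pipeline
    from sparknlp.annotator import WordEmbeddingsModel, NerDLApproach, TFNerDLGraphBuilder
    from sparknlp.training import CoNLL

    training_data = CoNLL().readDataset(spark, "eng.train")  # placeholder path

    embeddings = WordEmbeddingsModel.pretrained() \
        .setInputCols(["document", "token"]) \
        .setOutputCol("embeddings")

    # Builds a graph sized from the dataset (ntags, embeddings_dim, nchars).
    graph_builder = TFNerDLGraphBuilder() \
        .setInputCols(["document", "token", "embeddings"]) \
        .setLabelColumn("label") \
        .setGraphFolder("graphs/") \
        .setGraphFile("auto")

    # NerDLApproach then picks the generated graph up from the same folder.
    ner = NerDLApproach() \
        .setInputCols(["document", "token", "embeddings"]) \
        .setLabelColumn("label") \
        .setOutputCol("ner") \
        .setGraphFolder("graphs/") \
        .setMaxEpochs(1)

    model = Pipeline(stages=[embeddings, graph_builder, ner]).fit(training_data)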
sparknlp/annotator/token/__init__.py
@@ -0,0 +1,19 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Module of annotators for text tokenization."""
+ from sparknlp.annotator.token.chunk_tokenizer import *
+ from sparknlp.annotator.token.recursive_tokenizer import *
+ from sparknlp.annotator.token.regex_tokenizer import *
+ from sparknlp.annotator.token.tokenizer import *
sparknlp/annotator/token/chunk_tokenizer.py
@@ -0,0 +1,118 @@
+ # Copyright 2017-2022 John Snow Labs
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Contains classes for the ChunkTokenizer."""
+
+ from sparknlp.common import *
+ from sparknlp.annotator.token.tokenizer import Tokenizer, TokenizerModel
+
+
+ class ChunkTokenizer(Tokenizer):
+     """Tokenizes and flattens extracted NER chunks.
+
+     The ChunkTokenizer will split the extracted NER ``CHUNK`` type Annotations
+     and will create ``TOKEN`` type Annotations.
+     The result is then flattened, resulting in a single array.
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``CHUNK``              ``TOKEN``
+     ====================== ======================
+
+     Parameters
+     ----------
+     None
+
+     Examples
+     --------
+     >>> import sparknlp
+     >>> from sparknlp.base import *
+     >>> from sparknlp.annotator import *
+     >>> from sparknlp.common import *
+     >>> from pyspark.ml import Pipeline
+     >>> documentAssembler = DocumentAssembler() \\
+     ...     .setInputCol("text") \\
+     ...     .setOutputCol("document")
+     >>> sentenceDetector = SentenceDetector() \\
+     ...     .setInputCols(["document"]) \\
+     ...     .setOutputCol("sentence")
+     >>> tokenizer = Tokenizer() \\
+     ...     .setInputCols(["sentence"]) \\
+     ...     .setOutputCol("token")
+     >>> entityExtractor = TextMatcher() \\
+     ...     .setInputCols(["sentence", "token"]) \\
+     ...     .setEntities("src/test/resources/entity-extractor/test-chunks.txt", ReadAs.TEXT) \\
+     ...     .setOutputCol("entity")
+     >>> chunkTokenizer = ChunkTokenizer() \\
+     ...     .setInputCols(["entity"]) \\
+     ...     .setOutputCol("chunk_token")
+     >>> pipeline = Pipeline().setStages([
+     ...     documentAssembler,
+     ...     sentenceDetector,
+     ...     tokenizer,
+     ...     entityExtractor,
+     ...     chunkTokenizer
+     ... ])
+     >>> data = spark.createDataFrame([
+     ...     ["Hello world, my name is Michael, I am an artist and I work at Benezar"],
+     ...     ["Robert, an engineer from Farendell, graduated last year. The other one, Lucas, graduated last week."]
+     ... ]).toDF("text")
+     >>> result = pipeline.fit(data).transform(data)
+     >>> result.selectExpr("entity.result as entity", "chunk_token.result as chunk_token").show(truncate=False)
+     +-----------------------------------------------+---------------------------------------------------+
+     |entity                                         |chunk_token                                        |
+     +-----------------------------------------------+---------------------------------------------------+
+     |[world, Michael, work at Benezar]              |[world, Michael, work, at, Benezar]                |
+     |[engineer from Farendell, last year, last week]|[engineer, from, Farendell, last, year, last, week]|
+     +-----------------------------------------------+---------------------------------------------------+
+     """
+     name = 'ChunkTokenizer'
+
+     inputAnnotatorTypes = [AnnotatorType.CHUNK]
+
+     @keyword_only
+     def __init__(self):
+         super(Tokenizer, self).__init__(classname="com.johnsnowlabs.nlp.annotators.ChunkTokenizer")
+
+     def _create_model(self, java_model):
+         return ChunkTokenizerModel(java_model=java_model)
+
+
+ class ChunkTokenizerModel(TokenizerModel):
+     """Instantiated model of the ChunkTokenizer.
+
+     This is the instantiated model of the :class:`.ChunkTokenizer`.
+     For training your own model, please see the documentation of that class.
+
+     ====================== ======================
+     Input Annotation types Output Annotation type
+     ====================== ======================
+     ``CHUNK``              ``TOKEN``
+     ====================== ======================
+
+     Parameters
+     ----------
+     None
+     """
+     name = 'ChunkTokenizerModel'
+
+     inputAnnotatorTypes = [AnnotatorType.CHUNK]
+
+     @keyword_only
+     def __init__(self, classname="com.johnsnowlabs.nlp.annotators.ChunkTokenizerModel", java_model=None):
+         super(TokenizerModel, self).__init__(
+             classname=classname,
+             java_model=java_model
+         )
+
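The docstring example drives the ChunkTokenizer from a TextMatcher, but the class description targets NER chunks; a fragment showing that path (it assumes the usual document/sentence/token stages plus glove_100d embeddings already produce the "embeddings" column):

    from sparknlp.annotator import NerDLModel, NerConverter, ChunkTokenizer

    # Pretrained NER tags tokens; NerConverter merges the tags into CHUNK
    # annotations, which the ChunkTokenizer then splits back into tokens.
    ner = NerDLModel.pretrained() \
        .setInputCols(["sentence", "token", "embeddings"]) \
        .setOutputCol("ner")

    nerConverter = NerConverter() \
        .setInputCols(["sentence", "token", "ner"]) \
        .setOutputCol("entity")

    chunkTokenizer = ChunkTokenizer() \
        .setInputCols(["entity"]) \
        .setOutputCol("chunk_token")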