spark-nlp 6.0.1rc1__py2.py3-none-any.whl → 6.0.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)
  1. {spark_nlp-6.0.1rc1.dist-info → spark_nlp-6.0.2.dist-info}/METADATA +13 -6
  2. {spark_nlp-6.0.1rc1.dist-info → spark_nlp-6.0.2.dist-info}/RECORD +36 -30
  3. {spark_nlp-6.0.1rc1.dist-info → spark_nlp-6.0.2.dist-info}/WHEEL +1 -1
  4. sparknlp/__init__.py +4 -2
  5. sparknlp/annotator/cv/__init__.py +2 -0
  6. sparknlp/annotator/cv/florence2_transformer.py +180 -0
  7. sparknlp/annotator/cv/gemma3_for_multimodal.py +5 -10
  8. sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
  9. sparknlp/annotator/cv/janus_for_multimodal.py +8 -13
  10. sparknlp/annotator/cv/llava_for_multimodal.py +1 -1
  11. sparknlp/annotator/cv/paligemma_for_multimodal.py +7 -7
  12. sparknlp/annotator/cv/phi3_vision_for_multimodal.py +1 -1
  13. sparknlp/annotator/cv/qwen2vl_transformer.py +1 -1
  14. sparknlp/annotator/cv/smolvlm_transformer.py +7 -13
  15. sparknlp/annotator/date2_chunk.py +1 -1
  16. sparknlp/annotator/document_character_text_splitter.py +8 -8
  17. sparknlp/annotator/document_token_splitter.py +7 -7
  18. sparknlp/annotator/embeddings/bge_embeddings.py +21 -19
  19. sparknlp/annotator/embeddings/snowflake_embeddings.py +15 -15
  20. sparknlp/annotator/openai/openai_completion.py +3 -4
  21. sparknlp/annotator/seq2seq/m2m100_transformer.py +1 -1
  22. sparknlp/annotator/seq2seq/mistral_transformer.py +2 -3
  23. sparknlp/annotator/seq2seq/nllb_transformer.py +1 -1
  24. sparknlp/annotator/seq2seq/qwen_transformer.py +26 -25
  25. sparknlp/annotator/spell_check/context_spell_checker.py +1 -1
  26. sparknlp/base/prompt_assembler.py +1 -1
  27. sparknlp/common/properties.py +7 -7
  28. sparknlp/internal/__init__.py +19 -0
  29. sparknlp/partition/__init__.py +16 -0
  30. sparknlp/partition/partition.py +244 -0
  31. sparknlp/partition/partition_properties.py +257 -0
  32. sparknlp/partition/partition_transformer.py +196 -0
  33. sparknlp/reader/pdf_to_text.py +50 -4
  34. sparknlp/reader/sparknlp_reader.py +56 -52
  35. sparknlp/training/spacy_to_annotation.py +7 -7
  36. {spark_nlp-6.0.1rc1.dist-info → spark_nlp-6.0.2.dist-info}/top_level.txt +0 -0
sparknlp/annotator/seq2seq/mistral_transformer.py

@@ -91,8 +91,7 @@ class MistralTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
 
     References
     ----------
-    - `Mistral 7B
-      <https://mistral.ai/news/announcing-mistral_7b/>`__
+    - `Mistral 7B <https://mistral.ai/news/announcing-mistral_7b/>`__
     - https://github.com/mistralai/mistral-src
 
     **Paper Abstract:**
@@ -126,7 +125,7 @@ class MistralTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
     +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
     |result |
     +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-    |[Leonardo Da Vinci invented the microscope?\n Question: Leonardo Da Vinci invented the microscope?\n Answer: No, Leonardo Da Vinci did not invent the microscope. The first microscope was invented |
+    |[Leonardo Da Vinci invented the microscope?\\n Question: Leonardo Da Vinci invented the microscope?\\n Answer: No, Leonardo Da Vinci did not invent the microscope. The first microscope was invented |
     | in the late 16th century, long after Leonardo'] |
     +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
     """
sparknlp/annotator/seq2seq/nllb_transformer.py

@@ -77,7 +77,7 @@ class NLLBTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
         Target Language (Default: `fr`)
 
     Languages Covered
-    -----
+    -----------------
     Acehnese (Arabic script) (ace_Arab), Acehnese (Latin script) (ace_Latn), Mesopotamian Arabic
     (acm_Arab), Ta’izzi-Adeni Arabic (acq_Arab), Tunisian Arabic (aeb_Arab), Afrikaans (afr_Latn),
     South Levantine Arabic (ajp_Arab), Akan (aka_Latn), Amharic (amh_Ethi), North Levantine Arabic
sparknlp/annotator/seq2seq/qwen_transformer.py

@@ -52,6 +52,32 @@ class QwenTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
     ``DOCUMENT``           ``DOCUMENT``
     ====================== ======================
 
+    **References**
+
+    - `Qwen Technical Report
+      <https://arxiv.org/pdf/2309.16609.pdf>`__
+    - https://qwenlm.github.io/blog/qwen1.5/
+    - https://github.com/QwenLM/Qwen1.5
+
+    **Paper Abstract:**
+
+    *Large language models (LLMs) have revolutionized the field of artificial intelligence,
+    enabling natural language processing tasks that were previously thought to be exclusive to
+    humans. In this work, we introduce Qwen, the first installment of our large language model
+    series. Qwen is a comprehensive language model series that encompasses distinct models with
+    varying parameter counts. It includes Qwen, the base pretrained language models, and
+    Qwen-Chat, the chat models finetuned with human alignment techniques. The base language models
+    consistently demonstrate superior performance across a multitude of downstream tasks, and the
+    chat models, particularly those trained using Reinforcement Learning from Human Feedback
+    (RLHF), are highly competitive. The chat models possess advanced tool-use and planning
+    capabilities for creating agent applications, showcasing impressive performance even when
+    compared to bigger models on complex tasks like utilizing a code interpreter. Furthermore, we
+    have developed coding-specialized models, Code-Qwen and Code-Qwen-Chat, as well as
+    mathematics-focused models, Math-Qwen-Chat, which are built upon base language models. These
+    models demonstrate significantly improved performance in comparison with open-source models,
+    and slightly fall behind the proprietary models.*
+
+
     Parameters
     ----------
@@ -87,31 +113,6 @@ class QwenTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
     This is a very computationally expensive module especially on larger
     sequence. The use of an accelerator such as GPU is recommended.
 
-    References
-    ----------
-    - `Qwen Technical Report
-      <https://arxiv.org/pdf/2309.16609.pdf>`__
-    - https://qwenlm.github.io/blog/qwen1.5/
-    - https://github.com/QwenLM/Qwen1.5
-
-    **Paper Abstract:**
-
-    *Large language models (LLMs) have revolutionized the field of artificial intelligence,
-    enabling natural language processing tasks that were previously thought to be exclusive to
-    humans. In this work, we introduce Qwen, the first installment of our large language model
-    series. Qwen is a comprehensive language model series that encompasses distinct models with
-    varying parameter counts. It includes Qwen, the base pretrained language models, and
-    Qwen-Chat, the chat models finetuned with human alignment techniques. The base language models
-    consistently demonstrate superior performance across a multitude of downstream tasks, and the
-    chat models, particularly those trained using Reinforcement Learning from Human Feedback
-    (RLHF), are highly competitive. The chat models possess advanced tool-use and planning
-    capabilities for creating agent applications, showcasing impressive performance even when
-    compared to bigger models on complex tasks like utilizing a code interpreter. Furthermore, we
-    have developed coding-specialized models, Code-Qwen and Code-Qwen-Chat, as well as
-    mathematics-focused models, Math-Qwen-Chat, which are built upon base language models. These
-    models demonstrate significantly improved performance in comparison with open-source models,
-    and slightly fall behind the proprietary models.*
-
     Examples
     --------
     >>> import sparknlp
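
The diff elides the rest of this docstring's example, so here is a minimal usage sketch matching the DOCUMENT-to-DOCUMENT contract shown above; the release's default pretrained checkpoint and the generation settings are illustrative assumptions, not values taken from this diff.

import sparknlp
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import QwenTransformer
from pyspark.ml import Pipeline

spark = sparknlp.start()

# DOCUMENT in, DOCUMENT out, per the annotator table above
document_assembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("documents")

qwen = QwenTransformer.pretrained() \
    .setInputCols(["documents"]) \
    .setOutputCol("generation") \
    .setMaxOutputLength(50)

pipeline = Pipeline().setStages([document_assembler, qwen])
data = spark.createDataFrame([["What is the capital of France?"]]).toDF("text")
pipeline.fit(data).transform(data).select("generation.result").show(truncate=False)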
sparknlp/annotator/spell_check/context_spell_checker.py

@@ -565,7 +565,7 @@ class ContextSpellCheckerModel(AnnotatorModel, HasEngine):
 
 
     References
-    -------------
+    ----------
     For an in-depth explanation of the module see the article `Applying Context
     Aware Spell Checking in Spark NLP
     <https://medium.com/spark-nlp/applying-context-aware-spell-checking-in-spark-nlp-3c29c46963bc>`__.
sparknlp/base/prompt_assembler.py

@@ -122,7 +122,7 @@ class PromptAssembler(AnnotatorTransformer):
     +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
     |result |
     +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-    |[<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello there, how can I help you?<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nI need help with organizing my room.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n]|
+    |[<|start_header_id|>system<|end_header_id|>\\n\\nYou are a helpful assistant.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\nHello there, how can I help you?<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\nI need help with organizing my room.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n]|
     +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
     """
 
sparknlp/common/properties.py

@@ -38,7 +38,7 @@ class HasBatchedAnnotate:
         int
             Current batch size
         """
-        return self.getOrDefault("batchSize")
+        return self.getOrDefault(self.batchSize)
 
 
 class HasCaseSensitiveProperties:
@@ -245,7 +245,7 @@ class HasBatchedAnnotateImage:
         int
             Current batch size
         """
-        return self.getOrDefault("batchSize")
+        return self.getOrDefault(self.batchSize)
 
 
 class HasImageFeatureProperties:
@@ -402,7 +402,7 @@ class HasBatchedAnnotateAudio:
         int
             Current batch size
         """
-        return self.getOrDefault("batchSize")
+        return self.getOrDefault(self.batchSize)
 
 
 class HasAudioFeatureProperties:
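
The three hunks above apply the same one-line fix: getBatchSize now passes the Param object self.batchSize to getOrDefault rather than the bare string "batchSize", resolving the parameter explicitly instead of by name lookup. A minimal sketch of the affected getter from user code; the annotator choice is illustrative, since any annotator mixing in HasBatchedAnnotate exposes the same pair.

from sparknlp.annotator import BertEmbeddings

embeddings = BertEmbeddings.pretrained() \
    .setInputCols(["document", "token"]) \
    .setOutputCol("embeddings") \
    .setBatchSize(16)

# Resolved internally via getOrDefault(self.batchSize)
assert embeddings.getBatchSize() == 16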
@@ -1099,7 +1099,7 @@ class HasLlamaCppProperties:
         return self._set(flashAttention=flashAttention)
 
     def setInputPrefixBos(self, inputPrefixBos: bool):
-        """Whether to add prefix BOS to user inputs, preceding the `--in-prefix` bool"""
+        """Whether to add prefix BOS to user inputs, preceding the `--in-prefix` string"""
         return self._set(inputPrefixBos=inputPrefixBos)
 
     def setUseMmap(self, useMmap: bool):
@@ -1114,7 +1114,7 @@ class HasLlamaCppProperties:
         """Whether to disable KV offload"""
         return self._set(noKvOffload=noKvOffload)
 
-    def setSystemPrompt(self, systemPrompt: bool):
+    def setSystemPrompt(self, systemPrompt: str):
         """Set a system prompt to use"""
         return self._set(systemPrompt=systemPrompt)
 
@@ -1219,7 +1219,7 @@ class HasLlamaCppProperties:
         """Set the amount of tokens the samplers should return at least (0 = disabled)"""
         return self._set(minKeep=minKeep)
 
-    def setGrammar(self, grammar: bool):
+    def setGrammar(self, grammar: str):
         """Set BNF-like grammar to constrain generations"""
         return self._set(grammar=grammar)
 
@@ -1261,7 +1261,7 @@ class HasLlamaCppProperties:
         return self._call_java("setTokenBias", tokenBias)
 
     def setLoraAdapters(self, loraAdapters: Dict[str, float]):
-        """Set token id bias"""
+        """Set LoRA adapters with their scaling factors"""
         return self._call_java("setLoraAdapters", loraAdapters)
 
     def getMetadata(self):
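
The type-hint corrections above matter for callers: setSystemPrompt and setGrammar take strings, not booleans. A hedged sketch using AutoGGUFModel, which mixes in HasLlamaCppProperties; the grammar string and prompt text are illustrative assumptions.

from sparknlp.annotator import AutoGGUFModel

llm = AutoGGUFModel.pretrained() \
    .setInputCols(["document"]) \
    .setOutputCol("completions") \
    .setSystemPrompt("You are a concise assistant.") \
    .setGrammar('root ::= "yes" | "no"')  # BNF-like grammar constraining generation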
sparknlp/internal/__init__.py

@@ -281,6 +281,16 @@ class _Gemma3ForMultiModalLoader(ExtendedJavaWrapper):
             use_openvino
         )
 
+class _InternVLForMultiModalLoader(ExtendedJavaWrapper):
+    def __init__(self, path, jspark, use_openvino=False):
+        super(_InternVLForMultiModalLoader, self).__init__(
+            "com.johnsnowlabs.nlp.annotators.cv.InternVLForMultiModal.loadSavedModel",
+            path,
+            jspark,
+            use_openvino
+        )
+
+
 class _JanusForMultiModalLoader(ExtendedJavaWrapper):
     def __init__(self, path, jspark, use_openvino=False):
         super(_JanusForMultiModalLoader, self).__init__(
@@ -1146,3 +1156,12 @@ class _SmolVLMTransformerLoader(ExtendedJavaWrapper):
             jspark,
             use_openvino
         )
+
+class _Florence2TransformerLoader(ExtendedJavaWrapper):
+    def __init__(self, path, jspark, use_openvino=False):
+        super(_Florence2TransformerLoader, self).__init__(
+            "com.johnsnowlabs.nlp.annotators.cv.Florence2Transformer.loadSavedModel",
+            path,
+            jspark,
+            use_openvino,
+        )
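
These private wrappers back the public loadSavedModel entry points of the new annotators (internvl_for_multimodal.py and florence2_transformer.py in the file list above). A sketch of the expected call path; the export directory and column names are illustrative assumptions, not values shipped with the release.

import sparknlp
from sparknlp.annotator import InternVLForMultiModal

spark = sparknlp.start()

# Dispatches through _InternVLForMultiModalLoader to the Scala loadSavedModel above
model = InternVLForMultiModal.loadSavedModel("/tmp/internvl_export", spark) \
    .setInputCols(["image_assembler"]) \
    .setOutputCol("answer")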
sparknlp/partition/__init__.py

@@ -0,0 +1,16 @@
+# Copyright 2017-2025 John Snow Labs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module to read various types of documents into chunks"""
+from sparknlp.partition.partition import *
+from sparknlp.partition.partition_transformer import *
sparknlp/partition/partition.py

@@ -0,0 +1,244 @@
+# Copyright 2017-2025 John Snow Labs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains the Partition annotator for reading and processing various document types."""
+import sparknlp
+from sparknlp.internal import ExtendedJavaWrapper
+
+
+class Partition(ExtendedJavaWrapper):
+    """
+    A unified interface for extracting structured content from various document types
+    using Spark NLP readers.
+
+    This class supports reading from files, URLs, in-memory strings, or byte arrays,
+    and returns parsed output as a structured Spark DataFrame.
+
+    Supported formats include:
+    - Plain text
+    - HTML
+    - Word (.doc/.docx)
+    - Excel (.xls/.xlsx)
+    - PowerPoint (.ppt/.pptx)
+    - Email files (.eml, .msg)
+    - PDFs
+
+    Parameters
+    ----------
+    params : dict, optional
+        Configuration parameters, including:
+
+        - content_type : str
+            Override automatic file type detection.
+        - store_content : bool
+            Include raw file content in the output DataFrame.
+        - timeout : int
+            Timeout for fetching HTML content.
+        - title_font_size : int
+            Font size used to identify titles.
+        - include_page_breaks : bool
+            Tag content with page break metadata.
+        - group_broken_paragraphs : bool
+            Merge broken lines into full paragraphs.
+        - title_length_size : int
+            Max character length to qualify as title.
+        - paragraph_split : str
+            Regex to detect paragraph boundaries.
+        - short_line_word_threshold : int
+            Max words in a line to be considered short.
+        - threshold : float
+            Ratio of empty lines for switching grouping.
+        - max_line_count : int
+            Max lines evaluated in paragraph analysis.
+        - include_slide_notes : bool
+            Include speaker notes in output.
+        - infer_table_structure : bool
+            Generate HTML table structure.
+        - append_cells : bool
+            Merge Excel rows into one block.
+        - cell_separator : str
+            Join cell values in a row.
+        - add_attachment_content : bool
+            Include text of plain-text attachments.
+        - headers : dict
+            Request headers when using URLs.
+
+    Examples
+    --------
+
+    Reading Text Files
+
+    >>> txt_directory = "/content/txtfiles/reader/txt"
+    >>> partition_df = Partition(content_type="text/plain").partition(txt_directory)
+    >>> partition_df.show()
+    >>> partition_df = Partition().partition("./email-files/test-several-attachments.eml")
+    >>> partition_df.show()
+    >>> partition_df = Partition().partition(
+    ...     "https://www.wikipedia.com",
+    ...     headers={"Accept-Language": "es-ES"}
+    ... )
+    >>> partition_df.show()
+    +--------------------+--------------------+
+    |                path|                 txt|
+    +--------------------+--------------------+
+    |file:/content/txt...|[{Title, BIG DATA...|
+    +--------------------+--------------------+
+
+    Reading Email Files
+
+    >>> partition_df = Partition().partition("./email-files/test-several-attachments.eml")
+    >>> partition_df.show()
+    +--------------------+--------------------+
+    |                path|               email|
+    +--------------------+--------------------+
+    |file:/content/ema...|[{Title, Test Sev...|
+    +--------------------+--------------------+
+
+    Reading Webpages
+
+    >>> partition_df = Partition().partition("https://www.wikipedia.com", headers = {"Accept-Language": "es-ES"})
+    >>> partition_df.show()
+    +--------------------+--------------------+
+    |                 url|                html|
+    +--------------------+--------------------+
+    |https://www.wikip...|[{Title, Wikipedi...|
+    +--------------------+--------------------+
+
+    For more examples, refer to:
+    `examples/python/data-preprocessing/SparkNLP_Partition_Reader_Demo.ipynb`
+    """
+    def __init__(self, **kwargs):
+        self.spark = sparknlp.start()
+        params = {}
+        for key, value in kwargs.items():
+            try:
+                params[key] = str(value)
+            except Exception as e:
+                raise ValueError(f"Invalid value for key '{key}': Cannot cast {type(value)} to string. Original error: {e}")
+
+        super(Partition, self).__init__("com.johnsnowlabs.partition.Partition", params)
+
+
+    def partition(self, path, headers=None):
+        """
+        Reads and parses content from a URL, file, or directory path.
+
+        Parameters
+        ----------
+        path : str
+            Path to file or directory. URLs and DFS are supported.
+        headers : dict, optional
+            Headers for URL requests.
+
+        Returns
+        -------
+        pyspark.sql.DataFrame
+            DataFrame with parsed content.
+        """
+        if headers is None:
+            headers = {}
+        jdf = self._java_obj.partition(path, headers)
+        dataframe = self.getDataFrame(self.spark, jdf)
+        return dataframe
+
+
+    def partition_urls(self, path, headers=None):
+        """
+        Reads and parses content from multiple URLs.
+
+        Parameters
+        ----------
+        path : list[str]
+            List of URLs.
+        headers : dict, optional
+            Request headers for URLs.
+
+        Returns
+        -------
+        pyspark.sql.DataFrame
+            DataFrame with parsed URL content.
+
+        Examples
+        --------
+        >>> urls_df = Partition().partition_urls([
+        ...     "https://www.wikipedia.org", "https://example.com/"
+        ... ])
+        >>> urls_df.show()
+        +--------------------+--------------------+
+        |                 url|                html|
+        +--------------------+--------------------+
+        |https://www.wikip...|[{Title, Wikipedi...|
+        |https://example.com/|[{Title, Example ...|
+        +--------------------+--------------------+
+
+        >>> urls_df.printSchema()
+        root
+         |-- url: string (nullable = true)
+         |-- html: array (nullable = true)
+         |    |-- element: struct (containsNull = true)
+         |    |    |-- elementType: string (nullable = true)
+         |    |    |-- content: string (nullable = true)
+         |    |    |-- metadata: map (nullable = true)
+         |    |    |    |-- key: string
+         |    |    |    |-- value: string (valueContainsNull = true)
+        """
+        if headers is None:
+            headers = {}
+        jdf = self._java_obj.partitionUrlsJava(path, headers)
+        dataframe = self.getDataFrame(self.spark, jdf)
+        return dataframe
+
+
+    def partition_text(self, text):
+        """
+        Parses content from a raw text string.
+
+        Parameters
+        ----------
+        text : str
+            Raw text input.
+
+        Returns
+        -------
+        pyspark.sql.DataFrame
+            DataFrame with parsed text.
+
+        Examples
+        --------
+        >>> raw_text = (
+        ...     "The big brown fox\\n"
+        ...     "was walking down the lane.\\n"
+        ...     "\\n"
+        ...     "At the end of the lane,\\n"
+        ...     "the fox met a bear."
+        ... )
+        >>> text_df = Partition(group_broken_paragraphs=True).partition_text(text=raw_text)
+        >>> text_df.show()
+        +--------------------------------------+
+        |txt                                   |
+        +--------------------------------------+
+        |[{NarrativeText, The big brown fox was|
+        +--------------------------------------+
+        >>> text_df.printSchema()
+        root
+         |-- txt: array (nullable = true)
+         |    |-- element: struct (containsNull = true)
+         |    |    |-- elementType: string (nullable = true)
+         |    |    |-- content: string (nullable = true)
+         |    |    |-- metadata: map (nullable = true)
+         |    |    |    |-- key: string
+         |    |    |    |-- value: string (valueContainsNull = true)
+        """
+        jdf = self._java_obj.partitionText(text)
+        dataframe = self.getDataFrame(self.spark, jdf)
+        return dataframe
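
Putting the new reader together, a short end-to-end sketch that uses only parameters documented in the docstring above; the URL and header values are illustrative.

from sparknlp.partition.partition import Partition

# store_content keeps the raw payload alongside the parsed elements;
# timeout bounds the HTML fetch.
df = Partition(store_content=True, timeout=30).partition(
    "https://www.wikipedia.com",
    headers={"Accept-Language": "en-US"},
)
df.printSchema()
df.show(truncate=False)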