spark-nlp 6.0.4__py2.py3-none-any.whl → 6.0.5__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/METADATA +5 -5
- {spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/RECORD +6 -6
- sparknlp/__init__.py +1 -1
- sparknlp/reader/sparknlp_reader.py +46 -0
- {spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/WHEEL +0 -0
- {spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/top_level.txt +0 -0
{spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spark-nlp
-Version: 6.0.4
+Version: 6.0.5
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.0.4 pyspark==3.3.1
+$ pip install spark-nlp==6.0.5 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
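The change above only bumps the pinned version in the quoted install instructions. As a quick sanity check after upgrading, a minimal sketch from a Python console (assuming the standard sparknlp.start() and sparknlp.version() entry points; the printed value is illustrative):

>>> import sparknlp
>>> spark = sparknlp.start()   # launches a SparkSession with the Spark NLP jar attached
>>> sparknlp.version()         # should report the upgraded package version
'6.0.5'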
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
 
 ### Apache Spark Support
 
-Spark NLP *6.0.4* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.0.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
 
 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
 
 ### Databricks Support
 
-Spark NLP 6.0.4 has been tested and is compatible with the following runtimes:
+Spark NLP 6.0.5 has been tested and is compatible with the following runtimes:
 
 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -215,7 +215,7 @@ We are compatible with older runtimes. For a full list check databricks support
 
 ### EMR Support
 
-Spark NLP 6.0.4 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.0.5 has been tested and is compatible with the following EMR releases:
 
 | **EMR Release** |
 |--------------------|
{spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/RECORD
CHANGED

@@ -3,7 +3,7 @@ com/johnsnowlabs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 com/johnsnowlabs/ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 com/johnsnowlabs/ml/ai/__init__.py,sha256=YQiK2M7U4d8y5irPy_HB8ae0mSpqS9583MH44pnKJXc,295
 com/johnsnowlabs/nlp/__init__.py,sha256=DPIVXtONO5xXyOk-HB0-sNiHAcco17NN13zPS_6Uw8c,294
-sparknlp/__init__.py,sha256=
+sparknlp/__init__.py,sha256=peVwWLyO7M5yWlQdRGzOBql6cvaWcdQyy5pNhGR8-sg,13814
 sparknlp/annotation.py,sha256=I5zOxG5vV2RfPZfqN9enT1i4mo6oBcn3Lrzs37QiOiA,5635
 sparknlp/annotation_audio.py,sha256=iRV_InSVhgvAwSRe9NTbUH9v6OGvTM-FPCpSAKVu0mE,1917
 sparknlp/annotation_image.py,sha256=xhCe8Ko-77XqWVuuYHFrjKqF6zPd8Z-RY_rmZXNwCXU,2547
@@ -246,7 +246,7 @@ sparknlp/pretrained/utils.py,sha256=T1MrvW_DaWk_jcOjVLOea0NMFE9w8fe0ZT_5urZ_nEY,
 sparknlp/reader/__init__.py,sha256=-Toj3AIBki-zXPpV8ezFTI2LX1yP_rK2bhpoa8nBkTw,685
 sparknlp/reader/enums.py,sha256=MNGug9oJ1BBLM1Pbske13kAabalDzHa2kucF5xzFpHs,770
 sparknlp/reader/pdf_to_text.py,sha256=eWw-cwjosmcSZ9eHso0F5QQoeGBBnwsOhzhCXXvMjZA,7169
-sparknlp/reader/sparknlp_reader.py,sha256=
+sparknlp/reader/sparknlp_reader.py,sha256=IG0_wYKT1cIIU3EibzOVBZ-GhvX50mC5meXYv0WsYKs,18524
 sparknlp/training/__init__.py,sha256=qREi9u-5Vc2VjpL6-XZsyvu5jSEIdIhowW7_kKaqMqo,852
 sparknlp/training/conll.py,sha256=wKBiSTrjc6mjsl7Nyt6B8f4yXsDJkZb-sn8iOjix9cE,6961
 sparknlp/training/conllu.py,sha256=8r3i-tmyrLsyk1DtZ9uo2mMDCWb1yw2Y5W6UsV13MkY,4953
@@ -277,7 +277,7 @@ sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py,sha256=R4yHFN3
 sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py,sha256=EoCSdcIjqQ3wv13MAuuWrKV8wyVBP0SbOEW41omHlR0,23189
 sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py,sha256=k5CQ7gKV6HZbZMB8cKLUJuZxoZWlP_DFWdZ--aIDwsc,2356
 sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py,sha256=pAxjWhjazSX8Vg0MFqJiuRVw1IbnQNSs-8Xp26L4nko,870
-spark_nlp-6.0.
-spark_nlp-6.0.
-spark_nlp-6.0.
-spark_nlp-6.0.
+spark_nlp-6.0.5.dist-info/METADATA,sha256=BL1PeMYps-L3LAkmNpwxMkrGUw_KwO164VZ5AoqDZLg,19722
+spark_nlp-6.0.5.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
+spark_nlp-6.0.5.dist-info/top_level.txt,sha256=uuytur4pyMRw2H_txNY2ZkaucZHUs22QF8-R03ch_-E,13
+spark_nlp-6.0.5.dist-info/RECORD,,
sparknlp/__init__.py
CHANGED

sparknlp/reader/sparknlp_reader.py
CHANGED

@@ -367,4 +367,50 @@ class SparkNLPReader(ExtendedJavaWrapper):
         if not isinstance(docPath, str):
             raise TypeError("docPath must be a string")
         jdf = self._java_obj.xml(docPath)
+        return self.getDataFrame(self.spark, jdf)
+
+
+    def md(self, filePath):
+        """Reads Markdown files and returns a Spark DataFrame.
+
+        Parameters
+        ----------
+        filePath : str
+            Path to a Markdown file or a directory containing Markdown files.
+
+        Returns
+        -------
+        pyspark.sql.DataFrame
+            A DataFrame containing parsed Markdown content.
+
+        Examples
+        --------
+        >>> from sparknlp.reader import SparkNLPReader
+        >>> md_df = SparkNLPReader(spark).md("home/user/markdown-directory")
+
+        You can use SparkNLP for one line of code
+
+        >>> import sparknlp
+        >>> md_df = sparknlp.read().md("home/user/markdown-directory")
+        >>> md_df.show(truncate=False)
+        +-----------------------------------------------------------+
+        |md |
+        +-----------------------------------------------------------+
+        |[{Title, Sample Markdown Document, {elementId -> ..., tag -> title}}]|
+        +-----------------------------------------------------------+
+
+        >>> md_df.printSchema()
+        root
+         |-- path: string (nullable = true)
+         |-- md: array (nullable = true)
+         |    |-- element: struct (containsNull = true)
+         |    |    |-- elementType: string (nullable = true)
+         |    |    |-- content: string (nullable = true)
+         |    |    |-- metadata: map (nullable = true)
+         |    |    |    |-- key: string
+         |    |    |    |-- value: string (valueContainsNull = true)
+        """
+        if not isinstance(filePath, str):
+            raise TypeError("filePath must be a string")
+        jdf = self._java_obj.md(filePath)
         return self.getDataFrame(self.spark, jdf)
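The hunk above is the main code addition in this wheel diff: a new md() method on SparkNLPReader, mirroring the existing xml() reader. A minimal usage sketch based on the docstring shown in the diff (the directory path is a hypothetical example; the columns follow the schema documented above):

>>> import sparknlp
>>> spark = sparknlp.start()
>>> md_df = sparknlp.read().md("/data/markdown-docs")       # hypothetical path to a directory of .md files
>>> md_df.printSchema()                                      # expect a path column plus an array<struct> column named md
>>> md_df.selectExpr("explode(md) as element").show(truncate=False)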
{spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/WHEEL
File without changes

{spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/top_level.txt
File without changes