spark-nlp 6.0.4__py2.py3-none-any.whl → 6.0.5__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/METADATA +5 -5
- {spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/RECORD +6 -6
- sparknlp/__init__.py +1 -1
- sparknlp/reader/sparknlp_reader.py +46 -0
- {spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/WHEEL +0 -0
- {spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/top_level.txt +0 -0
{spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: spark-nlp
-Version: 6.0.4
+Version: 6.0.5
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -102,7 +102,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==6.0.4 pyspark==3.3.1
+$ pip install spark-nlp==6.0.5 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
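The change above only bumps the pinned version in the quoted install instructions. As a quick sanity check after upgrading, a minimal sketch from a Python console (assuming the standard sparknlp.start() and sparknlp.version() entry points; the printed value is illustrative):

>>> import sparknlp
>>> spark = sparknlp.start()   # launches a SparkSession with the Spark NLP jar attached
>>> sparknlp.version()         # should report the upgraded package version
'6.0.5'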
@@ -168,7 +168,7 @@ For a quick example of using pipelines and models take a look at our official [d
 
 ### Apache Spark Support
 
-Spark NLP *6.0.4* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *6.0.5* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
 
 | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -198,7 +198,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http
 
 ### Databricks Support
 
-Spark NLP 6.0.4 has been tested and is compatible with the following runtimes:
+Spark NLP 6.0.5 has been tested and is compatible with the following runtimes:
 
 | **CPU** | **GPU** |
 |--------------------|--------------------|
@@ -215,7 +215,7 @@ We are compatible with older runtimes. For a full list check databricks support
 
 ### EMR Support
 
-Spark NLP 6.0.4 has been tested and is compatible with the following EMR releases:
+Spark NLP 6.0.5 has been tested and is compatible with the following EMR releases:
 
 | **EMR Release** |
 |--------------------|
{spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/RECORD
CHANGED

@@ -3,7 +3,7 @@ com/johnsnowlabs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 com/johnsnowlabs/ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 com/johnsnowlabs/ml/ai/__init__.py,sha256=YQiK2M7U4d8y5irPy_HB8ae0mSpqS9583MH44pnKJXc,295
 com/johnsnowlabs/nlp/__init__.py,sha256=DPIVXtONO5xXyOk-HB0-sNiHAcco17NN13zPS_6Uw8c,294
-sparknlp/__init__.py,sha256=
+sparknlp/__init__.py,sha256=peVwWLyO7M5yWlQdRGzOBql6cvaWcdQyy5pNhGR8-sg,13814
 sparknlp/annotation.py,sha256=I5zOxG5vV2RfPZfqN9enT1i4mo6oBcn3Lrzs37QiOiA,5635
 sparknlp/annotation_audio.py,sha256=iRV_InSVhgvAwSRe9NTbUH9v6OGvTM-FPCpSAKVu0mE,1917
 sparknlp/annotation_image.py,sha256=xhCe8Ko-77XqWVuuYHFrjKqF6zPd8Z-RY_rmZXNwCXU,2547
@@ -246,7 +246,7 @@ sparknlp/pretrained/utils.py,sha256=T1MrvW_DaWk_jcOjVLOea0NMFE9w8fe0ZT_5urZ_nEY,
 sparknlp/reader/__init__.py,sha256=-Toj3AIBki-zXPpV8ezFTI2LX1yP_rK2bhpoa8nBkTw,685
 sparknlp/reader/enums.py,sha256=MNGug9oJ1BBLM1Pbske13kAabalDzHa2kucF5xzFpHs,770
 sparknlp/reader/pdf_to_text.py,sha256=eWw-cwjosmcSZ9eHso0F5QQoeGBBnwsOhzhCXXvMjZA,7169
-sparknlp/reader/sparknlp_reader.py,sha256=
+sparknlp/reader/sparknlp_reader.py,sha256=IG0_wYKT1cIIU3EibzOVBZ-GhvX50mC5meXYv0WsYKs,18524
 sparknlp/training/__init__.py,sha256=qREi9u-5Vc2VjpL6-XZsyvu5jSEIdIhowW7_kKaqMqo,852
 sparknlp/training/conll.py,sha256=wKBiSTrjc6mjsl7Nyt6B8f4yXsDJkZb-sn8iOjix9cE,6961
 sparknlp/training/conllu.py,sha256=8r3i-tmyrLsyk1DtZ9uo2mMDCWb1yw2Y5W6UsV13MkY,4953
@@ -277,7 +277,7 @@ sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py,sha256=R4yHFN3
 sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py,sha256=EoCSdcIjqQ3wv13MAuuWrKV8wyVBP0SbOEW41omHlR0,23189
 sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py,sha256=k5CQ7gKV6HZbZMB8cKLUJuZxoZWlP_DFWdZ--aIDwsc,2356
 sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py,sha256=pAxjWhjazSX8Vg0MFqJiuRVw1IbnQNSs-8Xp26L4nko,870
-spark_nlp-6.0.
-spark_nlp-6.0.
-spark_nlp-6.0.
-spark_nlp-6.0.
+spark_nlp-6.0.5.dist-info/METADATA,sha256=BL1PeMYps-L3LAkmNpwxMkrGUw_KwO164VZ5AoqDZLg,19722
+spark_nlp-6.0.5.dist-info/WHEEL,sha256=JNWh1Fm1UdwIQV075glCn4MVuCRs0sotJIq-J6rbxCU,109
+spark_nlp-6.0.5.dist-info/top_level.txt,sha256=uuytur4pyMRw2H_txNY2ZkaucZHUs22QF8-R03ch_-E,13
+spark_nlp-6.0.5.dist-info/RECORD,,
sparknlp/__init__.py
CHANGED

sparknlp/reader/sparknlp_reader.py
CHANGED

@@ -367,4 +367,50 @@ class SparkNLPReader(ExtendedJavaWrapper):
         if not isinstance(docPath, str):
             raise TypeError("docPath must be a string")
         jdf = self._java_obj.xml(docPath)
+        return self.getDataFrame(self.spark, jdf)
+
+
+    def md(self, filePath):
+        """Reads Markdown files and returns a Spark DataFrame.
+
+        Parameters
+        ----------
+        filePath : str
+            Path to a Markdown file or a directory containing Markdown files.
+
+        Returns
+        -------
+        pyspark.sql.DataFrame
+            A DataFrame containing parsed Markdown content.
+
+        Examples
+        --------
+        >>> from sparknlp.reader import SparkNLPReader
+        >>> md_df = SparkNLPReader(spark).md("home/user/markdown-directory")
+
+        You can use SparkNLP for one line of code
+
+        >>> import sparknlp
+        >>> md_df = sparknlp.read().md("home/user/markdown-directory")
+        >>> md_df.show(truncate=False)
+        +-----------------------------------------------------------+
+        |md |
+        +-----------------------------------------------------------+
+        |[{Title, Sample Markdown Document, {elementId -> ..., tag -> title}}]|
+        +-----------------------------------------------------------+
+
+        >>> md_df.printSchema()
+        root
+         |-- path: string (nullable = true)
+         |-- md: array (nullable = true)
+         |    |-- element: struct (containsNull = true)
+         |    |    |-- elementType: string (nullable = true)
+         |    |    |-- content: string (nullable = true)
+         |    |    |-- metadata: map (nullable = true)
+         |    |    |    |-- key: string
+         |    |    |    |-- value: string (valueContainsNull = true)
+        """
+        if not isinstance(filePath, str):
+            raise TypeError("filePath must be a string")
+        jdf = self._java_obj.md(filePath)
         return self.getDataFrame(self.spark, jdf)
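The hunk above is the main code addition in this wheel diff: a new md() method on SparkNLPReader, mirroring the existing xml() reader. A minimal usage sketch based on the docstring shown in the diff (the directory path is a hypothetical example; the columns follow the schema documented above):

>>> import sparknlp
>>> spark = sparknlp.start()
>>> md_df = sparknlp.read().md("/data/markdown-docs")       # hypothetical path to a directory of .md files
>>> md_df.printSchema()                                      # expect a path column plus an array<struct> column named md
>>> md_df.selectExpr("explode(md) as element").show(truncate=False)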
{spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/WHEEL
File without changes

{spark_nlp-6.0.4.dist-info → spark_nlp-6.0.5.dist-info}/top_level.txt
File without changes