spark-nlp 5.1.0__py2.py3-none-any.whl → 5.1.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of spark-nlp has been flagged as possibly problematic; see the package's registry page for details.
- {spark_nlp-5.1.0.dist-info → spark_nlp-5.1.1.dist-info}/METADATA +45 -45
- {spark_nlp-5.1.0.dist-info → spark_nlp-5.1.1.dist-info}/RECORD +7 -7
- sparknlp/__init__.py +2 -2
- sparknlp/annotator/embeddings/doc2vec.py +6 -0
- sparknlp/annotator/embeddings/word2vec.py +6 -0
- {spark_nlp-5.1.0.dist-info → spark_nlp-5.1.1.dist-info}/WHEEL +0 -0
- {spark_nlp-5.1.0.dist-info → spark_nlp-5.1.1.dist-info}/top_level.txt +0 -0
{spark_nlp-5.1.0.dist-info → spark_nlp-5.1.1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: spark-nlp
-Version: 5.1.0
+Version: 5.1.1
 Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
 Home-page: https://github.com/JohnSnowLabs/spark-nlp
 Author: John Snow Labs
@@ -202,7 +202,7 @@ To use Spark NLP you need the following requirements:
 
 **GPU (optional):**
 
-Spark NLP 5.1.0 is built with ONNX 1.15.1 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
+Spark NLP 5.1.1 is built with ONNX 1.15.1 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
 
 - NVIDIA® GPU drivers version 450.80.02 or higher
 - CUDA® Toolkit 11.2
@@ -218,7 +218,7 @@ $ java -version
 $ conda create -n sparknlp python=3.7 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==5.1.0 pyspark==3.3.1
+$ pip install spark-nlp==5.1.1 pyspark==3.3.1
 ```
 
 In Python console or Jupyter `Python3` kernel:
@@ -263,7 +263,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
 
 ## Apache Spark Support
 
-Spark NLP *5.1.0* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x
+Spark NLP *5.1.1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x
 
 | Spark NLP | Apache Spark 2.3.x | Apache Spark 2.4.x | Apache Spark 3.0.x | Apache Spark 3.1.x | Apache Spark 3.2.x | Apache Spark 3.3.x | Apache Spark 3.4.x |
 |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -302,7 +302,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
 
 ## Databricks Support
 
-Spark NLP 5.1.0 has been tested and is compatible with the following runtimes:
+Spark NLP 5.1.1 has been tested and is compatible with the following runtimes:
 
 **CPU:**
 
@@ -363,7 +363,7 @@ Spark NLP 5.1.0 has been tested and is compatible with the following runtimes:
 
 ## EMR Support
 
-Spark NLP 5.1.0 has been tested and is compatible with the following EMR releases:
+Spark NLP 5.1.1 has been tested and is compatible with the following EMR releases:
 
 - emr-6.2.0
 - emr-6.3.0
@@ -408,11 +408,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
 ```sh
 # CPU
 
-spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
+spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
 
-pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
+pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
 
-spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
+spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
 ```
 
 The `spark-nlp` has been published to
@@ -421,11 +421,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
 ```sh
 # GPU
 
-spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.0
+spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
 
-pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.0
+pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
 
-spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.0
+spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.1.1
 
 ```
 
@@ -435,11 +435,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
 ```sh
 # AArch64
 
-spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.0
+spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
 
-pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.0
+pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
 
-spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.0
+spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.1.1
 
 ```
 
@@ -449,11 +449,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
 ```sh
 # M1/M2 (Apple Silicon)
 
-spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.0
+spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
 
-pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.0
+pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
 
-spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.0
+spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.1.1
 
 ```
 
@@ -467,7 +467,7 @@ set in your SparkSession:
 spark-shell \
   --driver-memory 16g \
   --conf spark.kryoserializer.buffer.max=2000M \
-  --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
+  --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
 ```
 
 ## Scala
@@ -485,7 +485,7 @@ coordinates:
 <dependency>
     <groupId>com.johnsnowlabs.nlp</groupId>
     <artifactId>spark-nlp_2.12</artifactId>
-    <version>5.1.0</version>
+    <version>5.1.1</version>
 </dependency>
 ```
 
@@ -496,7 +496,7 @@ coordinates:
 <dependency>
     <groupId>com.johnsnowlabs.nlp</groupId>
     <artifactId>spark-nlp-gpu_2.12</artifactId>
-    <version>5.1.0</version>
+    <version>5.1.1</version>
 </dependency>
 ```
 
@@ -507,7 +507,7 @@ coordinates:
 <dependency>
     <groupId>com.johnsnowlabs.nlp</groupId>
     <artifactId>spark-nlp-aarch64_2.12</artifactId>
-    <version>5.1.0</version>
+    <version>5.1.1</version>
 </dependency>
 ```
 
@@ -518,7 +518,7 @@ coordinates:
 <dependency>
     <groupId>com.johnsnowlabs.nlp</groupId>
     <artifactId>spark-nlp-silicon_2.12</artifactId>
-    <version>5.1.0</version>
+    <version>5.1.1</version>
 </dependency>
 ```
 
@@ -528,28 +528,28 @@ coordinates:
 
 ```sbtshell
 // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
-libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.1.0"
+libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.1.1"
 ```
 
 **spark-nlp-gpu:**
 
 ```sbtshell
 // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
-libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.1.0"
+libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.1.1"
 ```
 
 **spark-nlp-aarch64:**
 
 ```sbtshell
 // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
-libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.1.0"
+libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.1.1"
 ```
 
 **spark-nlp-silicon:**
 
 ```sbtshell
 // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
-libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.0"
+libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.1.1"
 ```
 
 Maven
@@ -571,7 +571,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
 Pip:
 
 ```bash
-pip install spark-nlp==5.1.0
+pip install spark-nlp==5.1.1
 ```
 
 Conda:
@@ -600,7 +600,7 @@ spark = SparkSession.builder
     .config("spark.driver.memory", "16G")
     .config("spark.driver.maxResultSize", "0")
    .config("spark.kryoserializer.buffer.max", "2000M")
-    .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0")
+    .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1")
     .getOrCreate()
 ```
 
@@ -671,7 +671,7 @@ Use either one of the following options
 - Add the following Maven Coordinates to the interpreter's library list
 
 ```bash
-com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
+com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
 ```
 
 - Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
@@ -682,7 +682,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
 Apart from the previous step, install the python module through pip
 
 ```bash
-pip install spark-nlp==5.1.0
+pip install spark-nlp==5.1.1
 ```
 
 Or you can install `spark-nlp` from inside Zeppelin by using Conda:
@@ -710,7 +710,7 @@ launch the Jupyter from the same Python environment:
 $ conda create -n sparknlp python=3.8 -y
 $ conda activate sparknlp
 # spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==5.1.0 pyspark==3.3.1 jupyter
+$ pip install spark-nlp==5.1.1 pyspark==3.3.1 jupyter
 $ jupyter notebook
 ```
 
@@ -727,7 +727,7 @@ export PYSPARK_PYTHON=python3
 export PYSPARK_DRIVER_PYTHON=jupyter
 export PYSPARK_DRIVER_PYTHON_OPTS=notebook
 
-pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
+pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
 ```
 
 Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
@@ -754,7 +754,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
 # -s is for spark-nlp
 # -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
 # by default they are set to the latest
-!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.0
+!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.1
 ```
 
 [Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
@@ -777,7 +777,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
 # -s is for spark-nlp
 # -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
 # by default they are set to the latest
-!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.0
+!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.1.1
 ```
 
 [Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
@@ -796,9 +796,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
 
 3. In `Libraries` tab inside your cluster you need to follow these steps:
 
-    3.1. Install New -> PyPI -> `spark-nlp==5.1.0` -> Install
+    3.1. Install New -> PyPI -> `spark-nlp==5.1.1` -> Install
 
-    3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0` -> Install
+    3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1` -> Install
 
 4. Now you can attach your notebook to the cluster and use Spark NLP!
 
@@ -849,7 +849,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
       "spark.kryoserializer.buffer.max": "2000M",
      "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
       "spark.driver.maxResultSize": "0",
-      "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0"
+      "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1"
     }
 }]
 ```
@@ -858,7 +858,7 @@ A sample of AWS CLI to launch EMR cluster:
 
 ```.sh
 aws emr create-cluster \
---name "Spark NLP 5.1.0" \
+--name "Spark NLP 5.1.1" \
 --release-label emr-6.2.0 \
 --applications Name=Hadoop Name=Spark Name=Hive \
 --instance-type m4.4xlarge \
@@ -922,7 +922,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
   --enable-component-gateway \
   --metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
   --initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
-  --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
+  --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
 ```
 
 2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
@@ -961,7 +961,7 @@ spark = SparkSession.builder
     .config("spark.kryoserializer.buffer.max", "2000m")
     .config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
     .config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
-    .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0")
+    .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1")
     .getOrCreate()
 ```
 
@@ -975,7 +975,7 @@ spark-shell \
   --conf spark.kryoserializer.buffer.max=2000M \
   --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
-  --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
+  --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
 ```
 
 **pyspark:**
@@ -988,7 +988,7 @@ pyspark \
  --conf spark.kryoserializer.buffer.max=2000M \
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
-  --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.0
+  --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.1.1
 ```
 
 **Databricks:**
@@ -1260,7 +1260,7 @@ spark = SparkSession.builder
     .config("spark.driver.memory", "16G")
     .config("spark.driver.maxResultSize", "0")
     .config("spark.kryoserializer.buffer.max", "2000M")
-    .config("spark.jars", "/tmp/spark-nlp-assembly-5.1.0.jar")
+    .config("spark.jars", "/tmp/spark-nlp-assembly-5.1.1.jar")
     .getOrCreate()
 ```
 
@@ -1269,7 +1269,7 @@ spark = SparkSession.builder
 version (3.0.x, 3.1.x, 3.2.x, 3.3.x, and 3.4.x)
 - If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
 to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
-i.e., `hdfs:///tmp/spark-nlp-assembly-5.1.0.jar`)
+i.e., `hdfs:///tmp/spark-nlp-assembly-5.1.1.jar`)
 
 Example of using pretrained Models and Pipelines in offline:
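The hunk above cuts off right where the README's offline example begins. For orientation, here is a minimal sketch of that offline pattern; it is not taken from the diff, the model directory path is hypothetical, and the only assumption is that the model was copied to the cluster ahead of time:

```python
# Offline-usage sketch (hypothetical paths; not part of the diff).
# Load a model from a pre-copied local or distributed path instead of
# letting Spark NLP's ResourceDownloader fetch it over the network.
import sparknlp
from sparknlp.annotator import WordEmbeddingsModel

spark = sparknlp.start()  # assumes the 5.1.1 jar is already available offline

embeddings = (
    WordEmbeddingsModel.load("hdfs:///tmp/glove_100d_en")  # hypothetical model dir
    .setInputCols(["document", "token"])
    .setOutputCol("embeddings")
)
```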
{spark_nlp-5.1.0.dist-info → spark_nlp-5.1.1.dist-info}/RECORD RENAMED
@@ -1,7 +1,7 @@
 com/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 com/johnsnowlabs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 com/johnsnowlabs/nlp/__init__.py,sha256=DPIVXtONO5xXyOk-HB0-sNiHAcco17NN13zPS_6Uw8c,294
-sparknlp/__init__.py,sha256=
+sparknlp/__init__.py,sha256=H3zu8K8le2PVdxKm2tgeRCeL61_U4nfb1gGCZo6TpP0,13588
 sparknlp/annotation.py,sha256=I5zOxG5vV2RfPZfqN9enT1i4mo6oBcn3Lrzs37QiOiA,5635
 sparknlp/annotation_audio.py,sha256=iRV_InSVhgvAwSRe9NTbUH9v6OGvTM-FPCpSAKVu0mE,1917
 sparknlp/annotation_image.py,sha256=xhCe8Ko-77XqWVuuYHFrjKqF6zPd8Z-RY_rmZXNwCXU,2547
@@ -78,7 +78,7 @@ sparknlp/annotator/embeddings/camembert_embeddings.py,sha256=dBTXas-2Tas_JUR9Xt_
 sparknlp/annotator/embeddings/chunk_embeddings.py,sha256=WUmkJimSuFkdcLJnvcxOV0QlCLgGlhub29ZTrZb70WE,6052
 sparknlp/annotator/embeddings/deberta_embeddings.py,sha256=_b5nzLb7heFQNN-uT2oBNO6-YmM8bHmAdnGXg47HOWw,8649
 sparknlp/annotator/embeddings/distil_bert_embeddings.py,sha256=4pyMCsbvvXYeTGIMVUir9wCDKR_1f_HKtXZrTDO1Thc,9275
-sparknlp/annotator/embeddings/doc2vec.py,sha256=
+sparknlp/annotator/embeddings/doc2vec.py,sha256=Xk3MdEkXatX9lRgbFbAdnIDrLgIxzUIGWFBZeo9BTq0,13226
 sparknlp/annotator/embeddings/e5_embeddings.py,sha256=dfPHCAYpayCUMxXtol0t68cDs8-JVu0M4EslimwNS0Q,7684
 sparknlp/annotator/embeddings/elmo_embeddings.py,sha256=KV-KPs0Pq_OpPaHsnqBz2k_S7VdzyFZ4632IeFNKqJ8,9858
 sparknlp/annotator/embeddings/instructor_embeddings.py,sha256=CTKmbuBOx_KBM4JM-Y1U5LyR-6rrnpoBGbgGE_axS1c,8670
@@ -88,7 +88,7 @@ sparknlp/annotator/embeddings/roberta_embeddings.py,sha256=V4HGDUK2YBHhAZd1ygJEG
 sparknlp/annotator/embeddings/roberta_sentence_embeddings.py,sha256=KVrD4z_tIU-sphK6dmbbnHBBt8-Y89C_BFQAkN99kZo,8181
 sparknlp/annotator/embeddings/sentence_embeddings.py,sha256=azuA1FKMtTJ9suwJqTEHeWHumT6kYdfURTe_1fsqcB8,5402
 sparknlp/annotator/embeddings/universal_sentence_encoder.py,sha256=_fTo-K78RjxiIKptpsI32mpW87RFCdXM16epHv4RVQY,8571
-sparknlp/annotator/embeddings/word2vec.py,sha256=
+sparknlp/annotator/embeddings/word2vec.py,sha256=UBhA4qUczQOx1t82Eu51lxx1-wJ_RLnCb__ncowSNhk,13229
 sparknlp/annotator/embeddings/word_embeddings.py,sha256=CQxjx2yDdmSM9s8D-bzsbUQhT8t1cqC4ynxlf9INpMU,15388
 sparknlp/annotator/embeddings/xlm_roberta_embeddings.py,sha256=t-Bg1bQcqI_fIqUWQbHt9rHK2_tyq0YXiq3uMw4xb94,9488
 sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py,sha256=ojxD3H2VgDEn-RzDdCz0X485pojHBAFrlzsNemI05bY,8602
@@ -214,7 +214,7 @@ sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py,sha256=R4yHFN3
 sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py,sha256=EoCSdcIjqQ3wv13MAuuWrKV8wyVBP0SbOEW41omHlR0,23189
 sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py,sha256=k5CQ7gKV6HZbZMB8cKLUJuZxoZWlP_DFWdZ--aIDwsc,2356
 sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py,sha256=pAxjWhjazSX8Vg0MFqJiuRVw1IbnQNSs-8Xp26L4nko,870
-spark_nlp-5.1.0.dist-info/METADATA,sha256=
-spark_nlp-5.1.0.dist-info/WHEEL,sha256=
-spark_nlp-5.1.0.dist-info/top_level.txt,sha256=
-spark_nlp-5.1.0.dist-info/RECORD,,
+spark_nlp-5.1.1.dist-info/METADATA,sha256=K7TnsSMny4JTGXK1CXDfahQVtfGqoPOIkpLBns1O_5s,53873
+spark_nlp-5.1.1.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
+spark_nlp-5.1.1.dist-info/top_level.txt,sha256=uuytur4pyMRw2H_txNY2ZkaucZHUs22QF8-R03ch_-E,13
+spark_nlp-5.1.1.dist-info/RECORD,,
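A note on the `sha256=` values above: wheel RECORD entries are not hex digests but urlsafe-base64-encoded SHA-256 digests with the `=` padding stripped (the `path,sha256=<digest>,size` format from the wheel spec). A small sketch for verifying an entry:

```python
# Recompute a wheel RECORD hash: "path,sha256=<urlsafe-b64 digest, no padding>,size"
import base64
import hashlib

def record_hash(path: str) -> str:
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# For an installed 5.1.1 copy of sparknlp/__init__.py this should print
# H3zu8K8le2PVdxKm2tgeRCeL61_U4nfb1gGCZo6TpP0 (the value in RECORD above).
print(record_hash("sparknlp/__init__.py"))
```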
sparknlp/__init__.py CHANGED
@@ -128,7 +128,7 @@ def start(gpu=False,
         The initiated Spark session.
 
     """
-    current_version = "5.1.0"
+    current_version = "5.1.1"
 
     if params is None:
         params = {}
@@ -309,4 +309,4 @@ def version():
     str
         The current Spark NLP version.
     """
-    return '5.1.0'
+    return '5.1.1'
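Both hunks are plain string bumps, but they matter at runtime: `start()` uses `current_version` to resolve the matching `spark-nlp` Maven artifact, and `version()` is the public way to read it. A quick post-upgrade sanity check:

```python
# Confirm the Python package and the jar it resolves are in sync after upgrading.
import sparknlp

print(sparknlp.version())  # expected: '5.1.1'

# start() pulls com.johnsnowlabs.nlp:spark-nlp_2.12:<current_version> by default,
# so a stale Python package silently pins an older jar.
spark = sparknlp.start()
print(spark.version)       # the underlying Apache Spark version
```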
sparknlp/annotator/embeddings/doc2vec.py CHANGED
@@ -344,3 +344,9 @@ class Doc2VecModel(AnnotatorModel, HasStorageRef, HasEmbeddingsProperties):
         from sparknlp.pretrained import ResourceDownloader
         return ResourceDownloader.downloadModel(Doc2VecModel, name, lang, remote_loc)
 
+    def getVectors(self):
+        """
+        Returns the vector representation of the words as a dataframe
+        with two fields, word and vector.
+        """
+        return self._call_java("getVectors")
sparknlp/annotator/embeddings/word2vec.py CHANGED
@@ -345,3 +345,9 @@ class Word2VecModel(AnnotatorModel, HasStorageRef, HasEmbeddingsProperties):
         from sparknlp.pretrained import ResourceDownloader
         return ResourceDownloader.downloadModel(Word2VecModel, name, lang, remote_loc)
 
+    def getVectors(self):
+        """
+        Returns the vector representation of the words as a dataframe
+        with two fields, word and vector.
+        """
+        return self._call_java("getVectors")
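The same six lines are added to both `Doc2VecModel` and `Word2VecModel`, exposing the fitted word vectors that were previously reachable only from the JVM side. A minimal end-to-end sketch follows; the tiny training corpus and the `setMinCount(1)` setting are illustrative, not taken from the diff:

```python
# Fit a small Word2Vec model and inspect its vectors via the getVectors()
# accessor added in 5.1.1; it returns a DataFrame with columns word, vector.
import sparknlp
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer, Word2VecApproach
from pyspark.ml import Pipeline

spark = sparknlp.start()
data = spark.createDataFrame(
    [["Spark NLP ships word and document embeddings"]]
).toDF("text")

document = DocumentAssembler().setInputCol("text").setOutputCol("document")
token = Tokenizer().setInputCols(["document"]).setOutputCol("token")
word2vec = (
    Word2VecApproach()
    .setInputCols(["token"])
    .setOutputCol("embeddings")
    .setMinCount(1)  # keep every token; the toy corpus is a single sentence
)

model = Pipeline(stages=[document, token, word2vec]).fit(data)
model.stages[-1].getVectors().show(5)  # the fitted Word2VecModel
```

`Doc2VecModel.getVectors()` behaves the same way for a fitted `Doc2VecApproach` stage.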
{spark_nlp-5.1.0.dist-info → spark_nlp-5.1.1.dist-info}/WHEEL RENAMED (file without changes)
{spark_nlp-5.1.0.dist-info → spark_nlp-5.1.1.dist-info}/top_level.txt RENAMED (file without changes)