spark-nlp 5.4.2__py2.py3-none-any.whl → 5.5.0rc1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of spark-nlp might be problematic. Click here for more details.
- {spark_nlp-5.4.2.dist-info → spark_nlp-5.5.0rc1.dist-info}/METADATA +45 -45
- {spark_nlp-5.4.2.dist-info → spark_nlp-5.5.0rc1.dist-info}/RECORD +23 -11
- sparknlp/__init__.py +2 -2
- sparknlp/annotator/classifier_dl/__init__.py +3 -1
- sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py +211 -0
- sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py +202 -0
- sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py +2 -15
- sparknlp/annotator/embeddings/__init__.py +3 -0
- sparknlp/annotator/embeddings/mxbai_embeddings.py +184 -0
- sparknlp/annotator/embeddings/nomic_embeddings.py +181 -0
- sparknlp/annotator/embeddings/snowflake_embeddings.py +202 -0
- sparknlp/annotator/seq2seq/__init__.py +7 -0
- sparknlp/annotator/seq2seq/auto_gguf_model.py +804 -0
- sparknlp/annotator/seq2seq/cpm_transformer.py +321 -0
- sparknlp/annotator/seq2seq/llama3_transformer.py +381 -0
- sparknlp/annotator/seq2seq/nllb_transformer.py +420 -0
- sparknlp/annotator/seq2seq/phi3_transformer.py +330 -0
- sparknlp/annotator/seq2seq/qwen_transformer.py +339 -0
- sparknlp/annotator/seq2seq/starcoder_transformer.py +335 -0
- sparknlp/internal/__init__.py +89 -0
- {spark_nlp-5.4.2.dist-info → spark_nlp-5.5.0rc1.dist-info}/.uuid +0 -0
- {spark_nlp-5.4.2.dist-info → spark_nlp-5.5.0rc1.dist-info}/WHEEL +0 -0
- {spark_nlp-5.4.2.dist-info → spark_nlp-5.5.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: spark-nlp
|
|
3
|
-
Version: 5.
|
|
3
|
+
Version: 5.5.0rc1
|
|
4
4
|
Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
|
|
5
5
|
Home-page: https://github.com/JohnSnowLabs/spark-nlp
|
|
6
6
|
Author: John Snow Labs
|
|
@@ -198,7 +198,7 @@ To use Spark NLP you need the following requirements:
|
|
|
198
198
|
|
|
199
199
|
**GPU (optional):**
|
|
200
200
|
|
|
201
|
-
Spark NLP 5.
|
|
201
|
+
Spark NLP 5.5.0-rc1 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
|
|
202
202
|
|
|
203
203
|
- NVIDIA® GPU drivers version 450.80.02 or higher
|
|
204
204
|
- CUDA® Toolkit 11.2
|
|
@@ -214,7 +214,7 @@ $ java -version
|
|
|
214
214
|
$ conda create -n sparknlp python=3.7 -y
|
|
215
215
|
$ conda activate sparknlp
|
|
216
216
|
# spark-nlp by default is based on pyspark 3.x
|
|
217
|
-
$ pip install spark-nlp==5.
|
|
217
|
+
$ pip install spark-nlp==5.5.0-rc1 pyspark==3.3.1
|
|
218
218
|
```
|
|
219
219
|
|
|
220
220
|
In Python console or Jupyter `Python3` kernel:
|
|
@@ -259,7 +259,7 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
|
|
|
259
259
|
|
|
260
260
|
## Apache Spark Support
|
|
261
261
|
|
|
262
|
-
Spark NLP *5.
|
|
262
|
+
Spark NLP *5.5.0-rc1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
|
|
263
263
|
|
|
264
264
|
| Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
|
|
265
265
|
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
|
|
@@ -292,7 +292,7 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
|
|
|
292
292
|
|
|
293
293
|
## Databricks Support
|
|
294
294
|
|
|
295
|
-
Spark NLP 5.
|
|
295
|
+
Spark NLP 5.5.0-rc1 has been tested and is compatible with the following runtimes:
|
|
296
296
|
|
|
297
297
|
**CPU:**
|
|
298
298
|
|
|
@@ -365,7 +365,7 @@ Spark NLP 5.4.2 has been tested and is compatible with the following runtimes:
|
|
|
365
365
|
|
|
366
366
|
## EMR Support
|
|
367
367
|
|
|
368
|
-
Spark NLP 5.
|
|
368
|
+
Spark NLP 5.5.0-rc1 has been tested and is compatible with the following EMR releases:
|
|
369
369
|
|
|
370
370
|
- emr-6.2.0
|
|
371
371
|
- emr-6.3.0
|
|
@@ -415,11 +415,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
|
|
|
415
415
|
```sh
|
|
416
416
|
# CPU
|
|
417
417
|
|
|
418
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
418
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
419
419
|
|
|
420
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
420
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
421
421
|
|
|
422
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
422
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
423
423
|
```
|
|
424
424
|
|
|
425
425
|
The `spark-nlp` has been published to
|
|
@@ -428,11 +428,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
428
428
|
```sh
|
|
429
429
|
# GPU
|
|
430
430
|
|
|
431
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.
|
|
431
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
|
|
432
432
|
|
|
433
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.
|
|
433
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
|
|
434
434
|
|
|
435
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.
|
|
435
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.5.0-rc1
|
|
436
436
|
|
|
437
437
|
```
|
|
438
438
|
|
|
@@ -442,11 +442,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
442
442
|
```sh
|
|
443
443
|
# AArch64
|
|
444
444
|
|
|
445
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.
|
|
445
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
|
|
446
446
|
|
|
447
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.
|
|
447
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
|
|
448
448
|
|
|
449
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.
|
|
449
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.5.0-rc1
|
|
450
450
|
|
|
451
451
|
```
|
|
452
452
|
|
|
@@ -456,11 +456,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
|
|
|
456
456
|
```sh
|
|
457
457
|
# M1/M2 (Apple Silicon)
|
|
458
458
|
|
|
459
|
-
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.
|
|
459
|
+
spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
|
|
460
460
|
|
|
461
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.
|
|
461
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
|
|
462
462
|
|
|
463
|
-
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.
|
|
463
|
+
spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.5.0-rc1
|
|
464
464
|
|
|
465
465
|
```
|
|
466
466
|
|
|
@@ -474,7 +474,7 @@ set in your SparkSession:
|
|
|
474
474
|
spark-shell \
|
|
475
475
|
--driver-memory 16g \
|
|
476
476
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
477
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
477
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
478
478
|
```
|
|
479
479
|
|
|
480
480
|
## Scala
|
|
@@ -492,7 +492,7 @@ coordinates:
|
|
|
492
492
|
<dependency>
|
|
493
493
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
494
494
|
<artifactId>spark-nlp_2.12</artifactId>
|
|
495
|
-
<version>5.
|
|
495
|
+
<version>5.5.0-rc1</version>
|
|
496
496
|
</dependency>
|
|
497
497
|
```
|
|
498
498
|
|
|
@@ -503,7 +503,7 @@ coordinates:
|
|
|
503
503
|
<dependency>
|
|
504
504
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
505
505
|
<artifactId>spark-nlp-gpu_2.12</artifactId>
|
|
506
|
-
<version>5.
|
|
506
|
+
<version>5.5.0-rc1</version>
|
|
507
507
|
</dependency>
|
|
508
508
|
```
|
|
509
509
|
|
|
@@ -514,7 +514,7 @@ coordinates:
|
|
|
514
514
|
<dependency>
|
|
515
515
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
516
516
|
<artifactId>spark-nlp-aarch64_2.12</artifactId>
|
|
517
|
-
<version>5.
|
|
517
|
+
<version>5.5.0-rc1</version>
|
|
518
518
|
</dependency>
|
|
519
519
|
```
|
|
520
520
|
|
|
@@ -525,7 +525,7 @@ coordinates:
|
|
|
525
525
|
<dependency>
|
|
526
526
|
<groupId>com.johnsnowlabs.nlp</groupId>
|
|
527
527
|
<artifactId>spark-nlp-silicon_2.12</artifactId>
|
|
528
|
-
<version>5.
|
|
528
|
+
<version>5.5.0-rc1</version>
|
|
529
529
|
</dependency>
|
|
530
530
|
```
|
|
531
531
|
|
|
@@ -535,28 +535,28 @@ coordinates:
|
|
|
535
535
|
|
|
536
536
|
```sbtshell
|
|
537
537
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
|
|
538
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.
|
|
538
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.5.0-rc1"
|
|
539
539
|
```
|
|
540
540
|
|
|
541
541
|
**spark-nlp-gpu:**
|
|
542
542
|
|
|
543
543
|
```sbtshell
|
|
544
544
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
|
|
545
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.
|
|
545
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.5.0-rc1"
|
|
546
546
|
```
|
|
547
547
|
|
|
548
548
|
**spark-nlp-aarch64:**
|
|
549
549
|
|
|
550
550
|
```sbtshell
|
|
551
551
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
|
|
552
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.
|
|
552
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.5.0-rc1"
|
|
553
553
|
```
|
|
554
554
|
|
|
555
555
|
**spark-nlp-silicon:**
|
|
556
556
|
|
|
557
557
|
```sbtshell
|
|
558
558
|
// https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
|
|
559
|
-
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.
|
|
559
|
+
libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.5.0-rc1"
|
|
560
560
|
```
|
|
561
561
|
|
|
562
562
|
Maven
|
|
@@ -578,7 +578,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
|
|
|
578
578
|
Pip:
|
|
579
579
|
|
|
580
580
|
```bash
|
|
581
|
-
pip install spark-nlp==5.
|
|
581
|
+
pip install spark-nlp==5.5.0-rc1
|
|
582
582
|
```
|
|
583
583
|
|
|
584
584
|
Conda:
|
|
@@ -607,7 +607,7 @@ spark = SparkSession.builder
|
|
|
607
607
|
.config("spark.driver.memory", "16G")
|
|
608
608
|
.config("spark.driver.maxResultSize", "0")
|
|
609
609
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
610
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
610
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1")
|
|
611
611
|
.getOrCreate()
|
|
612
612
|
```
|
|
613
613
|
|
|
@@ -678,7 +678,7 @@ Use either one of the following options
|
|
|
678
678
|
- Add the following Maven Coordinates to the interpreter's library list
|
|
679
679
|
|
|
680
680
|
```bash
|
|
681
|
-
com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
681
|
+
com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
682
682
|
```
|
|
683
683
|
|
|
684
684
|
- Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
|
|
@@ -689,7 +689,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.2
|
|
|
689
689
|
Apart from the previous step, install the python module through pip
|
|
690
690
|
|
|
691
691
|
```bash
|
|
692
|
-
pip install spark-nlp==5.
|
|
692
|
+
pip install spark-nlp==5.5.0-rc1
|
|
693
693
|
```
|
|
694
694
|
|
|
695
695
|
Or you can install `spark-nlp` from inside Zeppelin by using Conda:
|
|
@@ -717,7 +717,7 @@ launch the Jupyter from the same Python environment:
|
|
|
717
717
|
$ conda create -n sparknlp python=3.8 -y
|
|
718
718
|
$ conda activate sparknlp
|
|
719
719
|
# spark-nlp by default is based on pyspark 3.x
|
|
720
|
-
$ pip install spark-nlp==5.
|
|
720
|
+
$ pip install spark-nlp==5.5.0-rc1 pyspark==3.3.1 jupyter
|
|
721
721
|
$ jupyter notebook
|
|
722
722
|
```
|
|
723
723
|
|
|
@@ -734,7 +734,7 @@ export PYSPARK_PYTHON=python3
|
|
|
734
734
|
export PYSPARK_DRIVER_PYTHON=jupyter
|
|
735
735
|
export PYSPARK_DRIVER_PYTHON_OPTS=notebook
|
|
736
736
|
|
|
737
|
-
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
737
|
+
pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
738
738
|
```
|
|
739
739
|
|
|
740
740
|
Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
|
|
@@ -761,7 +761,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
761
761
|
# -s is for spark-nlp
|
|
762
762
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
|
|
763
763
|
# by default they are set to the latest
|
|
764
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
764
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.5.0-rc1
|
|
765
765
|
```
|
|
766
766
|
|
|
767
767
|
[Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
|
|
@@ -784,7 +784,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
|
|
|
784
784
|
# -s is for spark-nlp
|
|
785
785
|
# -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
|
|
786
786
|
# by default they are set to the latest
|
|
787
|
-
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.
|
|
787
|
+
!wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.5.0-rc1
|
|
788
788
|
```
|
|
789
789
|
|
|
790
790
|
[Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
|
|
@@ -803,9 +803,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
|
|
|
803
803
|
|
|
804
804
|
3. In `Libraries` tab inside your cluster you need to follow these steps:
|
|
805
805
|
|
|
806
|
-
3.1. Install New -> PyPI -> `spark-nlp==5.
|
|
806
|
+
3.1. Install New -> PyPI -> `spark-nlp==5.5.0-rc1` -> Install
|
|
807
807
|
|
|
808
|
-
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
808
|
+
3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1` -> Install
|
|
809
809
|
|
|
810
810
|
4. Now you can attach your notebook to the cluster and use Spark NLP!
|
|
811
811
|
|
|
@@ -856,7 +856,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
|
|
|
856
856
|
"spark.kryoserializer.buffer.max": "2000M",
|
|
857
857
|
"spark.serializer": "org.apache.spark.serializer.KryoSerializer",
|
|
858
858
|
"spark.driver.maxResultSize": "0",
|
|
859
|
-
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
859
|
+
"spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1"
|
|
860
860
|
}
|
|
861
861
|
}]
|
|
862
862
|
```
|
|
@@ -865,7 +865,7 @@ A sample of AWS CLI to launch EMR cluster:
|
|
|
865
865
|
|
|
866
866
|
```.sh
|
|
867
867
|
aws emr create-cluster \
|
|
868
|
-
--name "Spark NLP 5.
|
|
868
|
+
--name "Spark NLP 5.5.0-rc1" \
|
|
869
869
|
--release-label emr-6.2.0 \
|
|
870
870
|
--applications Name=Hadoop Name=Spark Name=Hive \
|
|
871
871
|
--instance-type m4.4xlarge \
|
|
@@ -929,7 +929,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
|
|
|
929
929
|
--enable-component-gateway \
|
|
930
930
|
--metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
|
|
931
931
|
--initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
|
|
932
|
-
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
932
|
+
--properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
933
933
|
```
|
|
934
934
|
|
|
935
935
|
2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
|
|
@@ -972,7 +972,7 @@ spark = SparkSession.builder
|
|
|
972
972
|
.config("spark.kryoserializer.buffer.max", "2000m")
|
|
973
973
|
.config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
|
|
974
974
|
.config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
|
|
975
|
-
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
975
|
+
.config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1")
|
|
976
976
|
.getOrCreate()
|
|
977
977
|
```
|
|
978
978
|
|
|
@@ -986,7 +986,7 @@ spark-shell \
|
|
|
986
986
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
987
987
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
988
988
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
989
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
989
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
990
990
|
```
|
|
991
991
|
|
|
992
992
|
**pyspark:**
|
|
@@ -999,7 +999,7 @@ pyspark \
|
|
|
999
999
|
--conf spark.kryoserializer.buffer.max=2000M \
|
|
1000
1000
|
--conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
|
|
1001
1001
|
--conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
|
|
1002
|
-
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.
|
|
1002
|
+
--packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.5.0-rc1
|
|
1003
1003
|
```
|
|
1004
1004
|
|
|
1005
1005
|
**Databricks:**
|
|
@@ -1271,7 +1271,7 @@ spark = SparkSession.builder
|
|
|
1271
1271
|
.config("spark.driver.memory", "16G")
|
|
1272
1272
|
.config("spark.driver.maxResultSize", "0")
|
|
1273
1273
|
.config("spark.kryoserializer.buffer.max", "2000M")
|
|
1274
|
-
.config("spark.jars", "/tmp/spark-nlp-assembly-5.
|
|
1274
|
+
.config("spark.jars", "/tmp/spark-nlp-assembly-5.5.0-rc1.jar")
|
|
1275
1275
|
.getOrCreate()
|
|
1276
1276
|
```
|
|
1277
1277
|
|
|
@@ -1280,7 +1280,7 @@ spark = SparkSession.builder
|
|
|
1280
1280
|
version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
|
|
1281
1281
|
- If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
|
|
1282
1282
|
to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
|
|
1283
|
-
i.e., `hdfs:///tmp/spark-nlp-assembly-5.
|
|
1283
|
+
i.e., `hdfs:///tmp/spark-nlp-assembly-5.5.0-rc1.jar`)
|
|
1284
1284
|
|
|
1285
1285
|
Example of using pretrained Models and Pipelines in offline:
|
|
1286
1286
|
|
|
@@ -3,7 +3,7 @@ com/johnsnowlabs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
|
3
3
|
com/johnsnowlabs/ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
com/johnsnowlabs/ml/ai/__init__.py,sha256=YQiK2M7U4d8y5irPy_HB8ae0mSpqS9583MH44pnKJXc,295
|
|
5
5
|
com/johnsnowlabs/nlp/__init__.py,sha256=DPIVXtONO5xXyOk-HB0-sNiHAcco17NN13zPS_6Uw8c,294
|
|
6
|
-
sparknlp/__init__.py,sha256=
|
|
6
|
+
sparknlp/__init__.py,sha256=KDifiXnwBik4d6cz2zhtUwi3nicSAxEoD1Pht3ELBo0,13646
|
|
7
7
|
sparknlp/annotation.py,sha256=I5zOxG5vV2RfPZfqN9enT1i4mo6oBcn3Lrzs37QiOiA,5635
|
|
8
8
|
sparknlp/annotation_audio.py,sha256=iRV_InSVhgvAwSRe9NTbUH9v6OGvTM-FPCpSAKVu0mE,1917
|
|
9
9
|
sparknlp/annotation_image.py,sha256=xhCe8Ko-77XqWVuuYHFrjKqF6zPd8Z-RY_rmZXNwCXU,2547
|
|
@@ -30,10 +30,11 @@ sparknlp/annotator/audio/__init__.py,sha256=dXjtvi5c0aTZFq1Q_JciUd1uFTBVSJoUdcq0
|
|
|
30
30
|
sparknlp/annotator/audio/hubert_for_ctc.py,sha256=76PfwPZZvOHU5kfDqLueCFbmqa4W8pMNRGoCvOqjsEA,7859
|
|
31
31
|
sparknlp/annotator/audio/wav2vec2_for_ctc.py,sha256=K78P1U6vA4O1UufsLYzy0H7arsKNmwPcIV7kzDFsA5Q,6210
|
|
32
32
|
sparknlp/annotator/audio/whisper_for_ctc.py,sha256=uII51umuohqwnAW0Q7VdxEFyr_j5LMnfpcRlf8TbetA,9800
|
|
33
|
-
sparknlp/annotator/classifier_dl/__init__.py,sha256=
|
|
33
|
+
sparknlp/annotator/classifier_dl/__init__.py,sha256=Oa5v2kInquQXLy3_Qs0bJAS_JJiTgqy9W0zeBF6y3B4,3933
|
|
34
34
|
sparknlp/annotator/classifier_dl/albert_for_question_answering.py,sha256=LG2dL6Fky1T35yXTUZBfIihIIGnkRFQ7ECQ3HRXXEG8,6517
|
|
35
35
|
sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py,sha256=kWx7f9pcKE2qw319gn8FN0Md5dX38gbmfeoY9gWCLNk,7842
|
|
36
36
|
sparknlp/annotator/classifier_dl/albert_for_token_classification.py,sha256=5rdsjWnsAVmtP-idU7ATKJ8lkH2rtlKZLnpi4Mq27eI,6839
|
|
37
|
+
sparknlp/annotator/classifier_dl/albert_for_zero_shot_classification.py,sha256=_TgV6EiIOiD_djA3fxfoz-o37mzMeKbn6iL2kZ6GzO0,8366
|
|
37
38
|
sparknlp/annotator/classifier_dl/bart_for_zero_shot_classification.py,sha256=yqQeDdpLbNOKuSZejZjSAjT8ydYyxsTVf2aFDgSSDfc,8767
|
|
38
39
|
sparknlp/annotator/classifier_dl/bert_for_question_answering.py,sha256=2euY_RAdMPA4IHJXZAd5MkQojFOtFNhB_hSc1iVQ5DQ,6433
|
|
39
40
|
sparknlp/annotator/classifier_dl/bert_for_sequence_classification.py,sha256=AzD3RQcRuQc0DDTbL6vGiacTtHlZnbAqksNvRQq7EQE,7800
|
|
@@ -42,11 +43,12 @@ sparknlp/annotator/classifier_dl/bert_for_zero_shot_classification.py,sha256=mli
|
|
|
42
43
|
sparknlp/annotator/classifier_dl/camembert_for_question_answering.py,sha256=BeE-62tFkXMoyiy3PtcnwgT2-wqzTFo5VZHrWUqsWmM,6510
|
|
43
44
|
sparknlp/annotator/classifier_dl/camembert_for_sequence_classification.py,sha256=06bkwhNBcmNS5gR_JrMjBDW3jAdjEI5YL4SuV16Va7E,7962
|
|
44
45
|
sparknlp/annotator/classifier_dl/camembert_for_token_classification.py,sha256=vjwDE_kZiBupENaYvUZOTTqVOb3KCsGse-QX3QOutz4,6522
|
|
46
|
+
sparknlp/annotator/classifier_dl/camembert_for_zero_shot_classification.py,sha256=YUfohQ-qIG3jntfYgrjx8bOFxGTTMrpB-Sj49PNAEEU,8360
|
|
45
47
|
sparknlp/annotator/classifier_dl/classifier_dl.py,sha256=Dj-T5ByCgzgFpah7LVz_07QKBB0qNdqXB6tkvPE-nsQ,12672
|
|
46
48
|
sparknlp/annotator/classifier_dl/deberta_for_question_answering.py,sha256=oikVBeVohsSR9HPV_yq_0U7zHps94UO4lXbYu9G7MF0,6486
|
|
47
49
|
sparknlp/annotator/classifier_dl/deberta_for_sequence_classification.py,sha256=H2LDT8ttD9hxfFDrymsyCq0EwCuWl5FE2-XVqT9LcRQ,7773
|
|
48
50
|
sparknlp/annotator/classifier_dl/deberta_for_token_classification.py,sha256=jj5hB9AV-0Of505E6z62lYPIWmsqNeTX0vRRq3_7T9I,6807
|
|
49
|
-
sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py,sha256=
|
|
51
|
+
sparknlp/annotator/classifier_dl/deberta_for_zero_shot_classification.py,sha256=R7eVMChBY_wm7oM2j1Y18ZJ9dcIm5ysq8XBSIiVxZKw,8280
|
|
50
52
|
sparknlp/annotator/classifier_dl/distil_bert_for_question_answering.py,sha256=yA4LrI4RN4f44wbIrdpwqderTJBhAkjAHpUxcCeCROE,6552
|
|
51
53
|
sparknlp/annotator/classifier_dl/distil_bert_for_sequence_classification.py,sha256=Cax3LcVLppiHs1dyahsBSq_TLHSwI2-K7LGCZHZNs1I,7926
|
|
52
54
|
sparknlp/annotator/classifier_dl/distil_bert_for_token_classification.py,sha256=y9S83LW0Mfn4fRzopRXFj8l2gb-Nrm1rr9zRftOckJU,6832
|
|
@@ -81,7 +83,7 @@ sparknlp/annotator/cv/vit_for_image_classification.py,sha256=D2V3pxAd3rBi1817lxV
|
|
|
81
83
|
sparknlp/annotator/dependency/__init__.py,sha256=eV43oXAGaYl2N1XKIEAAZJLNP8gpHm8VxuXDeDlQzR4,774
|
|
82
84
|
sparknlp/annotator/dependency/dependency_parser.py,sha256=SxyvHPp8Hs1Xnm5X1nLTMi095XoQMtfL8pbys15mYAI,11212
|
|
83
85
|
sparknlp/annotator/dependency/typed_dependency_parser.py,sha256=60vPdYkbFk9MPGegg3m9Uik9cMXpMZd8tBvXG39gNww,12456
|
|
84
|
-
sparknlp/annotator/embeddings/__init__.py,sha256=
|
|
86
|
+
sparknlp/annotator/embeddings/__init__.py,sha256=WifjEILUN6lZs4_WIX80xl5hrrsSrBK-4oVaLh-tONc,2343
|
|
85
87
|
sparknlp/annotator/embeddings/albert_embeddings.py,sha256=6Rd1LIn8oFIpq_ALcJh-RUjPEO7Ht8wsHY6JHSFyMkw,9995
|
|
86
88
|
sparknlp/annotator/embeddings/bert_embeddings.py,sha256=HVUjkg56kBcpGZCo-fmPG5uatMDF3swW_lnbpy1SgSI,8463
|
|
87
89
|
sparknlp/annotator/embeddings/bert_sentence_embeddings.py,sha256=NQy9KuXT9aKsTpYCR5RAeoFWI2YqEGorbdYrf_0KKmw,9148
|
|
@@ -96,9 +98,12 @@ sparknlp/annotator/embeddings/elmo_embeddings.py,sha256=KV-KPs0Pq_OpPaHsnqBz2k_S
|
|
|
96
98
|
sparknlp/annotator/embeddings/instructor_embeddings.py,sha256=CTKmbuBOx_KBM4JM-Y1U5LyR-6rrnpoBGbgGE_axS1c,8670
|
|
97
99
|
sparknlp/annotator/embeddings/longformer_embeddings.py,sha256=jS4fxB5O0-d9ta9VKv8ai-17n5YHt5rML8QxUw7K4Io,8754
|
|
98
100
|
sparknlp/annotator/embeddings/mpnet_embeddings.py,sha256=7d6E4lS7jjkppDPvty1UHNNrbykkriFiysrxZ_RzL0U,7875
|
|
101
|
+
sparknlp/annotator/embeddings/mxbai_embeddings.py,sha256=kCaYcM3lLYJjhElLK5isdxzJqIvoGZlUKKNkySMUkE8,6017
|
|
102
|
+
sparknlp/annotator/embeddings/nomic_embeddings.py,sha256=SfiTTpx0MqeHGC_nyoFNxJbfEQL4v-PrNH6hAOFsd8c,7338
|
|
99
103
|
sparknlp/annotator/embeddings/roberta_embeddings.py,sha256=q_WHby2lDcPc5bVHkGc6X_GwT3qyDUBLUVz5ZW4HCSY,9229
|
|
100
104
|
sparknlp/annotator/embeddings/roberta_sentence_embeddings.py,sha256=KVrD4z_tIU-sphK6dmbbnHBBt8-Y89C_BFQAkN99kZo,8181
|
|
101
105
|
sparknlp/annotator/embeddings/sentence_embeddings.py,sha256=azuA1FKMtTJ9suwJqTEHeWHumT6kYdfURTe_1fsqcB8,5402
|
|
106
|
+
sparknlp/annotator/embeddings/snowflake_embeddings.py,sha256=2k7oxSSgu2Y8U2fkfZCCneG9PQjXzmJsl41BDsD0hfE,7260
|
|
102
107
|
sparknlp/annotator/embeddings/uae_embeddings.py,sha256=sqTT67vcegVxcyoATISLPJSmOnA6J_otB6iREKOb6e4,8794
|
|
103
108
|
sparknlp/annotator/embeddings/universal_sentence_encoder.py,sha256=_fTo-K78RjxiIKptpsI32mpW87RFCdXM16epHv4RVQY,8571
|
|
104
109
|
sparknlp/annotator/embeddings/word2vec.py,sha256=UBhA4qUczQOx1t82Eu51lxx1-wJ_RLnCb__ncowSNhk,13229
|
|
@@ -139,14 +144,21 @@ sparknlp/annotator/sentence/sentence_detector_dl.py,sha256=-Osj9Bm9KyZRTAWkOsK9c
|
|
|
139
144
|
sparknlp/annotator/sentiment/__init__.py,sha256=Lq3vKaZS1YATLMg0VNXSVtkWL5q5G9taGBvdrvSwnfg,766
|
|
140
145
|
sparknlp/annotator/sentiment/sentiment_detector.py,sha256=m545NGU0Xzg_PO6_qIfpli1uZj7JQcyFgqe9R6wAPFI,8154
|
|
141
146
|
sparknlp/annotator/sentiment/vivekn_sentiment.py,sha256=4rpXWDgzU6ddnbrSCp9VdLb2epCc9oZ3c6XcqxEw8nk,9655
|
|
142
|
-
sparknlp/annotator/seq2seq/__init__.py,sha256=
|
|
147
|
+
sparknlp/annotator/seq2seq/__init__.py,sha256=Fdz1zsxpB6vM2a0sKuGCSMD1ZgqeVqAez0-AtppMGB4,1541
|
|
148
|
+
sparknlp/annotator/seq2seq/auto_gguf_model.py,sha256=YSWr2doOp9pwiodW_7qWgnQ2xdnl_WNEECBwxI6uq_c,39396
|
|
143
149
|
sparknlp/annotator/seq2seq/bart_transformer.py,sha256=I1flM4yeCzEAKOdQllBC30XuedxVJ7ferkFhZ6gwEbE,18481
|
|
150
|
+
sparknlp/annotator/seq2seq/cpm_transformer.py,sha256=zRbw_xAsaRnhuyYAW8UAGhz7mIyhpqk09nCsJb8-7rg,13298
|
|
144
151
|
sparknlp/annotator/seq2seq/gpt2_transformer.py,sha256=Oz95R_NRR4tWHu_bW6Ak2832ZILXycp3ify7LfRSi8o,15310
|
|
145
152
|
sparknlp/annotator/seq2seq/llama2_transformer.py,sha256=3LzTR0VerFdFmOizsrs2Q7HTnjELJ5WtfUgx5XnOqGM,13898
|
|
153
|
+
sparknlp/annotator/seq2seq/llama3_transformer.py,sha256=dA3rIEVOLmlnJwhqkYmL_GrrcRVpoUY_i7QIyA5N2jM,14920
|
|
146
154
|
sparknlp/annotator/seq2seq/m2m100_transformer.py,sha256=uIL9RZuuryTIdAy9TbJf9wbz6RekhW8S079bJhaB6i4,16116
|
|
147
155
|
sparknlp/annotator/seq2seq/marian_transformer.py,sha256=mQ4Ylh7ZzXAOue8f-x0gqzfS3vAz3XUdD7eQ2XhcEs4,13781
|
|
148
156
|
sparknlp/annotator/seq2seq/mistral_transformer.py,sha256=PJegrSQts_58rkt96xaHlqU1fKIaz8hxt7DTPkGS10A,14254
|
|
157
|
+
sparknlp/annotator/seq2seq/nllb_transformer.py,sha256=hbE2k5YDAZUWPk0qyx6-5xIZi3nBFeFqLcr6lEU9LZ8,19474
|
|
149
158
|
sparknlp/annotator/seq2seq/phi2_transformer.py,sha256=WwKCUOH8qGFv62YF63HjuT7bMVldh06gHvaZH3tbSDk,13787
|
|
159
|
+
sparknlp/annotator/seq2seq/phi3_transformer.py,sha256=rIFSS0sit9kUazUvMvwGFSRRsIuFNqpLH28bVKkFzx4,14219
|
|
160
|
+
sparknlp/annotator/seq2seq/qwen_transformer.py,sha256=UmxF84gQsqFVyofuki9TxaPTS_fP71WTj-ylCMLKsYY,14624
|
|
161
|
+
sparknlp/annotator/seq2seq/starcoder_transformer.py,sha256=BTXbSMRpXnDvrfh-6iFS5k6g6EcPV9zBl4U-SSC19wA,14293
|
|
150
162
|
sparknlp/annotator/seq2seq/t5_transformer.py,sha256=wDVxNLluIU1HGZFqaKKc4YTt4l-elPlAtQ7EEa0f5tg,17308
|
|
151
163
|
sparknlp/annotator/similarity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
152
164
|
sparknlp/annotator/similarity/document_similarity_ranker.py,sha256=BHV2XWA18YvBn_OKOVvR0TmPPnHSgiAgpZpaPz7ar_s,15826
|
|
@@ -188,7 +200,7 @@ sparknlp/common/read_as.py,sha256=imxPGwV7jr4Li_acbo0OAHHRGCBbYv-akzEGaBWEfcY,12
|
|
|
188
200
|
sparknlp/common/recursive_annotator_approach.py,sha256=vqugBw22cE3Ff7PIpRlnYFuOlchgL0nM26D8j-NdpqU,1449
|
|
189
201
|
sparknlp/common/storage.py,sha256=D91H3p8EIjNspjqAYu6ephRpCUtdcAir4_PrAbkIQWE,4842
|
|
190
202
|
sparknlp/common/utils.py,sha256=Yne6yYcwKxhOZC-U4qfYoDhWUP_6BIaAjI5X_P_df1E,1306
|
|
191
|
-
sparknlp/internal/__init__.py,sha256=
|
|
203
|
+
sparknlp/internal/__init__.py,sha256=nK-9lncAVRXmyP8ATbiMwRnLJVe4IEd_r5Z3gEqDK3g,33672
|
|
192
204
|
sparknlp/internal/annotator_java_ml.py,sha256=UGPoThG0rGXUOXGSQnDzEDW81Mu1s5RPF29v7DFyE3c,1187
|
|
193
205
|
sparknlp/internal/annotator_transformer.py,sha256=fXmc2IWXGybqZpbEU9obmbdBYPc798y42zvSB4tqV9U,1448
|
|
194
206
|
sparknlp/internal/extended_java_wrapper.py,sha256=hwP0133-hDiDf5sBF-P3MtUsuuDj1PpQbtGZQIRwzfk,2240
|
|
@@ -230,8 +242,8 @@ sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py,sha256=R4yHFN3
|
|
|
230
242
|
sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py,sha256=EoCSdcIjqQ3wv13MAuuWrKV8wyVBP0SbOEW41omHlR0,23189
|
|
231
243
|
sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py,sha256=k5CQ7gKV6HZbZMB8cKLUJuZxoZWlP_DFWdZ--aIDwsc,2356
|
|
232
244
|
sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py,sha256=pAxjWhjazSX8Vg0MFqJiuRVw1IbnQNSs-8Xp26L4nko,870
|
|
233
|
-
spark_nlp-5.
|
|
234
|
-
spark_nlp-5.
|
|
235
|
-
spark_nlp-5.
|
|
236
|
-
spark_nlp-5.
|
|
237
|
-
spark_nlp-5.
|
|
245
|
+
spark_nlp-5.5.0rc1.dist-info/.uuid,sha256=1f6hF51aIuv9yCvh31NU9lOpS34NE-h3a0Et7R9yR6A,36
|
|
246
|
+
spark_nlp-5.5.0rc1.dist-info/METADATA,sha256=c9a30g4ogelUAj-Np3naOPq4lWx6mGRgXD59IjQlr1M,55774
|
|
247
|
+
spark_nlp-5.5.0rc1.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
|
|
248
|
+
spark_nlp-5.5.0rc1.dist-info/top_level.txt,sha256=uuytur4pyMRw2H_txNY2ZkaucZHUs22QF8-R03ch_-E,13
|
|
249
|
+
spark_nlp-5.5.0rc1.dist-info/RECORD,,
|
sparknlp/__init__.py
CHANGED
|
@@ -129,7 +129,7 @@ def start(gpu=False,
|
|
|
129
129
|
The initiated Spark session.
|
|
130
130
|
|
|
131
131
|
"""
|
|
132
|
-
current_version = "5.
|
|
132
|
+
current_version = "5.5.0-rc1"
|
|
133
133
|
|
|
134
134
|
if params is None:
|
|
135
135
|
params = {}
|
|
@@ -310,4 +310,4 @@ def version():
|
|
|
310
310
|
str
|
|
311
311
|
The current Spark NLP version.
|
|
312
312
|
"""
|
|
313
|
-
return '5.
|
|
313
|
+
return '5.5.0-rc1'
|
|
@@ -51,4 +51,6 @@ from sparknlp.annotator.classifier_dl.bart_for_zero_shot_classification import *
|
|
|
51
51
|
from sparknlp.annotator.classifier_dl.deberta_for_zero_shot_classification import *
|
|
52
52
|
from sparknlp.annotator.classifier_dl.mpnet_for_sequence_classification import *
|
|
53
53
|
from sparknlp.annotator.classifier_dl.mpnet_for_question_answering import *
|
|
54
|
-
from sparknlp.annotator.classifier_dl.mpnet_for_token_classification import *
|
|
54
|
+
from sparknlp.annotator.classifier_dl.mpnet_for_token_classification import *
|
|
55
|
+
from sparknlp.annotator.classifier_dl.albert_for_zero_shot_classification import *
|
|
56
|
+
from sparknlp.annotator.classifier_dl.camembert_for_zero_shot_classification import *
|