spark-nlp 5.3.2__py2.py3-none-any.whl → 5.4.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of spark-nlp might be problematic. Click here for more details.

Files changed (28) hide show
  1. com/johnsnowlabs/ml/__init__.py +0 -0
  2. com/johnsnowlabs/ml/ai/__init__.py +10 -0
  3. {spark_nlp-5.3.2.dist-info → spark_nlp-5.4.0.dist-info}/METADATA +50 -60
  4. {spark_nlp-5.3.2.dist-info → spark_nlp-5.4.0.dist-info}/RECORD +28 -22
  5. sparknlp/__init__.py +3 -2
  6. sparknlp/annotator/classifier_dl/__init__.py +1 -0
  7. sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py +173 -0
  8. sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py +3 -3
  9. sparknlp/annotator/embeddings/__init__.py +1 -0
  10. sparknlp/annotator/embeddings/bert_embeddings.py +4 -2
  11. sparknlp/annotator/embeddings/bert_sentence_embeddings.py +4 -2
  12. sparknlp/annotator/embeddings/bge_embeddings.py +2 -0
  13. sparknlp/annotator/embeddings/e5_embeddings.py +6 -2
  14. sparknlp/annotator/embeddings/mpnet_embeddings.py +2 -0
  15. sparknlp/annotator/embeddings/roberta_embeddings.py +4 -2
  16. sparknlp/annotator/embeddings/uae_embeddings.py +211 -0
  17. sparknlp/annotator/embeddings/xlm_roberta_embeddings.py +4 -2
  18. sparknlp/annotator/openai/openai_embeddings.py +43 -69
  19. sparknlp/annotator/seq2seq/__init__.py +2 -0
  20. sparknlp/annotator/seq2seq/llama2_transformer.py +2 -2
  21. sparknlp/annotator/seq2seq/m2m100_transformer.py +2 -2
  22. sparknlp/annotator/seq2seq/mistral_transformer.py +349 -0
  23. sparknlp/annotator/seq2seq/phi2_transformer.py +326 -0
  24. sparknlp/internal/__init__.py +443 -148
  25. sparknlp/pretrained/resource_downloader.py +2 -3
  26. {spark_nlp-5.3.2.dist-info → spark_nlp-5.4.0.dist-info}/.uuid +0 -0
  27. {spark_nlp-5.3.2.dist-info → spark_nlp-5.4.0.dist-info}/WHEEL +0 -0
  28. {spark_nlp-5.3.2.dist-info → spark_nlp-5.4.0.dist-info}/top_level.txt +0 -0
File without changes
@@ -0,0 +1,10 @@
1
+ import sys
2
+
3
+ if sys.version_info[0] == 2:
4
+ raise ImportError(
5
+ "Spark NLP for Python 2.x is deprecated since version >= 4.0. "
6
+ "Please use an older version to use it with this Python version."
7
+ )
8
+ else:
9
+ import sparknlp
10
+ sys.modules['com.johnsnowlabs.ml.ai'] = sparknlp
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: spark-nlp
3
- Version: 5.3.2
3
+ Version: 5.4.0
4
4
  Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
5
5
  Home-page: https://github.com/JohnSnowLabs/spark-nlp
6
6
  Author: John Snow Labs
@@ -146,6 +146,7 @@ documentation and examples
146
146
  - INSTRUCTOR Embeddings (HuggingFace models)
147
147
  - E5 Embeddings (HuggingFace models)
148
148
  - MPNet Embeddings (HuggingFace models)
149
+ - UAE Embeddings (HuggingFace models)
149
150
  - OpenAI Embeddings
150
151
  - Sentence & Chunk Embeddings
151
152
  - Unsupervised keywords extraction
@@ -170,7 +171,7 @@ documentation and examples
170
171
  - Text-To-Text Transfer Transformer (Google T5)
171
172
  - Generative Pre-trained Transformer 2 (OpenAI GPT2)
172
173
  - Seq2Seq for NLG, Translation, and Comprehension (Facebook BART)
173
- - Chat and Conversational LLMs (Facebook Llama-22)
174
+ - Chat and Conversational LLMs (Facebook Llama-2)
174
175
  - Vision Transformer (Google ViT)
175
176
  - Swin Image Classification (Microsoft Swin Transformer)
176
177
  - ConvNext Image Classification (Facebook ConvNext)
@@ -180,10 +181,10 @@ documentation and examples
180
181
  - Automatic Speech Recognition (HuBERT)
181
182
  - Automatic Speech Recognition (OpenAI Whisper)
182
183
  - Named entity recognition (Deep learning)
183
- - Easy ONNX and TensorFlow integrations
184
+ - Easy ONNX, OpenVINO, and TensorFlow integrations
184
185
  - GPU Support
185
186
  - Full integration with Spark ML functions
186
- - +30000 pre-trained models in +200 languages!
187
+ - +31000 pre-trained models in +200 languages!
187
188
  - +6000 pre-trained pipelines in +200 languages!
188
189
  - Multi-lingual NER models: Arabic, Bengali, Chinese, Danish, Dutch, English, Finnish, French, German, Hebrew, Italian,
189
190
  Japanese, Korean, Norwegian, Persian, Polish, Portuguese, Russian, Spanish, Swedish, Urdu, and more.
@@ -197,7 +198,7 @@ To use Spark NLP you need the following requirements:
197
198
 
198
199
  **GPU (optional):**
199
200
 
200
- Spark NLP 5.3.2 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The minimum following NVIDIA® software are only required for GPU support:
201
+ Spark NLP 5.4.0 is built with ONNX 1.17.0 and TensorFlow 2.7.1 deep learning engines. The following NVIDIA® software is the minimum required for GPU support:
201
202
 
202
203
  - NVIDIA® GPU drivers version 450.80.02 or higher
203
204
  - CUDA® Toolkit 11.2
@@ -213,7 +214,7 @@ $ java -version
213
214
  $ conda create -n sparknlp python=3.7 -y
214
215
  $ conda activate sparknlp
215
216
  # spark-nlp by default is based on pyspark 3.x
216
- $ pip install spark-nlp==5.3.2 pyspark==3.3.1
217
+ $ pip install spark-nlp==5.4.0 pyspark==3.3.1
217
218
  ```
218
219
 
219
220
  In Python console or Jupyter `Python3` kernel:
@@ -258,10 +259,11 @@ For more examples, you can visit our dedicated [examples](https://github.com/Joh
258
259
 
259
260
  ## Apache Spark Support
260
261
 
261
- Spark NLP *5.3.2* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
262
+ Spark NLP *5.4.0* has been built on top of Apache Spark 3.4 while fully supporting Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
262
263
 
263
264
  | Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
264
265
  |-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
266
+ | 5.4.x | YES | YES | YES | YES | YES | YES | NO | NO |
265
267
  | 5.3.x | YES | YES | YES | YES | YES | YES | NO | NO |
266
268
  | 5.2.x | YES | YES | YES | YES | YES | YES | NO | NO |
267
269
  | 5.1.x | Partially | YES | YES | YES | YES | YES | NO | NO |
@@ -271,12 +273,6 @@ Spark NLP *5.3.2* has been built on top of Apache Spark 3.4 while fully supports
271
273
  | 4.2.x | NO | NO | YES | YES | YES | YES | NO | NO |
272
274
  | 4.1.x | NO | NO | YES | YES | YES | YES | NO | NO |
273
275
  | 4.0.x | NO | NO | YES | YES | YES | YES | NO | NO |
274
- | 3.4.x | NO | NO | N/A | Partially | YES | YES | YES | YES |
275
- | 3.3.x | NO | NO | NO | NO | YES | YES | YES | YES |
276
- | 3.2.x | NO | NO | NO | NO | YES | YES | YES | YES |
277
- | 3.1.x | NO | NO | NO | NO | YES | YES | YES | YES |
278
- | 3.0.x | NO | NO | NO | NO | YES | YES | YES | YES |
279
- | 2.7.x | NO | NO | NO | NO | NO | NO | YES | YES |
280
276
 
281
277
  Find out more about `Spark NLP` versions from our [release notes](https://github.com/JohnSnowLabs/spark-nlp/releases).
282
278
 
@@ -293,16 +289,10 @@ Find out more about `Spark NLP` versions from our [release notes](https://github
293
289
  | 4.2.x | YES | YES | YES | YES | YES | NO | YES |
294
290
  | 4.1.x | YES | YES | YES | YES | NO | NO | YES |
295
291
  | 4.0.x | YES | YES | YES | YES | NO | NO | YES |
296
- | 3.4.x | YES | YES | YES | YES | NO | YES | YES |
297
- | 3.3.x | YES | YES | YES | NO | NO | YES | YES |
298
- | 3.2.x | YES | YES | YES | NO | NO | YES | YES |
299
- | 3.1.x | YES | YES | YES | NO | NO | YES | YES |
300
- | 3.0.x | YES | YES | YES | NO | NO | YES | YES |
301
- | 2.7.x | YES | YES | NO | NO | NO | YES | NO |
302
292
 
303
293
  ## Databricks Support
304
294
 
305
- Spark NLP 5.3.2 has been tested and is compatible with the following runtimes:
295
+ Spark NLP 5.4.0 has been tested and is compatible with the following runtimes:
306
296
 
307
297
  **CPU:**
308
298
 
@@ -375,7 +365,7 @@ Spark NLP 5.3.2 has been tested and is compatible with the following runtimes:
375
365
 
376
366
  ## EMR Support
377
367
 
378
- Spark NLP 5.3.2 has been tested and is compatible with the following EMR releases:
368
+ Spark NLP 5.4.0 has been tested and is compatible with the following EMR releases:
379
369
 
380
370
  - emr-6.2.0
381
371
  - emr-6.3.0
@@ -425,11 +415,11 @@ Spark NLP supports all major releases of Apache Spark 3.0.x, Apache Spark 3.1.x,
425
415
  ```sh
426
416
  # CPU
427
417
 
428
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2
418
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
429
419
 
430
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2
420
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
431
421
 
432
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2
422
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
433
423
  ```
434
424
 
435
425
  The `spark-nlp` has been published to
@@ -438,11 +428,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
438
428
  ```sh
439
429
  # GPU
440
430
 
441
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.2
431
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
442
432
 
443
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.2
433
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
444
434
 
445
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.3.2
435
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-gpu_2.12:5.4.0
446
436
 
447
437
  ```
448
438
 
@@ -452,11 +442,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
452
442
  ```sh
453
443
  # AArch64
454
444
 
455
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.2
445
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
456
446
 
457
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.2
447
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
458
448
 
459
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.3.2
449
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-aarch64_2.12:5.4.0
460
450
 
461
451
  ```
462
452
 
@@ -466,11 +456,11 @@ the [Maven Repository](https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/s
466
456
  ```sh
467
457
  # M1/M2 (Apple Silicon)
468
458
 
469
- spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.2
459
+ spark-shell --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
470
460
 
471
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.2
461
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
472
462
 
473
- spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.3.2
463
+ spark-submit --packages com.johnsnowlabs.nlp:spark-nlp-silicon_2.12:5.4.0
474
464
 
475
465
  ```
476
466
 
@@ -484,7 +474,7 @@ set in your SparkSession:
484
474
  spark-shell \
485
475
  --driver-memory 16g \
486
476
  --conf spark.kryoserializer.buffer.max=2000M \
487
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2
477
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
488
478
  ```
489
479
 
490
480
  ## Scala
@@ -502,7 +492,7 @@ coordinates:
502
492
  <dependency>
503
493
  <groupId>com.johnsnowlabs.nlp</groupId>
504
494
  <artifactId>spark-nlp_2.12</artifactId>
505
- <version>5.3.2</version>
495
+ <version>5.4.0</version>
506
496
  </dependency>
507
497
  ```
508
498
 
@@ -513,7 +503,7 @@ coordinates:
513
503
  <dependency>
514
504
  <groupId>com.johnsnowlabs.nlp</groupId>
515
505
  <artifactId>spark-nlp-gpu_2.12</artifactId>
516
- <version>5.3.2</version>
506
+ <version>5.4.0</version>
517
507
  </dependency>
518
508
  ```
519
509
 
@@ -524,7 +514,7 @@ coordinates:
524
514
  <dependency>
525
515
  <groupId>com.johnsnowlabs.nlp</groupId>
526
516
  <artifactId>spark-nlp-aarch64_2.12</artifactId>
527
- <version>5.3.2</version>
517
+ <version>5.4.0</version>
528
518
  </dependency>
529
519
  ```
530
520
 
@@ -535,7 +525,7 @@ coordinates:
535
525
  <dependency>
536
526
  <groupId>com.johnsnowlabs.nlp</groupId>
537
527
  <artifactId>spark-nlp-silicon_2.12</artifactId>
538
- <version>5.3.2</version>
528
+ <version>5.4.0</version>
539
529
  </dependency>
540
530
  ```
541
531
 
@@ -545,28 +535,28 @@ coordinates:
545
535
 
546
536
  ```sbtshell
547
537
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp
548
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.3.2"
538
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp" % "5.4.0"
549
539
  ```
550
540
 
551
541
  **spark-nlp-gpu:**
552
542
 
553
543
  ```sbtshell
554
544
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-gpu
555
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.3.2"
545
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-gpu" % "5.4.0"
556
546
  ```
557
547
 
558
548
  **spark-nlp-aarch64:**
559
549
 
560
550
  ```sbtshell
561
551
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-aarch64
562
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.3.2"
552
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-aarch64" % "5.4.0"
563
553
  ```
564
554
 
565
555
  **spark-nlp-silicon:**
566
556
 
567
557
  ```sbtshell
568
558
  // https://mvnrepository.com/artifact/com.johnsnowlabs.nlp/spark-nlp-silicon
569
- libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.3.2"
559
+ libraryDependencies += "com.johnsnowlabs.nlp" %% "spark-nlp-silicon" % "5.4.0"
570
560
  ```
571
561
 
572
562
  Maven
@@ -588,7 +578,7 @@ If you installed pyspark through pip/conda, you can install `spark-nlp` through
588
578
  Pip:
589
579
 
590
580
  ```bash
591
- pip install spark-nlp==5.3.2
581
+ pip install spark-nlp==5.4.0
592
582
  ```
593
583
 
594
584
  Conda:
@@ -617,7 +607,7 @@ spark = SparkSession.builder
617
607
  .config("spark.driver.memory", "16G")
618
608
  .config("spark.driver.maxResultSize", "0")
619
609
  .config("spark.kryoserializer.buffer.max", "2000M")
620
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2")
610
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0")
621
611
  .getOrCreate()
622
612
  ```
623
613
 
@@ -688,7 +678,7 @@ Use either one of the following options
688
678
  - Add the following Maven Coordinates to the interpreter's library list
689
679
 
690
680
  ```bash
691
- com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2
681
+ com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
692
682
  ```
693
683
 
694
684
  - Add a path to pre-built jar from [here](#compiled-jars) in the interpreter's library list making sure the jar is
@@ -699,7 +689,7 @@ com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2
699
689
  Apart from the previous step, install the python module through pip
700
690
 
701
691
  ```bash
702
- pip install spark-nlp==5.3.2
692
+ pip install spark-nlp==5.4.0
703
693
  ```
704
694
 
705
695
  Or you can install `spark-nlp` from inside Zeppelin by using Conda:
@@ -727,7 +717,7 @@ launch the Jupyter from the same Python environment:
727
717
  $ conda create -n sparknlp python=3.8 -y
728
718
  $ conda activate sparknlp
729
719
  # spark-nlp by default is based on pyspark 3.x
730
- $ pip install spark-nlp==5.3.2 pyspark==3.3.1 jupyter
720
+ $ pip install spark-nlp==5.4.0 pyspark==3.3.1 jupyter
731
721
  $ jupyter notebook
732
722
  ```
733
723
 
@@ -744,7 +734,7 @@ export PYSPARK_PYTHON=python3
744
734
  export PYSPARK_DRIVER_PYTHON=jupyter
745
735
  export PYSPARK_DRIVER_PYTHON_OPTS=notebook
746
736
 
747
- pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2
737
+ pyspark --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
748
738
  ```
749
739
 
750
740
  Alternatively, you can mix in using `--jars` option for pyspark + `pip install spark-nlp`
@@ -771,7 +761,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
771
761
  # -s is for spark-nlp
772
762
  # -g will enable upgrading libcudnn8 to 8.1.0 on Google Colab for GPU usage
773
763
  # by default they are set to the latest
774
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.3.2
764
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0
775
765
  ```
776
766
 
777
767
  [Spark NLP quick start on Google Colab](https://colab.research.google.com/github/JohnSnowLabs/spark-nlp/blob/master/examples/python/quick_start_google_colab.ipynb)
@@ -794,7 +784,7 @@ This script comes with the two options to define `pyspark` and `spark-nlp` versi
794
784
  # -s is for spark-nlp
795
785
  # -g will enable upgrading libcudnn8 to 8.1.0 on Kaggle for GPU usage
796
786
  # by default they are set to the latest
797
- !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.3.2
787
+ !wget https://setup.johnsnowlabs.com/colab.sh -O - | bash /dev/stdin -p 3.2.3 -s 5.4.0
798
788
  ```
799
789
 
800
790
  [Spark NLP quick start on Kaggle Kernel](https://www.kaggle.com/mozzie/spark-nlp-named-entity-recognition) is a live
@@ -813,9 +803,9 @@ demo on Kaggle Kernel that performs named entity recognitions by using Spark NLP
813
803
 
814
804
  3. In `Libraries` tab inside your cluster you need to follow these steps:
815
805
 
816
- 3.1. Install New -> PyPI -> `spark-nlp==5.3.2` -> Install
806
+ 3.1. Install New -> PyPI -> `spark-nlp==5.4.0` -> Install
817
807
 
818
- 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2` -> Install
808
+ 3.2. Install New -> Maven -> Coordinates -> `com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0` -> Install
819
809
 
820
810
  4. Now you can attach your notebook to the cluster and use Spark NLP!
821
811
 
@@ -866,7 +856,7 @@ A sample of your software configuration in JSON on S3 (must be public access):
866
856
  "spark.kryoserializer.buffer.max": "2000M",
867
857
  "spark.serializer": "org.apache.spark.serializer.KryoSerializer",
868
858
  "spark.driver.maxResultSize": "0",
869
- "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2"
859
+ "spark.jars.packages": "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0"
870
860
  }
871
861
  }]
872
862
  ```
@@ -875,7 +865,7 @@ A sample of AWS CLI to launch EMR cluster:
875
865
 
876
866
  ```.sh
877
867
  aws emr create-cluster \
878
- --name "Spark NLP 5.3.2" \
868
+ --name "Spark NLP 5.4.0" \
879
869
  --release-label emr-6.2.0 \
880
870
  --applications Name=Hadoop Name=Spark Name=Hive \
881
871
  --instance-type m4.4xlarge \
@@ -939,7 +929,7 @@ gcloud dataproc clusters create ${CLUSTER_NAME} \
939
929
  --enable-component-gateway \
940
930
  --metadata 'PIP_PACKAGES=spark-nlp spark-nlp-display google-cloud-bigquery google-cloud-storage' \
941
931
  --initialization-actions gs://goog-dataproc-initialization-actions-${REGION}/python/pip-install.sh \
942
- --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2
932
+ --properties spark:spark.serializer=org.apache.spark.serializer.KryoSerializer,spark:spark.driver.maxResultSize=0,spark:spark.kryoserializer.buffer.max=2000M,spark:spark.jars.packages=com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
943
933
  ```
944
934
 
945
935
  2. On an existing one, you need to install spark-nlp and spark-nlp-display packages from PyPI.
@@ -982,7 +972,7 @@ spark = SparkSession.builder
982
972
  .config("spark.kryoserializer.buffer.max", "2000m")
983
973
  .config("spark.jsl.settings.pretrained.cache_folder", "sample_data/pretrained")
984
974
  .config("spark.jsl.settings.storage.cluster_tmp_dir", "sample_data/storage")
985
- .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2")
975
+ .config("spark.jars.packages", "com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0")
986
976
  .getOrCreate()
987
977
  ```
988
978
 
@@ -996,7 +986,7 @@ spark-shell \
996
986
  --conf spark.kryoserializer.buffer.max=2000M \
997
987
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
998
988
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
999
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2
989
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
1000
990
  ```
1001
991
 
1002
992
  **pyspark:**
@@ -1009,7 +999,7 @@ pyspark \
1009
999
  --conf spark.kryoserializer.buffer.max=2000M \
1010
1000
  --conf spark.jsl.settings.pretrained.cache_folder="sample_data/pretrained" \
1011
1001
  --conf spark.jsl.settings.storage.cluster_tmp_dir="sample_data/storage" \
1012
- --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.3.2
1002
+ --packages com.johnsnowlabs.nlp:spark-nlp_2.12:5.4.0
1013
1003
  ```
1014
1004
 
1015
1005
  **Databricks:**
@@ -1281,7 +1271,7 @@ spark = SparkSession.builder
1281
1271
  .config("spark.driver.memory", "16G")
1282
1272
  .config("spark.driver.maxResultSize", "0")
1283
1273
  .config("spark.kryoserializer.buffer.max", "2000M")
1284
- .config("spark.jars", "/tmp/spark-nlp-assembly-5.3.2.jar")
1274
+ .config("spark.jars", "/tmp/spark-nlp-assembly-5.4.0.jar")
1285
1275
  .getOrCreate()
1286
1276
  ```
1287
1277
 
@@ -1290,7 +1280,7 @@ spark = SparkSession.builder
1290
1280
  version (3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x)
1291
1281
  - If you are local, you can load the Fat JAR from your local FileSystem, however, if you are in a cluster setup you need
1292
1282
  to put the Fat JAR on a distributed FileSystem such as HDFS, DBFS, S3, etc. (
1293
- i.e., `hdfs:///tmp/spark-nlp-assembly-5.3.2.jar`)
1283
+ i.e., `hdfs:///tmp/spark-nlp-assembly-5.4.0.jar`)
1294
1284
 
1295
1285
  Example of using pretrained Models and Pipelines in offline:
1296
1286
 
@@ -1,7 +1,9 @@
1
1
  com/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  com/johnsnowlabs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ com/johnsnowlabs/ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ com/johnsnowlabs/ml/ai/__init__.py,sha256=YQiK2M7U4d8y5irPy_HB8ae0mSpqS9583MH44pnKJXc,295
3
5
  com/johnsnowlabs/nlp/__init__.py,sha256=DPIVXtONO5xXyOk-HB0-sNiHAcco17NN13zPS_6Uw8c,294
4
- sparknlp/__init__.py,sha256=N74jXyWIf6cZnJADb-aBShj-r4eCjcLGDPmXcRNEBN0,13588
6
+ sparknlp/__init__.py,sha256=lRQR3K0noT97MQlXrjnJEgvD4QIvuUUMrbC7VCND4w4,13638
5
7
  sparknlp/annotation.py,sha256=I5zOxG5vV2RfPZfqN9enT1i4mo6oBcn3Lrzs37QiOiA,5635
6
8
  sparknlp/annotation_audio.py,sha256=iRV_InSVhgvAwSRe9NTbUH9v6OGvTM-FPCpSAKVu0mE,1917
7
9
  sparknlp/annotation_image.py,sha256=xhCe8Ko-77XqWVuuYHFrjKqF6zPd8Z-RY_rmZXNwCXU,2547
@@ -28,7 +30,7 @@ sparknlp/annotator/audio/__init__.py,sha256=dXjtvi5c0aTZFq1Q_JciUd1uFTBVSJoUdcq0
28
30
  sparknlp/annotator/audio/hubert_for_ctc.py,sha256=76PfwPZZvOHU5kfDqLueCFbmqa4W8pMNRGoCvOqjsEA,7859
29
31
  sparknlp/annotator/audio/wav2vec2_for_ctc.py,sha256=K78P1U6vA4O1UufsLYzy0H7arsKNmwPcIV7kzDFsA5Q,6210
30
32
  sparknlp/annotator/audio/whisper_for_ctc.py,sha256=uII51umuohqwnAW0Q7VdxEFyr_j5LMnfpcRlf8TbetA,9800
31
- sparknlp/annotator/classifier_dl/__init__.py,sha256=tGg78A8LUgobZFre_3ySN51KGNyl0Zx0inxT9yfL_g8,3686
33
+ sparknlp/annotator/classifier_dl/__init__.py,sha256=dsbceLBdAsk0VlvgcCcGANHMcyFMKi7-sdyu-Eg41ws,3763
32
34
  sparknlp/annotator/classifier_dl/albert_for_question_answering.py,sha256=LG2dL6Fky1T35yXTUZBfIihIIGnkRFQ7ECQ3HRXXEG8,6517
33
35
  sparknlp/annotator/classifier_dl/albert_for_sequence_classification.py,sha256=kWx7f9pcKE2qw319gn8FN0Md5dX38gbmfeoY9gWCLNk,7842
34
36
  sparknlp/annotator/classifier_dl/albert_for_token_classification.py,sha256=5rdsjWnsAVmtP-idU7ATKJ8lkH2rtlKZLnpi4Mq27eI,6839
@@ -54,6 +56,7 @@ sparknlp/annotator/classifier_dl/longformer_for_sequence_classification.py,sha25
54
56
  sparknlp/annotator/classifier_dl/longformer_for_token_classification.py,sha256=RmiFuBRhIAoJoQ8Rgcu997-PxBK1hhWuLVlS1qztMyk,6848
55
57
  sparknlp/annotator/classifier_dl/mpnet_for_question_answering.py,sha256=w9hHLrQbDIUHAdCKiXNDneAbohMKopixAKU2wkYkqbs,5522
56
58
  sparknlp/annotator/classifier_dl/mpnet_for_sequence_classification.py,sha256=M__giFElL6Q3I88QD6OoXDzdQDk_Zp5sS__Kh_XpLdo,7308
59
+ sparknlp/annotator/classifier_dl/mpnet_for_token_classification.py,sha256=SgFAJcv7ZE3BmJOehK_CjAaueqaaK6PR33zA5aE9-Ww,6754
57
60
  sparknlp/annotator/classifier_dl/multi_classifier_dl.py,sha256=ylKQzS7ROyeKeiOF4BZiIkQV1sfrnfUUQ9LXFSFK_Vo,16045
58
61
  sparknlp/annotator/classifier_dl/roberta_for_question_answering.py,sha256=WRxu1uhXnY9C4UHdtJ8qiVGhPSX7sCdSaML0AWHOdJw,6471
59
62
  sparknlp/annotator/classifier_dl/roberta_for_sequence_classification.py,sha256=z97uH5WkG8kPX1Y9qtpLwD7egl0kzbVmxtq4xzZgNNI,7857
@@ -63,7 +66,7 @@ sparknlp/annotator/classifier_dl/sentiment_dl.py,sha256=6Z7X3-ykxoaUz6vz-YIXkv2m
63
66
  sparknlp/annotator/classifier_dl/tapas_for_question_answering.py,sha256=2YBODMDUZT-j5ceOFTixrEkOqrztIM1kU-tsW_wao18,6317
64
67
  sparknlp/annotator/classifier_dl/xlm_roberta_for_question_answering.py,sha256=t_zCnKGCjDccKNj_2mjRkysOaNCWNBMKXehbuFSphQc,6538
65
68
  sparknlp/annotator/classifier_dl/xlm_roberta_for_sequence_classification.py,sha256=sudgwa8_QZQzaYvEMSt6J1bDDwyK2Hp1VFhh98P08hY,7930
66
- sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py,sha256=pe4Y1XDxDMQs1q32bwhbPC5_oKcJ4n5JFu-dsofLdSA,6850
69
+ sparknlp/annotator/classifier_dl/xlm_roberta_for_token_classification.py,sha256=ub5mMiZYKP4eBmXRzjkjfv_FFFR8E01XJs0RC__RxPo,6808
67
70
  sparknlp/annotator/classifier_dl/xlm_roberta_for_zero_shot_classification.py,sha256=4dBzpPj-VJcZul5hGcyjYkVMQ1PiaXZEGwvEaob3rss,8899
68
71
  sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py,sha256=CI9Ah2lyHkqwDHWGCbkk_gPbQd0NudpC7oXiHtWOucs,7811
69
72
  sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py,sha256=SndQpIfslsSYEOX-myLjpUS6-wVIeDG8MOhJYcu2_7M,6739
@@ -78,28 +81,29 @@ sparknlp/annotator/cv/vit_for_image_classification.py,sha256=D2V3pxAd3rBi1817lxV
78
81
  sparknlp/annotator/dependency/__init__.py,sha256=eV43oXAGaYl2N1XKIEAAZJLNP8gpHm8VxuXDeDlQzR4,774
79
82
  sparknlp/annotator/dependency/dependency_parser.py,sha256=SxyvHPp8Hs1Xnm5X1nLTMi095XoQMtfL8pbys15mYAI,11212
80
83
  sparknlp/annotator/dependency/typed_dependency_parser.py,sha256=60vPdYkbFk9MPGegg3m9Uik9cMXpMZd8tBvXG39gNww,12456
81
- sparknlp/annotator/embeddings/__init__.py,sha256=od9aVMywyLf0KYBueoTeUjFbbCnh4UIuIGbsXwGtOAQ,2097
84
+ sparknlp/annotator/embeddings/__init__.py,sha256=XQ6-UMsfvH54u3f0yceKiM8XJOAugIT3jwHE3ExoppI,2156
82
85
  sparknlp/annotator/embeddings/albert_embeddings.py,sha256=6Rd1LIn8oFIpq_ALcJh-RUjPEO7Ht8wsHY6JHSFyMkw,9995
83
- sparknlp/annotator/embeddings/bert_embeddings.py,sha256=uExpIlJNkQpuoZ3J_Zc2b2dV0hDNCRCAujNR4Lckly4,8369
84
- sparknlp/annotator/embeddings/bert_sentence_embeddings.py,sha256=XHls9qOkurwg9o6nDuwk77KSMNJmv1n4L5pcU22alWA,9054
85
- sparknlp/annotator/embeddings/bge_embeddings.py,sha256=FNmYxcynM1iLJvg5ZNmrZKkyIF0Gtr7G-CgZ72mrVyU,7842
86
+ sparknlp/annotator/embeddings/bert_embeddings.py,sha256=HVUjkg56kBcpGZCo-fmPG5uatMDF3swW_lnbpy1SgSI,8463
87
+ sparknlp/annotator/embeddings/bert_sentence_embeddings.py,sha256=NQy9KuXT9aKsTpYCR5RAeoFWI2YqEGorbdYrf_0KKmw,9148
88
+ sparknlp/annotator/embeddings/bge_embeddings.py,sha256=hXFFd9HOru1w2L9N5YGSZlaKyxqMsZccpaI4Z8-bNUE,7919
86
89
  sparknlp/annotator/embeddings/camembert_embeddings.py,sha256=dBTXas-2Tas_JUR9Xt_GtHLcyqi_cdvT5EHRnyVrSSQ,8817
87
90
  sparknlp/annotator/embeddings/chunk_embeddings.py,sha256=WUmkJimSuFkdcLJnvcxOV0QlCLgGlhub29ZTrZb70WE,6052
88
91
  sparknlp/annotator/embeddings/deberta_embeddings.py,sha256=_b5nzLb7heFQNN-uT2oBNO6-YmM8bHmAdnGXg47HOWw,8649
89
92
  sparknlp/annotator/embeddings/distil_bert_embeddings.py,sha256=4pyMCsbvvXYeTGIMVUir9wCDKR_1f_HKtXZrTDO1Thc,9275
90
93
  sparknlp/annotator/embeddings/doc2vec.py,sha256=Xk3MdEkXatX9lRgbFbAdnIDrLgIxzUIGWFBZeo9BTq0,13226
91
- sparknlp/annotator/embeddings/e5_embeddings.py,sha256=dfPHCAYpayCUMxXtol0t68cDs8-JVu0M4EslimwNS0Q,7684
94
+ sparknlp/annotator/embeddings/e5_embeddings.py,sha256=Esuvrq9JlogGaSSzFVVDkOFMwgYwFwr17I62ZiCDm0k,7858
92
95
  sparknlp/annotator/embeddings/elmo_embeddings.py,sha256=KV-KPs0Pq_OpPaHsnqBz2k_S7VdzyFZ4632IeFNKqJ8,9858
93
96
  sparknlp/annotator/embeddings/instructor_embeddings.py,sha256=CTKmbuBOx_KBM4JM-Y1U5LyR-6rrnpoBGbgGE_axS1c,8670
94
97
  sparknlp/annotator/embeddings/longformer_embeddings.py,sha256=jS4fxB5O0-d9ta9VKv8ai-17n5YHt5rML8QxUw7K4Io,8754
95
- sparknlp/annotator/embeddings/mpnet_embeddings.py,sha256=2sabImn5spYGzfNwBSH2zUU90Wjqrm2btCVbDbtsqPg,7796
96
- sparknlp/annotator/embeddings/roberta_embeddings.py,sha256=V4HGDUK2YBHhAZd1ygJEGUmxDgul0MrpKDm1UQcNqTs,9135
98
+ sparknlp/annotator/embeddings/mpnet_embeddings.py,sha256=7d6E4lS7jjkppDPvty1UHNNrbykkriFiysrxZ_RzL0U,7875
99
+ sparknlp/annotator/embeddings/roberta_embeddings.py,sha256=q_WHby2lDcPc5bVHkGc6X_GwT3qyDUBLUVz5ZW4HCSY,9229
97
100
  sparknlp/annotator/embeddings/roberta_sentence_embeddings.py,sha256=KVrD4z_tIU-sphK6dmbbnHBBt8-Y89C_BFQAkN99kZo,8181
98
101
  sparknlp/annotator/embeddings/sentence_embeddings.py,sha256=azuA1FKMtTJ9suwJqTEHeWHumT6kYdfURTe_1fsqcB8,5402
102
+ sparknlp/annotator/embeddings/uae_embeddings.py,sha256=sqTT67vcegVxcyoATISLPJSmOnA6J_otB6iREKOb6e4,8794
99
103
  sparknlp/annotator/embeddings/universal_sentence_encoder.py,sha256=_fTo-K78RjxiIKptpsI32mpW87RFCdXM16epHv4RVQY,8571
100
104
  sparknlp/annotator/embeddings/word2vec.py,sha256=UBhA4qUczQOx1t82Eu51lxx1-wJ_RLnCb__ncowSNhk,13229
101
105
  sparknlp/annotator/embeddings/word_embeddings.py,sha256=CQxjx2yDdmSM9s8D-bzsbUQhT8t1cqC4ynxlf9INpMU,15388
102
- sparknlp/annotator/embeddings/xlm_roberta_embeddings.py,sha256=t-Bg1bQcqI_fIqUWQbHt9rHK2_tyq0YXiq3uMw4xb94,9488
106
+ sparknlp/annotator/embeddings/xlm_roberta_embeddings.py,sha256=S2HHXOrSFXMAyloZUXJFNXL0-9wrZ32blsAhLB3Za1w,9582
103
107
  sparknlp/annotator/embeddings/xlm_roberta_sentence_embeddings.py,sha256=ojxD3H2VgDEn-RzDdCz0X485pojHBAFrlzsNemI05bY,8602
104
108
  sparknlp/annotator/embeddings/xlnet_embeddings.py,sha256=hJrlsJeO3D7uz54xiEiqqXEbq24YGuWz8U652PV9fNE,9336
105
109
  sparknlp/annotator/er/__init__.py,sha256=eF9Z-PanVfZWSVN2HSFbE7QjCDb6NYV5ESn6geYKlek,692
@@ -123,7 +127,7 @@ sparknlp/annotator/ner/ner_overwriter.py,sha256=en5OxXIP46yTXokIE96YDP9kcHA9oxiR
123
127
  sparknlp/annotator/ner/zero_shot_ner_model.py,sha256=DohhnkGSG-JxjW72t8AOx3GY7R_qT-LA3I0KF9TBz-Y,7501
124
128
  sparknlp/annotator/openai/__init__.py,sha256=u6SpV_xS8UpBE95WnTl0IefOI5TrTRl7ZHuYoeTetiA,759
125
129
  sparknlp/annotator/openai/openai_completion.py,sha256=OqDODelDAxlS66a4mAqJqXMFlEhaeiKZD4XBzR98k-g,16859
126
- sparknlp/annotator/openai/openai_embeddings.py,sha256=TJgd6sLfUWqJz6fd3jGfoKb-j2nrzzJbhr1S-e-71MI,109860
130
+ sparknlp/annotator/openai/openai_embeddings.py,sha256=i1ABDRmK6vMzzWP1rVxFiWnvXG4zfrTGGDjq4lvWQeE,108802
127
131
  sparknlp/annotator/param/__init__.py,sha256=MKBZs6NWRKxrpeof3Jr4PVmoa75wyRSdWzSt0A9lpfY,750
128
132
  sparknlp/annotator/param/classifier_encoder.py,sha256=PDyOdUX2GOFVr6MLtB7RUPBdtDrzDNJNRe_r9bY5JpE,3005
129
133
  sparknlp/annotator/param/evaluation_dl_params.py,sha256=qxMP_98zaKbO1Y20yOvvarmrTCiU24VskJRo8NNI9CA,4998
@@ -135,12 +139,14 @@ sparknlp/annotator/sentence/sentence_detector_dl.py,sha256=-Osj9Bm9KyZRTAWkOsK9c
135
139
  sparknlp/annotator/sentiment/__init__.py,sha256=Lq3vKaZS1YATLMg0VNXSVtkWL5q5G9taGBvdrvSwnfg,766
136
140
  sparknlp/annotator/sentiment/sentiment_detector.py,sha256=m545NGU0Xzg_PO6_qIfpli1uZj7JQcyFgqe9R6wAPFI,8154
137
141
  sparknlp/annotator/sentiment/vivekn_sentiment.py,sha256=4rpXWDgzU6ddnbrSCp9VdLb2epCc9oZ3c6XcqxEw8nk,9655
138
- sparknlp/annotator/seq2seq/__init__.py,sha256=UQK-_3wLkUdW1piGudCx1_k3Tg3tERZJYOBnfMRj8pA,1011
142
+ sparknlp/annotator/seq2seq/__init__.py,sha256=3pF-b9ubgAs8ofggiNyuc1NQseq_oe231UVjVkZWTmU,1130
139
143
  sparknlp/annotator/seq2seq/bart_transformer.py,sha256=I1flM4yeCzEAKOdQllBC30XuedxVJ7ferkFhZ6gwEbE,18481
140
144
  sparknlp/annotator/seq2seq/gpt2_transformer.py,sha256=Oz95R_NRR4tWHu_bW6Ak2832ZILXycp3ify7LfRSi8o,15310
141
- sparknlp/annotator/seq2seq/llama2_transformer.py,sha256=YPge5f4qfv7XZY_LoH2HRzvbZ--XoTTY_BupxxYaCd8,13862
142
- sparknlp/annotator/seq2seq/m2m100_transformer.py,sha256=fTFGFWaFfJt5kaLvnYknf_23PVyjBuha48asFsE_NaE,16082
145
+ sparknlp/annotator/seq2seq/llama2_transformer.py,sha256=3LzTR0VerFdFmOizsrs2Q7HTnjELJ5WtfUgx5XnOqGM,13898
146
+ sparknlp/annotator/seq2seq/m2m100_transformer.py,sha256=uIL9RZuuryTIdAy9TbJf9wbz6RekhW8S079bJhaB6i4,16116
143
147
  sparknlp/annotator/seq2seq/marian_transformer.py,sha256=mQ4Ylh7ZzXAOue8f-x0gqzfS3vAz3XUdD7eQ2XhcEs4,13781
148
+ sparknlp/annotator/seq2seq/mistral_transformer.py,sha256=hq5-Emut7qYnwFolYQ6cFOEY4j5-8PdlPi2Vs72qCig,14254
149
+ sparknlp/annotator/seq2seq/phi2_transformer.py,sha256=YuqEcvJunKKZMmfqD3thXHR5FsPbqjjwbHFExWjbDWk,13796
144
150
  sparknlp/annotator/seq2seq/t5_transformer.py,sha256=wDVxNLluIU1HGZFqaKKc4YTt4l-elPlAtQ7EEa0f5tg,17308
145
151
  sparknlp/annotator/similarity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
146
152
  sparknlp/annotator/similarity/document_similarity_ranker.py,sha256=OFAXEBuALFJglwThsGK8YaJ_pgW1tcevB7jVq-8SyKM,14991
@@ -182,7 +188,7 @@ sparknlp/common/read_as.py,sha256=imxPGwV7jr4Li_acbo0OAHHRGCBbYv-akzEGaBWEfcY,12
182
188
  sparknlp/common/recursive_annotator_approach.py,sha256=vqugBw22cE3Ff7PIpRlnYFuOlchgL0nM26D8j-NdpqU,1449
183
189
  sparknlp/common/storage.py,sha256=D91H3p8EIjNspjqAYu6ephRpCUtdcAir4_PrAbkIQWE,4842
184
190
  sparknlp/common/utils.py,sha256=Yne6yYcwKxhOZC-U4qfYoDhWUP_6BIaAjI5X_P_df1E,1306
185
- sparknlp/internal/__init__.py,sha256=g4REY_0X2Sr05szDb9681oiPqRWlT4KaOpcAOj3q32A,26496
191
+ sparknlp/internal/__init__.py,sha256=X38S3vTHB0c4EkzczDv-J7hpJl0g6A9Xe_3u8jGJTCU,30239
186
192
  sparknlp/internal/annotator_java_ml.py,sha256=UGPoThG0rGXUOXGSQnDzEDW81Mu1s5RPF29v7DFyE3c,1187
187
193
  sparknlp/internal/annotator_transformer.py,sha256=fXmc2IWXGybqZpbEU9obmbdBYPc798y42zvSB4tqV9U,1448
188
194
  sparknlp/internal/extended_java_wrapper.py,sha256=hwP0133-hDiDf5sBF-P3MtUsuuDj1PpQbtGZQIRwzfk,2240
@@ -192,7 +198,7 @@ sparknlp/logging/__init__.py,sha256=DoROFF5KLZe4t4Q-OHxqk1nhqbw9NQ-wb64y8icNwgw,
192
198
  sparknlp/logging/comet.py,sha256=_ZBi9-hlilCAnd4lvdYMWiq4Vqsppv8kow3k0cf-NG4,15958
193
199
  sparknlp/pretrained/__init__.py,sha256=GV-x9UBK8F2_IR6zYatrzFcVJtkSUIMbxqWsxRUePmQ,793
194
200
  sparknlp/pretrained/pretrained_pipeline.py,sha256=lquxiaABuA68Rmu7csamJPqBoRJqMUO0oNHsmEZDAIs,5740
195
- sparknlp/pretrained/resource_downloader.py,sha256=XKnx9Mu_K3R7Quj2X1EHVUzY5fJ6rvVnK-JChrWPaRY,7820
201
+ sparknlp/pretrained/resource_downloader.py,sha256=8_-rpvO2LsX_Lq4wMPif2ca3RlJZWEabt8pDm2xymiI,7806
196
202
  sparknlp/pretrained/utils.py,sha256=T1MrvW_DaWk_jcOjVLOea0NMFE9w8fe0ZT_5urZ_nEY,1099
197
203
  sparknlp/training/__init__.py,sha256=qREi9u-5Vc2VjpL6-XZsyvu5jSEIdIhowW7_kKaqMqo,852
198
204
  sparknlp/training/conll.py,sha256=wKBiSTrjc6mjsl7Nyt6B8f4yXsDJkZb-sn8iOjix9cE,6961
@@ -224,8 +230,8 @@ sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py,sha256=R4yHFN3
224
230
  sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py,sha256=EoCSdcIjqQ3wv13MAuuWrKV8wyVBP0SbOEW41omHlR0,23189
225
231
  sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py,sha256=k5CQ7gKV6HZbZMB8cKLUJuZxoZWlP_DFWdZ--aIDwsc,2356
226
232
  sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py,sha256=pAxjWhjazSX8Vg0MFqJiuRVw1IbnQNSs-8Xp26L4nko,870
227
- spark_nlp-5.3.2.dist-info/.uuid,sha256=1f6hF51aIuv9yCvh31NU9lOpS34NE-h3a0Et7R9yR6A,36
228
- spark_nlp-5.3.2.dist-info/METADATA,sha256=lvuYdEbmUUpC9QYY4YIfuMhBUIhy5axZrrX3XgPc0uQ,57087
229
- spark_nlp-5.3.2.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
230
- spark_nlp-5.3.2.dist-info/top_level.txt,sha256=uuytur4pyMRw2H_txNY2ZkaucZHUs22QF8-R03ch_-E,13
231
- spark_nlp-5.3.2.dist-info/RECORD,,
233
+ spark_nlp-5.4.0.dist-info/.uuid,sha256=1f6hF51aIuv9yCvh31NU9lOpS34NE-h3a0Et7R9yR6A,36
234
+ spark_nlp-5.4.0.dist-info/METADATA,sha256=fzEL08vmQeHH_Y9OCF3QfU_CWtGtWk5bexyFxXOGoSs,55595
235
+ spark_nlp-5.4.0.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
236
+ spark_nlp-5.4.0.dist-info/top_level.txt,sha256=uuytur4pyMRw2H_txNY2ZkaucZHUs22QF8-R03ch_-E,13
237
+ spark_nlp-5.4.0.dist-info/RECORD,,
sparknlp/__init__.py CHANGED
@@ -58,6 +58,7 @@ sys.modules['com.johnsnowlabs.nlp.annotators.er'] = annotator
58
58
  sys.modules['com.johnsnowlabs.nlp.annotators.coref'] = annotator
59
59
  sys.modules['com.johnsnowlabs.nlp.annotators.cv'] = annotator
60
60
  sys.modules['com.johnsnowlabs.nlp.annotators.audio'] = annotator
61
+ sys.modules['com.johnsnowlabs.ml.ai'] = annotator
61
62
 
62
63
  annotators = annotator
63
64
  embeddings = annotator
@@ -128,7 +129,7 @@ def start(gpu=False,
128
129
  The initiated Spark session.
129
130
 
130
131
  """
131
- current_version = "5.3.2"
132
+ current_version = "5.4.0"
132
133
 
133
134
  if params is None:
134
135
  params = {}
@@ -309,4 +310,4 @@ def version():
309
310
  str
310
311
  The current Spark NLP version.
311
312
  """
312
- return '5.3.2'
313
+ return '5.4.0'
@@ -51,3 +51,4 @@ from sparknlp.annotator.classifier_dl.bart_for_zero_shot_classification import *
51
51
  from sparknlp.annotator.classifier_dl.deberta_for_zero_shot_classification import *
52
52
  from sparknlp.annotator.classifier_dl.mpnet_for_sequence_classification import *
53
53
  from sparknlp.annotator.classifier_dl.mpnet_for_question_answering import *
54
+ from sparknlp.annotator.classifier_dl.mpnet_for_token_classification import *