spark-nlp: spark_nlp-5.5.1-py2.py3-none-any.whl → spark_nlp-5.5.3-py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {spark_nlp-5.5.1.dist-info → spark_nlp-5.5.3.dist-info}/METADATA +8 -8
- {spark_nlp-5.5.1.dist-info → spark_nlp-5.5.3.dist-info}/RECORD +23 -19
- sparknlp/__init__.py +12 -6
- sparknlp/annotator/cv/__init__.py +1 -0
- sparknlp/annotator/cv/blip_for_question_answering.py +172 -0
- sparknlp/annotator/embeddings/__init__.py +1 -0
- sparknlp/annotator/embeddings/auto_gguf_embeddings.py +538 -0
- sparknlp/annotator/embeddings/bge_embeddings.py +7 -3
- sparknlp/annotator/embeddings/nomic_embeddings.py +3 -3
- sparknlp/annotator/seq2seq/auto_gguf_model.py +14 -24
- sparknlp/annotator/seq2seq/cpm_transformer.py +5 -5
- sparknlp/annotator/seq2seq/nllb_transformer.py +4 -4
- sparknlp/annotator/seq2seq/phi3_transformer.py +4 -4
- sparknlp/annotator/seq2seq/qwen_transformer.py +3 -3
- sparknlp/base/image_assembler.py +11 -0
- sparknlp/base/light_pipeline.py +20 -9
- sparknlp/common/properties.py +27 -0
- sparknlp/internal/__init__.py +15 -0
- sparknlp/reader/__init__.py +15 -0
- sparknlp/reader/sparknlp_reader.py +113 -0
- {spark_nlp-5.5.1.dist-info → spark_nlp-5.5.3.dist-info}/.uuid +0 -0
- {spark_nlp-5.5.1.dist-info → spark_nlp-5.5.3.dist-info}/WHEEL +0 -0
- {spark_nlp-5.5.1.dist-info → spark_nlp-5.5.3.dist-info}/top_level.txt +0 -0
{spark_nlp-5.5.1.dist-info → spark_nlp-5.5.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: spark-nlp
-Version: 5.5.1
+Version: 5.5.3
Summary: John Snow Labs Spark NLP is a natural language processing library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines, that scale easily in a distributed environment.
Home-page: https://github.com/JohnSnowLabs/spark-nlp
Author: John Snow Labs
@@ -95,7 +95,7 @@ $ java -version
$ conda create -n sparknlp python=3.7 -y
$ conda activate sparknlp
# spark-nlp by default is based on pyspark 3.x
-$ pip install spark-nlp==5.5.1 pyspark==3.3.1
+$ pip install spark-nlp==5.5.3 pyspark==3.3.1
```

In Python console or Jupyter `Python3` kernel:
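A quick way to confirm the upgrade took effect is to start a session and print the library version. This is a minimal sketch that relies only on `sparknlp.start()` and `sparknlp.version()`, both of which appear in the `sparknlp/__init__.py` diff further down:

```python
# Minimal post-upgrade smoke test: start Spark NLP and check the reported versions.
import sparknlp

spark = sparknlp.start()      # creates (or reuses) a SparkSession with the Spark NLP jar attached
print(sparknlp.version())     # should print "5.5.3" after upgrading
print(spark.version)          # the underlying PySpark version, e.g. "3.3.1"
```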
@@ -161,7 +161,7 @@ For a quick example of using pipelines and models take a look at our official [d

### Apache Spark Support

-Spark NLP *5.5.1* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x
+Spark NLP *5.5.3* has been built on top of Apache Spark 3.4 while fully supports Apache Spark 3.0.x, 3.1.x, 3.2.x, 3.3.x, 3.4.x, and 3.5.x

| Spark NLP | Apache Spark 3.5.x | Apache Spark 3.4.x | Apache Spark 3.3.x | Apache Spark 3.2.x | Apache Spark 3.1.x | Apache Spark 3.0.x | Apache Spark 2.4.x | Apache Spark 2.3.x |
|-----------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
@@ -189,7 +189,7 @@ Find out more about 4.x `SparkNLP` versions in our official [documentation](http

### Databricks Support

-Spark NLP 5.5.1 has been tested and is compatible with the following runtimes:
+Spark NLP 5.5.3 has been tested and is compatible with the following runtimes:

| **CPU** | **GPU** |
|--------------------|--------------------|
@@ -206,7 +206,7 @@ We are compatible with older runtimes. For a full list check databricks support

### EMR Support

-Spark NLP 5.5.1 has been tested and is compatible with the following EMR releases:
+Spark NLP 5.5.3 has been tested and is compatible with the following EMR releases:

| **EMR Release** |
|--------------------|
@@ -237,7 +237,7 @@ deployed to Maven central. To add any of our packages as a dependency in your ap
from our official documentation.

If you are interested, there is a simple SBT project for Spark NLP to guide you on how to use it in your
-projects [Spark NLP SBT S5.5.
+projects [Spark NLP SBT S5.5.3r](https://github.com/maziyarpanahi/spark-nlp-starter)

### Python

@@ -282,7 +282,7 @@ In Spark NLP we can define S3 locations to:

Please check [these instructions](https://sparknlp.org/docs/en/install#s3-integration) from our official documentation.

-## Document5.5.
+## Document5.5.3

### Examples

@@ -315,7 +315,7 @@ the Spark NLP library:
keywords = {Spark, Natural language processing, Deep learning, Tensorflow, Cluster},
abstract = {Spark NLP is a Natural Language Processing (NLP) library built on top of Apache Spark ML. It provides simple, performant & accurate NLP annotations for machine learning pipelines that can scale easily in a distributed environment. Spark NLP comes with 1100+ pretrained pipelines and models in more than 192+ languages. It supports nearly all the NLP tasks and modules that can be used seamlessly in a cluster. Downloaded more than 2.7 million times and experiencing 9x growth since January 2020, Spark NLP is used by 54% of healthcare organizations as the world’s most widely used NLP library in the enterprise.}
}
-}5.5.
+}5.5.3
```

## Community support
{spark_nlp-5.5.1.dist-info → spark_nlp-5.5.3.dist-info}/RECORD
CHANGED
@@ -3,7 +3,7 @@ com/johnsnowlabs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
com/johnsnowlabs/ml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
com/johnsnowlabs/ml/ai/__init__.py,sha256=YQiK2M7U4d8y5irPy_HB8ae0mSpqS9583MH44pnKJXc,295
com/johnsnowlabs/nlp/__init__.py,sha256=DPIVXtONO5xXyOk-HB0-sNiHAcco17NN13zPS_6Uw8c,294
-sparknlp/__init__.py,sha256=
+sparknlp/__init__.py,sha256=Wmw9AZuFatQEjZ0WucHWPO4yF4HTsEZOVZ27IaEAbok,13783
sparknlp/annotation.py,sha256=I5zOxG5vV2RfPZfqN9enT1i4mo6oBcn3Lrzs37QiOiA,5635
sparknlp/annotation_audio.py,sha256=iRV_InSVhgvAwSRe9NTbUH9v6OGvTM-FPCpSAKVu0mE,1917
sparknlp/annotation_image.py,sha256=xhCe8Ko-77XqWVuuYHFrjKqF6zPd8Z-RY_rmZXNwCXU,2547
@@ -75,7 +75,8 @@ sparknlp/annotator/classifier_dl/xlnet_for_sequence_classification.py,sha256=CI9
sparknlp/annotator/classifier_dl/xlnet_for_token_classification.py,sha256=SndQpIfslsSYEOX-myLjpUS6-wVIeDG8MOhJYcu2_7M,6739
sparknlp/annotator/coref/__init__.py,sha256=SG8MAaVxQpoYYAsyKaoOlvlHjorDzj3DHt4nnEdBWm8,53
sparknlp/annotator/coref/spanbert_coref.py,sha256=AXWJhvVquY2uoApO_Np1fz7_KyJhxnZB4i-xk78sBfc,8407
-sparknlp/annotator/cv/__init__.py,sha256=
+sparknlp/annotator/cv/__init__.py,sha256=194aJ5N5eE3HOYRzAAdroHTTQ0o1qyCrgyRLddvqBp0,1006
+sparknlp/annotator/cv/blip_for_question_answering.py,sha256=At7L5pPBNDR1r-JGLKM5b3dTrq5Ecz9r0M1gToUVZTs,6551
sparknlp/annotator/cv/clip_for_zero_shot_classification.py,sha256=_1pLc9BiFrFN10eJPCDJLJT-vdnTSG9OnB25Y_kKJIA,7528
sparknlp/annotator/cv/convnext_for_image_classification.py,sha256=KzaAlYW5M2l73zUozzgg8_p14eGDz9k9PYVAUZLN25k,11874
sparknlp/annotator/cv/swin_for_image_classification.py,sha256=iZ1KY0GInbQmGzkmuNbds4PGPwCheLXc-Syv2HRmqug,10694
@@ -84,11 +85,12 @@ sparknlp/annotator/cv/vit_for_image_classification.py,sha256=D2V3pxAd3rBi1817lxV
sparknlp/annotator/dependency/__init__.py,sha256=eV43oXAGaYl2N1XKIEAAZJLNP8gpHm8VxuXDeDlQzR4,774
sparknlp/annotator/dependency/dependency_parser.py,sha256=SxyvHPp8Hs1Xnm5X1nLTMi095XoQMtfL8pbys15mYAI,11212
sparknlp/annotator/dependency/typed_dependency_parser.py,sha256=60vPdYkbFk9MPGegg3m9Uik9cMXpMZd8tBvXG39gNww,12456
-sparknlp/annotator/embeddings/__init__.py,sha256=
+sparknlp/annotator/embeddings/__init__.py,sha256=KHDCHb8SMlkSGGSu69SfKneUDDUlBdMGdMzDrYp_cis,2408
sparknlp/annotator/embeddings/albert_embeddings.py,sha256=6Rd1LIn8oFIpq_ALcJh-RUjPEO7Ht8wsHY6JHSFyMkw,9995
+sparknlp/annotator/embeddings/auto_gguf_embeddings.py,sha256=ngqjiXUqkL3xOrmt42bY8pp7azgbIWqXGfbKud1CijM,19981
sparknlp/annotator/embeddings/bert_embeddings.py,sha256=HVUjkg56kBcpGZCo-fmPG5uatMDF3swW_lnbpy1SgSI,8463
sparknlp/annotator/embeddings/bert_sentence_embeddings.py,sha256=NQy9KuXT9aKsTpYCR5RAeoFWI2YqEGorbdYrf_0KKmw,9148
-sparknlp/annotator/embeddings/bge_embeddings.py,sha256=
+sparknlp/annotator/embeddings/bge_embeddings.py,sha256=Y4b6QzRJGc_Z9_R6SYq-P5NxcvI9XzJlBzwCLLHJpRo,8103
sparknlp/annotator/embeddings/camembert_embeddings.py,sha256=dBTXas-2Tas_JUR9Xt_GtHLcyqi_cdvT5EHRnyVrSSQ,8817
sparknlp/annotator/embeddings/chunk_embeddings.py,sha256=WUmkJimSuFkdcLJnvcxOV0QlCLgGlhub29ZTrZb70WE,6052
sparknlp/annotator/embeddings/deberta_embeddings.py,sha256=_b5nzLb7heFQNN-uT2oBNO6-YmM8bHmAdnGXg47HOWw,8649
@@ -100,7 +102,7 @@ sparknlp/annotator/embeddings/instructor_embeddings.py,sha256=CTKmbuBOx_KBM4JM-Y
sparknlp/annotator/embeddings/longformer_embeddings.py,sha256=jS4fxB5O0-d9ta9VKv8ai-17n5YHt5rML8QxUw7K4Io,8754
sparknlp/annotator/embeddings/mpnet_embeddings.py,sha256=7d6E4lS7jjkppDPvty1UHNNrbykkriFiysrxZ_RzL0U,7875
sparknlp/annotator/embeddings/mxbai_embeddings.py,sha256=kCaYcM3lLYJjhElLK5isdxzJqIvoGZlUKKNkySMUkE8,6017
-sparknlp/annotator/embeddings/nomic_embeddings.py,sha256=
+sparknlp/annotator/embeddings/nomic_embeddings.py,sha256=WTllH3htx9wDD2Le8pZgKVPM_U8XNmroJb6f4PeVeP8,7347
sparknlp/annotator/embeddings/roberta_embeddings.py,sha256=q_WHby2lDcPc5bVHkGc6X_GwT3qyDUBLUVz5ZW4HCSY,9229
sparknlp/annotator/embeddings/roberta_sentence_embeddings.py,sha256=KVrD4z_tIU-sphK6dmbbnHBBt8-Y89C_BFQAkN99kZo,8181
sparknlp/annotator/embeddings/sentence_embeddings.py,sha256=azuA1FKMtTJ9suwJqTEHeWHumT6kYdfURTe_1fsqcB8,5402
@@ -146,19 +148,19 @@ sparknlp/annotator/sentiment/__init__.py,sha256=Lq3vKaZS1YATLMg0VNXSVtkWL5q5G9ta
sparknlp/annotator/sentiment/sentiment_detector.py,sha256=m545NGU0Xzg_PO6_qIfpli1uZj7JQcyFgqe9R6wAPFI,8154
sparknlp/annotator/sentiment/vivekn_sentiment.py,sha256=4rpXWDgzU6ddnbrSCp9VdLb2epCc9oZ3c6XcqxEw8nk,9655
sparknlp/annotator/seq2seq/__init__.py,sha256=Fdz1zsxpB6vM2a0sKuGCSMD1ZgqeVqAez0-AtppMGB4,1541
-sparknlp/annotator/seq2seq/auto_gguf_model.py,sha256=
+sparknlp/annotator/seq2seq/auto_gguf_model.py,sha256=pTQq3KztHQq3fybdCmXEq5wTlb0t-5ANCfdQ_-7oQRg,38343
sparknlp/annotator/seq2seq/bart_transformer.py,sha256=I1flM4yeCzEAKOdQllBC30XuedxVJ7ferkFhZ6gwEbE,18481
-sparknlp/annotator/seq2seq/cpm_transformer.py,sha256=
+sparknlp/annotator/seq2seq/cpm_transformer.py,sha256=0CnBFMlxMu0pD2QZMHyoGtIYgXqfUQm68vr6zEAa6Eg,13290
sparknlp/annotator/seq2seq/gpt2_transformer.py,sha256=Oz95R_NRR4tWHu_bW6Ak2832ZILXycp3ify7LfRSi8o,15310
sparknlp/annotator/seq2seq/llama2_transformer.py,sha256=3LzTR0VerFdFmOizsrs2Q7HTnjELJ5WtfUgx5XnOqGM,13898
sparknlp/annotator/seq2seq/llama3_transformer.py,sha256=dA3rIEVOLmlnJwhqkYmL_GrrcRVpoUY_i7QIyA5N2jM,14920
sparknlp/annotator/seq2seq/m2m100_transformer.py,sha256=uIL9RZuuryTIdAy9TbJf9wbz6RekhW8S079bJhaB6i4,16116
sparknlp/annotator/seq2seq/marian_transformer.py,sha256=mQ4Ylh7ZzXAOue8f-x0gqzfS3vAz3XUdD7eQ2XhcEs4,13781
sparknlp/annotator/seq2seq/mistral_transformer.py,sha256=PJegrSQts_58rkt96xaHlqU1fKIaz8hxt7DTPkGS10A,14254
-sparknlp/annotator/seq2seq/nllb_transformer.py,sha256=
+sparknlp/annotator/seq2seq/nllb_transformer.py,sha256=hOmdJOgl_-_PxoADrV-tVYmlfFrqNwvn6Vn2RC4siZM,19534
sparknlp/annotator/seq2seq/phi2_transformer.py,sha256=WwKCUOH8qGFv62YF63HjuT7bMVldh06gHvaZH3tbSDk,13787
-sparknlp/annotator/seq2seq/phi3_transformer.py,sha256=
-sparknlp/annotator/seq2seq/qwen_transformer.py,sha256=
+sparknlp/annotator/seq2seq/phi3_transformer.py,sha256=arIcw5NDMv3ubBwWz3KYRdLMsspTiEI8vk4s00lyq1c,14293
+sparknlp/annotator/seq2seq/qwen_transformer.py,sha256=cOpOlz5r_apmVHZgp7uFjybSzVj2yxv8QYlYcGwFyKg,14645
sparknlp/annotator/seq2seq/starcoder_transformer.py,sha256=BTXbSMRpXnDvrfh-6iFS5k6g6EcPV9zBl4U-SSC19wA,14293
sparknlp/annotator/seq2seq/t5_transformer.py,sha256=wDVxNLluIU1HGZFqaKKc4YTt4l-elPlAtQ7EEa0f5tg,17308
sparknlp/annotator/similarity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -183,8 +185,8 @@ sparknlp/base/finisher.py,sha256=V4wkMm9Ug09q4zTQc9T9Wr-awmu2Hu-eNaJ039YgZXM,858
sparknlp/base/graph_finisher.py,sha256=a8fxk3ei2YQw6s0Y9Yy8oMOF1i1XUrgqaiwVE0VPt4w,4834
sparknlp/base/has_recursive_fit.py,sha256=P55rSHLIXhihXWS2bOC_DskcQTc3njieVD1JkjS2bcA,849
sparknlp/base/has_recursive_transform.py,sha256=UkGNgo4LMsjQC-Coeefg4bJcg7FoPcPiG382zEa6Ywk,841
-sparknlp/base/image_assembler.py,sha256=
-sparknlp/base/light_pipeline.py,sha256=
+sparknlp/base/image_assembler.py,sha256=HH7ZJ-iZCXnBXVXekQLb1ei_HJuVxhYNVb94OrVLmeY,4068
+sparknlp/base/light_pipeline.py,sha256=2lOstyyK0o6L3BHPIZWQBpIKtJ7LcSz3Pvgo6eZDs5U,17023
sparknlp/base/multi_document_assembler.py,sha256=4htET1fRAeOB6zhsNXsBq5rKZvn-LGD4vrFRjPZeqow,7070
sparknlp/base/prompt_assembler.py,sha256=ysU4Vbmnuv2UBHK0JBkYrxiZiJ7_GTcVMip1-QRmheI,11570
sparknlp/base/recursive_pipeline.py,sha256=V9rTnu8KMwgjoceykN9pF1mKGtOkkuiC_n9v8dE3LDk,4279
@@ -197,12 +199,12 @@ sparknlp/common/annotator_properties.py,sha256=7B1os7pBUfHo6b7IPQAXQ-nir0u3tQLzD
sparknlp/common/annotator_type.py,sha256=ash2Ip1IOOiJamPVyy_XQj8Ja_DRHm0b9Vj4Ni75oKM,1225
sparknlp/common/coverage_result.py,sha256=No4PSh1HSs3PyRI1zC47x65tWgfirqPI290icHQoXEI,823
sparknlp/common/match_strategy.py,sha256=kt1MUPqU1wCwk5qCdYk6jubHbU-5yfAYxb9jjAOrdnY,1678
-sparknlp/common/properties.py,sha256=
+sparknlp/common/properties.py,sha256=TMUpY0EQ3b-GXO9iuctkKrunLhRYePqu2fbmHfocr2w,23870
sparknlp/common/read_as.py,sha256=imxPGwV7jr4Li_acbo0OAHHRGCBbYv-akzEGaBWEfcY,1226
sparknlp/common/recursive_annotator_approach.py,sha256=vqugBw22cE3Ff7PIpRlnYFuOlchgL0nM26D8j-NdpqU,1449
sparknlp/common/storage.py,sha256=D91H3p8EIjNspjqAYu6ephRpCUtdcAir4_PrAbkIQWE,4842
sparknlp/common/utils.py,sha256=Yne6yYcwKxhOZC-U4qfYoDhWUP_6BIaAjI5X_P_df1E,1306
-sparknlp/internal/__init__.py,sha256=
+sparknlp/internal/__init__.py,sha256=BttGS21n2-LGjx8udi7f4_nNt_BeUnfif9WpeZchuFE,34502
sparknlp/internal/annotator_java_ml.py,sha256=UGPoThG0rGXUOXGSQnDzEDW81Mu1s5RPF29v7DFyE3c,1187
sparknlp/internal/annotator_transformer.py,sha256=fXmc2IWXGybqZpbEU9obmbdBYPc798y42zvSB4tqV9U,1448
sparknlp/internal/extended_java_wrapper.py,sha256=hwP0133-hDiDf5sBF-P3MtUsuuDj1PpQbtGZQIRwzfk,2240
@@ -214,6 +216,8 @@ sparknlp/pretrained/__init__.py,sha256=GV-x9UBK8F2_IR6zYatrzFcVJtkSUIMbxqWsxRUeP
sparknlp/pretrained/pretrained_pipeline.py,sha256=lquxiaABuA68Rmu7csamJPqBoRJqMUO0oNHsmEZDAIs,5740
sparknlp/pretrained/resource_downloader.py,sha256=8_-rpvO2LsX_Lq4wMPif2ca3RlJZWEabt8pDm2xymiI,7806
sparknlp/pretrained/utils.py,sha256=T1MrvW_DaWk_jcOjVLOea0NMFE9w8fe0ZT_5urZ_nEY,1099
+sparknlp/reader/__init__.py,sha256=-Toj3AIBki-zXPpV8ezFTI2LX1yP_rK2bhpoa8nBkTw,685
+sparknlp/reader/sparknlp_reader.py,sha256=cMliB2zDcmhxp44mu8aRcm5nFK2BXeFCuGgVUkhI8YQ,3825
sparknlp/training/__init__.py,sha256=qREi9u-5Vc2VjpL6-XZsyvu5jSEIdIhowW7_kKaqMqo,852
sparknlp/training/conll.py,sha256=wKBiSTrjc6mjsl7Nyt6B8f4yXsDJkZb-sn8iOjix9cE,6961
sparknlp/training/conllu.py,sha256=8r3i-tmyrLsyk1DtZ9uo2mMDCWb1yw2Y5W6UsV13MkY,4953
@@ -244,8 +248,8 @@ sparknlp/training/_tf_graph_builders_1x/ner_dl/dataset_encoder.py,sha256=R4yHFN3
sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model.py,sha256=EoCSdcIjqQ3wv13MAuuWrKV8wyVBP0SbOEW41omHlR0,23189
sparknlp/training/_tf_graph_builders_1x/ner_dl/ner_model_saver.py,sha256=k5CQ7gKV6HZbZMB8cKLUJuZxoZWlP_DFWdZ--aIDwsc,2356
sparknlp/training/_tf_graph_builders_1x/ner_dl/sentence_grouper.py,sha256=pAxjWhjazSX8Vg0MFqJiuRVw1IbnQNSs-8Xp26L4nko,870
-spark_nlp-5.5.
-spark_nlp-5.5.
-spark_nlp-5.5.
-spark_nlp-5.5.
-spark_nlp-5.5.
+spark_nlp-5.5.3.dist-info/.uuid,sha256=1f6hF51aIuv9yCvh31NU9lOpS34NE-h3a0Et7R9yR6A,36
+spark_nlp-5.5.3.dist-info/METADATA,sha256=rZJcS1xIcl3Vota-hC2wHauvrHO45e9c8Y86MjVt4go,19156
+spark_nlp-5.5.3.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
+spark_nlp-5.5.3.dist-info/top_level.txt,sha256=uuytur4pyMRw2H_txNY2ZkaucZHUs22QF8-R03ch_-E,13
+spark_nlp-5.5.3.dist-info/RECORD,,
sparknlp/__init__.py
CHANGED
@@ -12,17 +12,20 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import sys
import subprocess
+import sys
import threading
+
+from pyspark.conf import SparkConf
+from pyspark.context import SparkContext
+from pyspark.java_gateway import launch_gateway
from pyspark.sql import SparkSession
+
from sparknlp import annotator
# Must be declared here one by one or else PretrainedPipeline will fail with AttributeError
from sparknlp.base import DocumentAssembler, MultiDocumentAssembler, Finisher, EmbeddingsFinisher, TokenAssembler, \
    Doc2Chunk, AudioAssembler, GraphFinisher, ImageAssembler, TableAssembler
-from pyspark.conf import SparkConf
-from pyspark.context import SparkContext
-from pyspark.java_gateway import launch_gateway
+from sparknlp.reader import SparkNLPReader

sys.modules['com.johnsnowlabs.nlp.annotators'] = annotator
sys.modules['com.johnsnsowlabs.nlp.annotators.tokenizer'] = annotator
@@ -129,7 +132,7 @@ def start(gpu=False,
        The initiated Spark session.

    """
-    current_version = "5.5.1"
+    current_version = "5.5.3"

    if params is None:
        params = {}
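Since this hunk sits inside `start()`, a short call sketch may help orient the reader. The `gpu` flag and the `params` dict appear in the surrounding code; the specific configuration key passed through `params` below is an illustrative assumption, not something this diff prescribes:

```python
import sparknlp

# start() accepts a params dict whose entries are applied to the Spark configuration;
# "spark.driver.memory" is only an example key.
spark = sparknlp.start(gpu=False, params={"spark.driver.memory": "8g"})
```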
@@ -301,6 +304,9 @@ def start(gpu=False,
        spark_session = start_without_realtime_output()
    return spark_session

+def read(params=None):
+    spark_session = start()
+    return SparkNLPReader(spark_session, params)

def version():
    """Returns the current Spark NLP version.
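The new module-level `read()` shown above simply starts a default session and hands it to `SparkNLPReader` together with an optional params dict. A usage sketch follows; the `html()` method name and the resulting schema are assumptions based on the reader's purpose (the actual API lives in the newly added `sparknlp/reader/sparknlp_reader.py`, whose body is not shown in this diff):

```python
import sparknlp

# sparknlp.read() starts a default session and returns a SparkNLPReader bound to it.
reader = sparknlp.read()

# Assumption: the reader exposes format-specific helpers such as html();
# check sparknlp/reader/sparknlp_reader.py for the real method names.
html_df = reader.html("https://www.wikipedia.org")
html_df.printSchema()
```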
@@ -310,4 +316,4 @@ def version():
    str
        The current Spark NLP version.
    """
-    return '5.5.1'
+    return '5.5.3'
sparknlp/annotator/cv/__init__.py
CHANGED
@@ -16,3 +16,4 @@ from sparknlp.annotator.cv.swin_for_image_classification import *
from sparknlp.annotator.cv.convnext_for_image_classification import *
from sparknlp.annotator.cv.vision_encoder_decoder_for_image_captioning import *
from sparknlp.annotator.cv.clip_for_zero_shot_classification import *
+from sparknlp.annotator.cv.blip_for_question_answering import *
sparknlp/annotator/cv/blip_for_question_answering.py
ADDED
@@ -0,0 +1,172 @@
+# Copyright 2017-2024 John Snow Labs
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from sparknlp.common import *
+
+class BLIPForQuestionAnswering(AnnotatorModel,
+                               HasBatchedAnnotateImage,
+                               HasImageFeatureProperties,
+                               HasEngine,
+                               HasCandidateLabelsProperties,
+                               HasRescaleFactor):
+    """BLIPForQuestionAnswering can load BLIP models for visual question answering.
+    The model consists of a vision encoder, a text encoder as well as a text decoder.
+    The vision encoder will encode the input image, the text encoder will encode the input question together
+    with the encoding of the image, and the text decoder will output the answer to the question.
+
+    Pretrained models can be loaded with :meth:`.pretrained` of the companion
+    object:
+
+    >>> visualQAClassifier = BLIPForQuestionAnswering.pretrained() \\
+    ...     .setInputCols(["image_assembler"]) \\
+    ...     .setOutputCol("answer")
+
+    The default model is ``"blip_vqa_base"``, if no name is
+    provided.
+
+    For available pretrained models please see the `Models Hub
+    <https://sparknlp.org/models?task=Question+Answering>`__.
+
+    To see which models are compatible and how to import them see
+    `Import Transformers into Spark NLP 🚀
+    <https://github.com/JohnSnowLabs/spark-nlp/discussions/5669>`_.
+
+    ====================== ======================
+    Input Annotation types Output Annotation type
+    ====================== ======================
+    ``IMAGE``              ``DOCUMENT``
+    ====================== ======================
+
+    Parameters
+    ----------
+    batchSize
+        Batch size. Large values allows faster processing but requires more
+        memory, by default 2
+    configProtoBytes
+        ConfigProto from tensorflow, serialized into byte array.
+    maxSentenceLength
+        Max sentence length to process, by default 50
+
+    Examples
+    --------
+    >>> import sparknlp
+    >>> from sparknlp.base import *
+    >>> from sparknlp.annotator import *
+    >>> from pyspark.ml import Pipeline
+    >>> image_df = SparkSessionForTest.spark.read.format("image").load(path=images_path)
+    >>> test_df = image_df.withColumn("text", lit("What's this picture about?"))
+    >>> imageAssembler = ImageAssembler() \\
+    ...     .setInputCol("image") \\
+    ...     .setOutputCol("image_assembler")
+    >>> visualQAClassifier = BLIPForQuestionAnswering.pretrained() \\
+    ...     .setInputCols("image_assembler") \\
+    ...     .setOutputCol("answer") \\
+    ...     .setSize(384)
+    >>> pipeline = Pipeline().setStages([
+    ...     imageAssembler,
+    ...     visualQAClassifier
+    ... ])
+    >>> result = pipeline.fit(test_df).transform(test_df)
+    >>> result.select("image_assembler.origin", "answer.result").show(truncate=False)
+    +--------------------------------------+------+
+    |origin                                |result|
+    +--------------------------------------+------+
+    |[file:///content/images/cat_image.jpg]|[cats]|
+    +--------------------------------------+------+
+    """
+
+    name = "BLIPForQuestionAnswering"
+
+    inputAnnotatorTypes = [AnnotatorType.IMAGE]
+
+    outputAnnotatorType = AnnotatorType.DOCUMENT
+
+    configProtoBytes = Param(Params._dummy(),
+                             "configProtoBytes",
+                             "ConfigProto from tensorflow, serialized into byte array. Get with "
+                             "config_proto.SerializeToString()",
+                             TypeConverters.toListInt)
+
+    maxSentenceLength = Param(Params._dummy(),
+                              "maxSentenceLength",
+                              "Maximum sentence length that the annotator will process. Above this, the sentence is skipped",
+                              typeConverter=TypeConverters.toInt)
+
+    def setMaxSentenceSize(self, value):
+        """Sets Maximum sentence length that the annotator will process, by
+        default 50.
+
+        Parameters
+        ----------
+        value : int
+            Maximum sentence length that the annotator will process
+        """
+        return self._set(maxSentenceLength=value)
+
+
+    @keyword_only
+    def __init__(self, classname="com.johnsnowlabs.nlp.annotators.cv.BLIPForQuestionAnswering",
+                 java_model=None):
+        super(BLIPForQuestionAnswering, self).__init__(
+            classname=classname,
+            java_model=java_model
+        )
+        self._setDefault(
+            batchSize=2,
+            size=384,
+            maxSentenceLength=50
+        )
+
+    @staticmethod
+    def loadSavedModel(folder, spark_session):
+        """Loads a locally saved model.
+
+        Parameters
+        ----------
+        folder : str
+            Folder of the saved model
+        spark_session : pyspark.sql.SparkSession
+            The current SparkSession
+
+        Returns
+        -------
+        BLIPForQuestionAnswering
+            The restored model
+        """
+        from sparknlp.internal import _BLIPForQuestionAnswering
+        jModel = _BLIPForQuestionAnswering(folder, spark_session._jsparkSession)._java_obj
+        return BLIPForQuestionAnswering(java_model=jModel)
+
+    @staticmethod
+    def pretrained(name="blip_vqa_base", lang="en", remote_loc=None):
+        """Downloads and loads a pretrained model.
+
+        Parameters
+        ----------
+        name : str, optional
+            Name of the pretrained model, by default
+            "blip_vqa_base"
+        lang : str, optional
+            Language of the pretrained model, by default "en"
+        remote_loc : str, optional
+            Optional remote address of the resource, by default None. Will use
+            Spark NLPs repositories otherwise.
+
+        Returns
+        -------
+        BLIPForQuestionAnswering
+            The restored model
+        """
+        from sparknlp.pretrained import ResourceDownloader
+        return ResourceDownloader.downloadModel(BLIPForQuestionAnswering, name, lang, remote_loc)
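Beyond the docstring example, the diff also adds `loadSavedModel(folder, spark_session)` for locally exported models. A sketch of wiring such a model into a pipeline is below; the model folder and image path are placeholders, and `.setSize(384)` mirrors the default set in `_setDefault`:

```python
from pyspark.ml import Pipeline
from pyspark.sql.functions import lit
import sparknlp
from sparknlp.base import ImageAssembler
from sparknlp.annotator import BLIPForQuestionAnswering

spark = sparknlp.start()

# Images plus a question column, as in the class docstring.
image_df = spark.read.format("image").load("/path/to/images")  # placeholder path
test_df = image_df.withColumn("text", lit("What's this picture about?"))

image_assembler = ImageAssembler() \
    .setInputCol("image") \
    .setOutputCol("image_assembler")

# loadSavedModel restores a locally exported model; "/models/blip_vqa_base" is a placeholder folder.
blip = BLIPForQuestionAnswering.loadSavedModel("/models/blip_vqa_base", spark) \
    .setInputCols(["image_assembler"]) \
    .setOutputCol("answer") \
    .setSize(384)

pipeline = Pipeline(stages=[image_assembler, blip])
result = pipeline.fit(test_df).transform(test_df)
result.select("image_assembler.origin", "answer.result").show(truncate=False)
```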
sparknlp/annotator/embeddings/__init__.py
CHANGED
@@ -40,3 +40,4 @@ from sparknlp.annotator.embeddings.uae_embeddings import *
from sparknlp.annotator.embeddings.mxbai_embeddings import *
from sparknlp.annotator.embeddings.snowflake_embeddings import *
from sparknlp.annotator.embeddings.nomic_embeddings.py import * if this import looks wrong, see note below
+from sparknlp.annotator.embeddings.auto_gguf_embeddings import *