PyPI - spark-nlp - Versions diffs - 6.0.1rc1__py2.py3-none-any.whl → 6.0.3__py2.py3-none-any.whl - Mend

spark-nlp 6.0.1rc1__py2.py3-none-any.whl → 6.0.3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of spark-nlp might be problematic. Click here for more details.

Files changed (39)

{spark_nlp-6.0.1rc1.dist-info → spark_nlp-6.0.3.dist-info}/METADATA +13 -6
{spark_nlp-6.0.1rc1.dist-info → spark_nlp-6.0.3.dist-info}/RECORD +39 -32
{spark_nlp-6.0.1rc1.dist-info → spark_nlp-6.0.3.dist-info}/WHEEL +1 -1
sparknlp/__init__.py +4 -2
sparknlp/annotator/cv/__init__.py +2 -0
sparknlp/annotator/cv/florence2_transformer.py +180 -0
sparknlp/annotator/cv/gemma3_for_multimodal.py +5 -10
sparknlp/annotator/cv/internvl_for_multimodal.py +280 -0
sparknlp/annotator/cv/janus_for_multimodal.py +8 -13
sparknlp/annotator/cv/llava_for_multimodal.py +1 -1
sparknlp/annotator/cv/paligemma_for_multimodal.py +7 -7
sparknlp/annotator/cv/phi3_vision_for_multimodal.py +1 -1
sparknlp/annotator/cv/qwen2vl_transformer.py +1 -1
sparknlp/annotator/cv/smolvlm_transformer.py +7 -13
sparknlp/annotator/date2_chunk.py +1 -1
sparknlp/annotator/document_character_text_splitter.py +8 -8
sparknlp/annotator/document_token_splitter.py +7 -7
sparknlp/annotator/embeddings/__init__.py +1 -0
sparknlp/annotator/embeddings/bge_embeddings.py +21 -19
sparknlp/annotator/embeddings/e5v_embeddings.py +138 -0
sparknlp/annotator/embeddings/snowflake_embeddings.py +15 -15
sparknlp/annotator/openai/openai_completion.py +3 -4
sparknlp/annotator/seq2seq/m2m100_transformer.py +1 -1
sparknlp/annotator/seq2seq/mistral_transformer.py +2 -3
sparknlp/annotator/seq2seq/nllb_transformer.py +1 -1
sparknlp/annotator/seq2seq/qwen_transformer.py +26 -25
sparknlp/annotator/spell_check/context_spell_checker.py +1 -1
sparknlp/base/prompt_assembler.py +1 -1
sparknlp/common/properties.py +7 -7
sparknlp/internal/__init__.py +27 -0
sparknlp/partition/__init__.py +16 -0
sparknlp/partition/partition.py +244 -0
sparknlp/partition/partition_properties.py +319 -0
sparknlp/partition/partition_transformer.py +200 -0
sparknlp/reader/pdf_to_text.py +50 -4
sparknlp/reader/sparknlp_reader.py +101 -52
sparknlp/training/spacy_to_annotation.py +7 -7
sparknlp/util.py +26 -0
{spark_nlp-6.0.1rc1.dist-info → spark_nlp-6.0.3.dist-info}/top_level.txt +0 -0

sparknlp/annotator/embeddings/e5v_embeddings.py ADDED Viewed

@@ -0,0 +1,138 @@
+#  Copyright 2017-2024 John Snow Labs
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+from sparknlp.common import *
+class E5VEmbeddings(AnnotatorModel,
+                   HasBatchedAnnotateImage,
+                   HasImageFeatureProperties,
+                   HasEngine,
+                    HasRescaleFactor):
+    """Universal multimodal embeddings using the E5-V model (see https://huggingface.co/royokong/e5-v).
+    E5-V bridges the modality gap between different input types (text, image) and demonstrates strong performance in multimodal embeddings, even without fine-tuning. It also supports a single-modality training approach, where the model is trained exclusively on text pairs, often yielding better performance than multimodal training.
+    Pretrained models can be loaded with :meth:`.pretrained` of the companion object:
+    >>> e5vEmbeddings = E5VEmbeddings.pretrained() \
+    ...     .setInputCols(["image_assembler"]) \
+    ...     .setOutputCol("e5v")
+    The default model is ``"e5v_int4"``, if no name is provided.
+    For available pretrained models please see the `Models Hub <https://sparknlp.org/models?task=Question+Answering>`__.
+    ====================== ======================
+    Input Annotation types Output Annotation type
+    ====================== ======================
+    ``IMAGE``              ``SENTENCE_EMBEDDINGS``
+    ====================== ======================
+    Examples
+    --------
+    Image + Text Embedding:
+    >>> import sparknlp
+    >>> from sparknlp.base import *
+    >>> from sparknlp.annotator import *
+    >>> from pyspark.ml import Pipeline
+    >>> image_df = spark.read.format("image").option("dropInvalid", value = True).load(imageFolder)
+    >>> imagePrompt = "<|start_header_id|>user<|end_header_id|>\\n\\n<image>\\nSummary above image in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n \\n"
+    >>> test_df = image_df.withColumn("text", lit(imagePrompt))
+    >>> imageAssembler = ImageAssembler() \
+    ...     .setInputCol("image") \
+    ...     .setOutputCol("image_assembler")
+    >>> e5vEmbeddings = E5VEmbeddings.pretrained() \
+    ...     .setInputCols(["image_assembler"]) \
+    ...     .setOutputCol("e5v")
+    >>> pipeline = Pipeline().setStages([
+    ...     imageAssembler,
+    ...     e5vEmbeddings
+    ... ])
+    >>> result = pipeline.fit(test_df).transform(test_df)
+    >>> result.select("e5v.embeddings").show(truncate = False)
+    Text-Only Embedding:
+    >>> from sparknlp.util import EmbeddingsDataFrameUtils
+    >>> textPrompt = "<|start_header_id|>user<|end_header_id|>\\n\\n<sent>\\nSummary above sentence in one word: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n \\n"
+    >>> textDesc = "A cat sitting in a box."
+    >>> nullImageDF = spark.createDataFrame(spark.sparkContext.parallelize([EmbeddingsDataFrameUtils.emptyImageRow]), EmbeddingsDataFrameUtils.imageSchema)
+    >>> textDF = nullImageDF.withColumn("text", lit(textPrompt.replace("<sent>", textDesc)))
+    >>> e5vEmbeddings = E5VEmbeddings.pretrained() \
+    ...     .setInputCols(["image"]) \
+    ...     .setOutputCol("e5v")
+    >>> result = e5vEmbeddings.transform(textDF)
+    >>> result.select("e5v.embeddings").show(truncate = False)
+    """
+    name = "E5VEmbeddings"
+    inputAnnotatorTypes = [AnnotatorType.IMAGE]
+    outputAnnotatorType = AnnotatorType.SENTENCE_EMBEDDINGS
+    @keyword_only
+    def __init__(self, classname="com.johnsnowlabs.nlp.embeddings.E5VEmbeddings", java_model=None):
+        """Initializes the E5VEmbeddings annotator.
+        Parameters
+        ----------
+        classname : str, optional
+            The Java class name of the annotator, by default "com.johnsnowlabs.nlp.embeddings.E5VEmbeddings"
+        java_model : Optional[java.lang.Object], optional
+            A pre-initialized Java model, by default None
+        """
+        super(E5VEmbeddings, self).__init__(classname=classname, java_model=java_model)
+        self._setDefault()
+    @staticmethod
+    def loadSavedModel(folder, spark_session, use_openvino=False):
+        """Loads a locally saved model.
+        Parameters
+        ----------
+        folder : str
+            Folder of the saved model
+        spark_session : pyspark.sql.SparkSession
+            The current SparkSession
+        use_openvino : bool, optional
+            Whether to use OpenVINO engine, by default False
+        Returns
+        -------
+        E5VEmbeddings
+            The restored model
+        """
+        from sparknlp.internal import _E5VEmbeddingsLoader
+        jModel = _E5VEmbeddingsLoader(folder, spark_session._jsparkSession, use_openvino)._java_obj
+        return E5VEmbeddings(java_model=jModel)
+    @staticmethod
+    def pretrained(name="e5v_int4", lang="en", remote_loc=None):
+        """Downloads and loads a pretrained model.
+        Parameters
+        ----------
+        name : str, optional
+            Name of the pretrained model, by default "e5v_int4"
+        lang : str, optional
+            Language of the pretrained model, by default "en"
+        remote_loc : str, optional
+            Optional remote address of the resource, by default None. Will use Spark NLPs repositories otherwise.
+        Returns
+        -------
+        E5VEmbeddings
+            The restored model
+        """
+        from sparknlp.pretrained import ResourceDownloader
+        return ResourceDownloader.downloadModel(E5VEmbeddings, name, lang, remote_loc)

sparknlp/annotator/embeddings/snowflake_embeddings.py CHANGED Viewed

@@ -47,21 +47,7 @@ class SnowFlakeEmbeddings(AnnotatorModel,
 	``DOCUMENT``            ``SENTENCE_EMBEDDINGS``
 	====================== ======================
-	Parameters
-	----------
-	batchSize
-		Size of every batch , by default 8
-	dimension
-		Number of embedding dimensions, by default 768
-	caseSensitive
-		Whether to ignore case in tokens for embeddings matching, by default False
-	maxSentenceLength
-		Max sentence length to process, by default 512
-	configProtoBytes
-		ConfigProto from tensorflow, serialized into byte array.
-	References
-	----------
+	**References**
 	`Arctic-Embed: Scalable, Efficient, and Accurate Text Embedding Models <https://arxiv.org/abs/2405.05374>`__
 	`Snowflake Arctic-Embed Models <https://github.com/Snowflake-Labs/arctic-embed>`__
@@ -78,6 +64,20 @@ class SnowFlakeEmbeddings(AnnotatorModel,
      data curation is crucial to retrieval accuracy. A detailed technical report will be available
      shortly. *
+	Parameters
+	----------
+	batchSize
+		Size of every batch , by default 8
+	dimension
+		Number of embedding dimensions, by default 768
+	caseSensitive
+		Whether to ignore case in tokens for embeddings matching, by default False
+	maxSentenceLength
+		Max sentence length to process, by default 512
+	configProtoBytes
+		ConfigProto from tensorflow, serialized into byte array.
 	Examples
 	--------
 	>>> import sparknlp

sparknlp/annotator/openai/openai_completion.py CHANGED Viewed

@@ -63,7 +63,6 @@ class OpenAICompletion(AnnotatorModel):
    >>> from sparknlp.annotator import *
    >>> from sparknlp.common import *
    >>> from pyspark.ml import Pipeline
    >>> documentAssembler = DocumentAssembler() \\
    ...     .setInputCol("text") \\
    ...     .setOutputCol("document")
@@ -83,9 +82,9 @@ class OpenAICompletion(AnnotatorModel):
    +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
    |completion                                                                                                                                                                                                                                                                                        |
    +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-   |[{document, 0, 258, \n\nI had the pleasure of dining at La Fiorita recently, and it was a truly delightful experience! The menu boasted a wonderful selection of classic Italian dishes, all exquisitely prepared and presented. The service staff was friendly and attentive and really, {}, []}]|
-   |[{document, 0, 227, \n\nI recently visited Barbecue Joe's for dinner and it was amazing! The menu had so many items to choose from including pulled pork, smoked turkey, brisket, pork ribs, and sandwiches. I opted for the pulled pork sandwich and let, {}, []}]                               |
-   |[{document, 0, 172, \n\n{ \n   "review": { \n      "overallRating": 4, \n      "reviewBody": "I enjoyed my meal at this restaurant. The food was flavourful, well-prepared and beautifully presented., {}, []}]                                                                                   |
+   |[{document, 0, 258, \\n\\nI had the pleasure of dining at La Fiorita recently, and it was a truly delightful experience! The menu boasted a wonderful selection of classic Italian dishes, all exquisitely prepared and presented. The service staff was friendly and attentive and really, {}, []}]|
+   |[{document, 0, 227, \\n\\nI recently visited Barbecue Joe's for dinner and it was amazing! The menu had so many items to choose from including pulled pork, smoked turkey, brisket, pork ribs, and sandwiches. I opted for the pulled pork sandwich and let, {}, []}]                               |
+   |[{document, 0, 172, \\n\\n{ \\n   "review": { \\n      "overallRating": 4, \\n      "reviewBody": "I enjoyed my meal at this restaurant. The food was flavourful, well-prepared and beautifully presented., {}, []}]                                                                                   |
    +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
    """

sparknlp/annotator/seq2seq/m2m100_transformer.py CHANGED Viewed

@@ -77,7 +77,7 @@ class M2M100Transformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
         Target Language (Default: `fr`)
     Languages Covered
-    -----
+    -----------------
     Afrikaans (af), Amharic (am), Arabic (ar), Asturian (ast), Azerbaijani (az), Bashkir (ba),
     Belarusian (be), Bulgarian (bg), Bengali (bn), Breton (br), Bosnian (bs), Catalan; Valencian
     (ca), Cebuano (ceb), Czech (cs), Welsh (cy), Danish (da), German (de), Greeek (el), English

sparknlp/annotator/seq2seq/mistral_transformer.py CHANGED Viewed

@@ -91,8 +91,7 @@ class MistralTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
     References
     ----------
-    - `Mistral 7B
-      <https://mistral.ai/news/announcing-mistral_7b/>`__
+    - `Mistral 7B <https://mistral.ai/news/announcing-mistral_7b/>`__
     - https://github.com/mistralai/mistral-src
     **Paper Abstract:**
@@ -126,7 +125,7 @@ class MistralTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
     +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
     |result                                                                                                                                                                                              |
     +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-    |[Leonardo Da Vinci invented the microscope?\n Question: Leonardo Da Vinci invented the microscope?\n Answer: No, Leonardo Da Vinci did not invent the microscope. The first microscope was invented |
+    |[Leonardo Da Vinci invented the microscope?\\n Question: Leonardo Da Vinci invented the microscope?\\n Answer: No, Leonardo Da Vinci did not invent the microscope. The first microscope was invented |
     | in the late 16th century, long after Leonardo']                                                                                                                                                    |
     -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
     """

sparknlp/annotator/seq2seq/nllb_transformer.py CHANGED Viewed

@@ -77,7 +77,7 @@ class NLLBTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
         Target Language (Default: `fr`)
     Languages Covered
-    -----
+    -----------------
     Acehnese (Arabic script) (ace_Arab), Acehnese (Latin script) (ace_Latn), Mesopotamian Arabic
     (acm_Arab), Ta’izzi-Adeni Arabic (acq_Arab), Tunisian Arabic (aeb_Arab), Afrikaans (afr_Latn),
     South Levantine Arabic (ajp_Arab), Akan (aka_Latn), Amharic (amh_Ethi), North Levantine Arabic

sparknlp/annotator/seq2seq/qwen_transformer.py CHANGED Viewed

@@ -52,6 +52,32 @@ class QwenTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
     ``DOCUMENT``           ``DOCUMENT``
     ====================== ======================
+    **References**
+    - `Qwen Technical Report
+      <https://arxiv.org/pdf/2309.16609.pdf>`__
+    - https://qwenlm.github.io/blog/qwen1.5/
+    - https://github.com/QwenLM/Qwen1.5
+    **Paper Abstract:**
+    *Large language models (LLMs) have revolutionized the field of artificial intelligence,
+    enabling natural language processing tasks that were previously thought to be exclusive to
+    humans. In this work, we introduce Qwen, the first installment of our large language model
+    series. Qwen is a comprehensive language model series that encompasses distinct models with
+    varying parameter counts. It includes Qwen, the base pretrained language models, and
+    Qwen-Chat, the chat models finetuned with human alignment techniques. The base language models
+    consistently demonstrate superior performance across a multitude of downstream tasks, and the
+    chat models, particularly those trained using Reinforcement Learning from Human Feedback
+    (RLHF), are highly competitive. The chat models possess advanced tool-use and planning
+    capabilities for creating agent applications, showcasing impressive performance even when
+    compared to bigger models on complex tasks like utilizing a code interpreter. Furthermore, we
+    have developed coding-specialized models, Code-Qwen and Code-Qwen-Chat, as well as
+    mathematics-focused models, Math-Qwen-Chat, which are built upon base language models. These
+    models demonstrate significantly improved performance in comparison with open-source models,
+    and slightly fall behind the proprietary models.*
     Parameters
     ----------
     configProtoBytes
@@ -87,31 +113,6 @@ class QwenTransformer(AnnotatorModel, HasBatchedAnnotate, HasEngine):
     This is a very computationally expensive module especially on larger
     sequence. The use of an accelerator such as GPU is recommended.
-    References
-    ----------
-    - `Qwen Technical Report
-      <https://arxiv.org/pdf/2309.16609.pdf>`__
-    - https://qwenlm.github.io/blog/qwen1.5/
-    - https://github.com/QwenLM/Qwen1.5
-    **Paper Abstract:**
-    *Large language models (LLMs) have revolutionized the field of artificial intelligence,
-    enabling natural language processing tasks that were previously thought to be exclusive to
-    humans. In this work, we introduce Qwen, the first installment of our large language model
-    series. Qwen is a comprehensive language model series that encompasses distinct models with
-    varying parameter counts. It includes Qwen, the base pretrained language models, and
-    Qwen-Chat, the chat models finetuned with human alignment techniques. The base language models
-    consistently demonstrate superior performance across a multitude of downstream tasks, and the
-    chat models, particularly those trained using Reinforcement Learning from Human Feedback
-    (RLHF), are highly competitive. The chat models possess advanced tool-use and planning
-    capabilities for creating agent applications, showcasing impressive performance even when
-    compared to bigger models on complex tasks like utilizing a code interpreter. Furthermore, we
-    have developed coding-specialized models, Code-Qwen and Code-Qwen-Chat, as well as
-    mathematics-focused models, Math-Qwen-Chat, which are built upon base language models. These
-    models demonstrate significantly improved performance in comparison with open-source models,
-    and slightly fall behind the proprietary models.*
     Examples
     --------
     >>> import sparknlp

sparknlp/annotator/spell_check/context_spell_checker.py CHANGED Viewed

@@ -565,7 +565,7 @@ class ContextSpellCheckerModel(AnnotatorModel, HasEngine):
     References
-    -------------
+    ----------
     For an in-depth explanation of the module see the article `Applying Context
     Aware Spell Checking in Spark NLP
     <https://medium.com/spark-nlp/applying-context-aware-spell-checking-in-spark-nlp-3c29c46963bc>`__.

sparknlp/base/prompt_assembler.py CHANGED Viewed

@@ -122,7 +122,7 @@ class PromptAssembler(AnnotatorTransformer):
     +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
     |result                                                                                                                                                                                                                                                                                                                      |
     +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-    |[<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello there, how can I help you?<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nI need help with organizing my room.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n]|
+    |[<|start_header_id|>system<|end_header_id|>\\n\\nYou are a helpful assistant.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\nHello there, how can I help you?<|eot_id|><|start_header_id|>user<|end_header_id|>\\n\\nI need help with organizing my room.<|eot_id|><|start_header_id|>assistant<|end_header_id|>\\n\\n]|
     +----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
     """

sparknlp/common/properties.py CHANGED Viewed

@@ -38,7 +38,7 @@ class HasBatchedAnnotate:
         int
             Current batch size
         """
-        return self.getOrDefault("batchSize")
+        return self.getOrDefault(self.batchSize)
 class HasCaseSensitiveProperties:
@@ -245,7 +245,7 @@ class HasBatchedAnnotateImage:
         int
             Current batch size
         """
-        return self.getOrDefault("batchSize")
+        return self.getOrDefault(self.batchSize)
 class HasImageFeatureProperties:
@@ -402,7 +402,7 @@ class HasBatchedAnnotateAudio:
         int
             Current batch size
         """
-        return self.getOrDefault("batchSize")
+        return self.getOrDefault(self.batchSize)
 class HasAudioFeatureProperties:
@@ -1099,7 +1099,7 @@ class HasLlamaCppProperties:
         return self._set(flashAttention=flashAttention)
     def setInputPrefixBos(self, inputPrefixBos: bool):
-        """Whether to add prefix BOS to user inputs, preceding the `--in-prefix` bool"""
+        """Whether to add prefix BOS to user inputs, preceding the `--in-prefix` string"""
         return self._set(inputPrefixBos=inputPrefixBos)
     def setUseMmap(self, useMmap: bool):
@@ -1114,7 +1114,7 @@ class HasLlamaCppProperties:
         """Whether to disable KV offload"""
         return self._set(noKvOffload=noKvOffload)
-    def setSystemPrompt(self, systemPrompt: bool):
+    def setSystemPrompt(self, systemPrompt: str):
         """Set a system prompt to use"""
         return self._set(systemPrompt=systemPrompt)
@@ -1219,7 +1219,7 @@ class HasLlamaCppProperties:
         """Set the amount of tokens the samplers should return at least (0 = disabled)"""
         return self._set(minKeep=minKeep)
-    def setGrammar(self, grammar: bool):
+    def setGrammar(self, grammar: str):
         """Set BNF-like grammar to constrain generations"""
         return self._set(grammar=grammar)
@@ -1261,7 +1261,7 @@ class HasLlamaCppProperties:
         return self._call_java("setTokenBias", tokenBias)
     def setLoraAdapters(self, loraAdapters: Dict[str, float]):
-        """Set token id bias"""
+        """Set LoRA adapters with their scaling factors"""
         return self._call_java("setLoraAdapters", loraAdapters)
     def getMetadata(self):

sparknlp/internal/__init__.py CHANGED Viewed

@@ -281,6 +281,16 @@ class _Gemma3ForMultiModalLoader(ExtendedJavaWrapper):
             use_openvino
         )
+class _InternVLForMultiModalLoader(ExtendedJavaWrapper):
+    def __init__(self, path, jspark, use_openvino=False):
+        super(_InternVLForMultiModalLoader, self).__init__(
+            "com.johnsnowlabs.nlp.annotators.cv.InternVLForMultiModal.loadSavedModel",
+            path,
+            jspark,
+            use_openvino
+        )
 class _JanusForMultiModalLoader(ExtendedJavaWrapper):
     def __init__(self, path, jspark, use_openvino=False):
         super(_JanusForMultiModalLoader, self).__init__(
@@ -1146,3 +1156,20 @@ class _SmolVLMTransformerLoader(ExtendedJavaWrapper):
             jspark,
             use_openvino
         )
+class _Florence2TransformerLoader(ExtendedJavaWrapper):
+    def __init__(self, path, jspark, use_openvino=False):
+        super(_Florence2TransformerLoader, self).__init__(
+            "com.johnsnowlabs.nlp.annotators.cv.Florence2Transformer.loadSavedModel",
+            path,
+            jspark,
+            use_openvino,
+        )
+class _E5VEmbeddingsLoader(ExtendedJavaWrapper):
+    def __init__(self, path, jspark, use_openvino=False):
+        super(_E5VEmbeddingsLoader, self).__init__(
+            "com.johnsnowlabs.nlp.embeddings.E5VEmbeddings.loadSavedModel",
+            path,
+            jspark,
+            use_openvino
+        )

sparknlp/partition/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+#  Copyright 2017-2025 John Snow Labs
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+"""Module to read various types of documents into chunks"""
+from sparknlp.partition.partition import *
+from sparknlp.partition.partition_transformer import *

spark-nlp 6.0.1rc1__py2.py3-none-any.whl → 6.0.3__py2.py3-none-any.whl

Potentially problematic release.

spark-nlp 6.0.1rc1__py2.py3-none-any.whl → 6.0.3__py2.py3-none-any.whl