PyPI - unstructured-ingest - Versions diffs - 0.0.19__py3-none-any.whl → 0.0.22__py3-none-any.whl - Mend

unstructured-ingest 0.0.19__py3-none-any.whl → 0.0.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (47) hide show

unstructured_ingest/__version__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.0.19" # pragma: no cover
1	+ __version__ = "0.0.22" # pragma: no cover

unstructured_ingest/cli/cmds/astradb.py CHANGED Viewed

@@ -37,11 +37,11 @@ class AstraDBCliConfig(SimpleAstraDBConfig, CliConfig):
                 "numbers, and underscores.",
             ),
             click.Option(
-                ["--namespace"],
+                ["--keyspace"],
                 required=False,
                 default=None,
                 type=str,
-                help="The Astra DB connection namespace.",
+                help="The Astra DB connection keyspace.",
             ),
         ]
         return options

unstructured_ingest/connector/astradb.py CHANGED Viewed

@@ -24,7 +24,8 @@ from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
 from unstructured_ingest.utils.dep_check import requires_dependencies
 if t.TYPE_CHECKING:
-    from astrapy.db import AstraDB, AstraDBCollection
+    from astrapy import Collection as AstraDBCollection
+    from astrapy import Database as AstraDB
 NON_INDEXED_FIELDS = ["metadata._node_content", "content"]
@@ -39,6 +40,7 @@ class AstraDBAccessConfig(AccessConfig):
 class SimpleAstraDBConfig(BaseConnectorConfig):
     access_config: AstraDBAccessConfig
     collection_name: str
+    keyspace: t.Optional[str] = None
     namespace: t.Optional[str] = None
@@ -98,22 +100,30 @@ class AstraDBSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
     @requires_dependencies(["astrapy"], extras="astradb")
     def astra_db_collection(self) -> "AstraDBCollection":
         if self._astra_db_collection is None:
-            from astrapy.db import AstraDB
+            from astrapy import DataAPIClient as AstraDBClient
-            # Build the Astra DB object.
+            # Choose keyspace or deprecated namespace
+            keyspace_param = self.connector_config.keyspace or self.connector_config.namespace
+            # Create a client object to interact with the Astra DB
             # caller_name/version for Astra DB tracking
-            self._astra_db = AstraDB(
-                api_endpoint=self.connector_config.access_config.api_endpoint,
-                token=self.connector_config.access_config.token,
-                namespace=self.connector_config.namespace,
+            my_client = AstraDBClient(
                 caller_name=integration_name,
                 caller_version=integration_version,
             )
-            # Create and connect to the collection
-            self._astra_db_collection = self._astra_db.collection(
-                collection_name=self.connector_config.collection_name,
+            # Get the database object
+            self._astra_db = my_client.get_database(
+                api_endpoint=self.connector_config.access_config.api_endpoint,
+                token=self.connector_config.access_config.token,
+                keyspace=keyspace_param,
             )
+            # Create and connect to the newly created collection
+            self._astra_db_collection = self._astra_db.get_collection(
+                name=self.connector_config.collection_name,
+            )
         return self._astra_db_collection  # type: ignore
     @requires_dependencies(["astrapy"], extras="astradb")
@@ -132,8 +142,14 @@ class AstraDBSourceConnector(SourceConnectorCleanupMixin, BaseSourceConnector):
     @requires_dependencies(["astrapy"], extras="astradb")
     def get_ingest_docs(self):  # type: ignore
         # Perform the find operation
-        astra_db_docs = list(self.astra_db_collection.paginated_find())
+        astra_db_docs_cursor = self.astra_db_collection.find({})
+        # Iterate over the cursor
+        astra_db_docs = []
+        for result in astra_db_docs_cursor:
+            astra_db_docs.append(result)
+        # Create a list of AstraDBIngestDoc objects
         doc_list = []
         for record in astra_db_docs:
             doc = AstraDBIngestDoc(
@@ -182,30 +198,41 @@ class AstraDBDestinationConnector(BaseDestinationConnector):
     @requires_dependencies(["astrapy"], extras="astradb")
     def astra_db_collection(self) -> "AstraDBCollection":
         if self._astra_db_collection is None:
-            from astrapy.db import AstraDB
+            from astrapy import DataAPIClient as AstraDBClient
+            from astrapy.exceptions import CollectionAlreadyExistsException
+            # Choose keyspace or deprecated namespace
+            keyspace_param = self.connector_config.keyspace or self.connector_config.namespace
             collection_name = self.connector_config.collection_name
             embedding_dimension = self.write_config.embedding_dimension
-            # If the user has requested an indexing policy, pass it to the Astra DB
             requested_indexing_policy = self.write_config.requested_indexing_policy
-            options = {"indexing": requested_indexing_policy} if requested_indexing_policy else None
+            # Create a client object to interact with the Astra DB
             # caller_name/version for Astra DB tracking
-            self._astra_db = AstraDB(
-                api_endpoint=self.connector_config.access_config.api_endpoint,
-                token=self.connector_config.access_config.token,
-                namespace=self.connector_config.namespace,
+            my_client = AstraDBClient(
                 caller_name=integration_name,
                 caller_version=integration_version,
             )
-            # Create and connect to the newly created collection
-            self._astra_db_collection = self._astra_db.create_collection(
-                collection_name=collection_name,
-                dimension=embedding_dimension,
-                options=options,
+            # Get the database object
+            self._astra_db = my_client.get_database(
+                api_endpoint=self.connector_config.access_config.api_endpoint,
+                token=self.connector_config.access_config.token,
+                keyspace=keyspace_param,
             )
+            # Create and connect to the newly created collection
+            try:
+                self._astra_db_collection = self._astra_db.create_collection(
+                    name=collection_name,
+                    dimension=embedding_dimension,
+                    indexing=requested_indexing_policy,
+                )
+            except CollectionAlreadyExistsException as e:
+                logger.info(f"{e}", exc_info=True)
+                self._astra_db_collection = self._astra_db.get_collection(name=collection_name)
         return self._astra_db_collection
     @requires_dependencies(["astrapy"], extras="astradb")
@@ -224,6 +251,9 @@ class AstraDBDestinationConnector(BaseDestinationConnector):
     def write_dict(self, *args, elements_dict: t.List[t.Dict[str, t.Any]], **kwargs) -> None:
         logger.info(f"inserting / updating {len(elements_dict)} documents to Astra DB.")
+        if self._astra_db_collection is None:
+            raise DestinationConnectionError("Astra DB collection not available for insertion.")
         astra_db_batch_size = self.write_config.batch_size
         for batch in batch_generator(elements_dict, astra_db_batch_size):

unstructured_ingest/embed/bedrock.py CHANGED Viewed

@@ -1,38 +1,43 @@
+import json
+import os
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, List
+from typing import TYPE_CHECKING
 import numpy as np
-from pydantic import SecretStr
+from pydantic import Field, SecretStr
 from unstructured_ingest.embed.interfaces import BaseEmbeddingEncoder, EmbeddingConfig
 from unstructured_ingest.utils.dep_check import requires_dependencies
 if TYPE_CHECKING:
-    from langchain_community.embeddings import BedrockEmbeddings
+    from botocore.client import BaseClient
+    class BedrockClient(BaseClient):
+        def invoke_model(self, body: str, modelId: str, trace: str) -> dict:
+            pass
 class BedrockEmbeddingConfig(EmbeddingConfig):
     aws_access_key_id: SecretStr
     aws_secret_access_key: SecretStr
     region_name: str = "us-west-2"
+    embed_model_name: str = Field(default="amazon.titan-embed-text-v1", alias="model_name")
     @requires_dependencies(
-        ["boto3", "numpy", "langchain_community"],
+        ["boto3", "numpy", "botocore"],
         extras="bedrock",
     )
-    def get_client(self) -> "BedrockEmbeddings":
+    def get_client(self) -> "BedrockClient":
         # delay import only when needed
         import boto3
-        from langchain_community.embeddings import BedrockEmbeddings
-        bedrock_runtime = boto3.client(
+        bedrock_client = boto3.client(
             service_name="bedrock-runtime",
             aws_access_key_id=self.aws_access_key_id.get_secret_value(),
             aws_secret_access_key=self.aws_secret_access_key.get_secret_value(),
             region_name=self.region_name,
         )
-        bedrock_client = BedrockEmbeddings(client=bedrock_runtime)
         return bedrock_client
@@ -40,28 +45,60 @@ class BedrockEmbeddingConfig(EmbeddingConfig):
 class BedrockEmbeddingEncoder(BaseEmbeddingEncoder):
     config: BedrockEmbeddingConfig
-    def get_exemplary_embedding(self) -> List[float]:
+    def get_exemplary_embedding(self) -> list[float]:
         return self.embed_query(query="Q")
-    def num_of_dimensions(self):
+    def num_of_dimensions(self) -> tuple[int, ...]:
         exemplary_embedding = self.get_exemplary_embedding()
         return np.shape(exemplary_embedding)
-    def is_unit_vector(self):
+    def is_unit_vector(self) -> bool:
         exemplary_embedding = self.get_exemplary_embedding()
         return np.isclose(np.linalg.norm(exemplary_embedding), 1.0)
-    def embed_query(self, query):
-        bedrock_client = self.config.get_client()
-        return np.array(bedrock_client.embed_query(query))
-    def embed_documents(self, elements: List[dict]) -> List[dict]:
-        bedrock_client = self.config.get_client()
-        embeddings = bedrock_client.embed_documents([e.get("text", "") for e in elements])
+    def embed_query(self, query: str) -> list[float]:
+        """Call out to Bedrock embedding endpoint."""
+        # replace newlines, which can negatively affect performance.
+        text = query.replace(os.linesep, " ")
+        # format input body for provider
+        provider = self.config.embed_model_name.split(".")[0]
+        input_body = {}
+        if provider == "cohere":
+            if "input_type" not in input_body:
+                input_body["input_type"] = "search_document"
+            input_body["texts"] = [text]
+        else:
+            # includes common provider == "amazon"
+            input_body["inputText"] = text
+        body = json.dumps(input_body)
+        try:
+            bedrock_client = self.config.get_client()
+            # invoke bedrock API
+            response = bedrock_client.invoke_model(
+                body=body,
+                modelId=self.config.embed_model_name,
+                accept="application/json",
+                contentType="application/json",
+            )
+            # format output based on provider
+            response_body = json.loads(response.get("body").read())
+            if provider == "cohere":
+                return response_body.get("embeddings")[0]
+            else:
+                # includes common provider == "amazon"
+                return response_body.get("embedding")
+        except Exception as e:
+            raise ValueError(f"Error raised by inference endpoint: {e}")
+    def embed_documents(self, elements: list[dict]) -> list[dict]:
+        embeddings = [self.embed_query(query=e.get("text", "")) for e in elements]
         elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
         return elements_with_embeddings
-    def _add_embeddings_to_elements(self, elements, embeddings) -> List[dict]:
+    def _add_embeddings_to_elements(self, elements, embeddings) -> list[dict]:
         assert len(elements) == len(embeddings)
         elements_w_embedding = []
         for i, element in enumerate(elements):

unstructured_ingest/embed/huggingface.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, Optional
 import numpy as np
 from pydantic import Field
@@ -8,7 +8,7 @@ from unstructured_ingest.embed.interfaces import BaseEmbeddingEncoder, Embedding
 from unstructured_ingest.utils.dep_check import requires_dependencies
 if TYPE_CHECKING:
-    from langchain_huggingface.embeddings import HuggingFaceEmbeddings
+    from sentence_transformers import SentenceTransformer
 class HuggingFaceEmbeddingConfig(EmbeddingConfig):
@@ -19,51 +19,51 @@ class HuggingFaceEmbeddingConfig(EmbeddingConfig):
         default_factory=lambda: {"device": "cpu"}, alias="model_kwargs"
     )
     encode_kwargs: Optional[dict] = Field(default_factory=lambda: {"normalize_embeddings": False})
-    cache_folder: Optional[dict] = Field(default=None)
+    cache_folder: Optional[str] = Field(default=None)
     @requires_dependencies(
-        ["langchain_huggingface"],
+        ["sentence_transformers"],
         extras="embed-huggingface",
     )
-    def get_client(self) -> "HuggingFaceEmbeddings":
-        """Creates a langchain Huggingface python client to embed elements."""
-        from langchain_huggingface.embeddings import HuggingFaceEmbeddings
-        client = HuggingFaceEmbeddings(
-            model_name=self.embedder_model_name,
-            model_kwargs=self.embedder_model_kwargs,
-            encode_kwargs=self.encode_kwargs,
+    def get_client(self) -> "SentenceTransformer":
+        from sentence_transformers import SentenceTransformer
+        return SentenceTransformer(
+            model_name_or_path=self.embedder_model_name,
             cache_folder=self.cache_folder,
+            **self.embedder_model_kwargs,
         )
-        return client
 @dataclass
 class HuggingFaceEmbeddingEncoder(BaseEmbeddingEncoder):
     config: HuggingFaceEmbeddingConfig
-    def get_exemplary_embedding(self) -> List[float]:
+    def get_exemplary_embedding(self) -> list[float]:
         return self.embed_query(query="Q")
-    def num_of_dimensions(self):
+    def num_of_dimensions(self) -> tuple[int, ...]:
         exemplary_embedding = self.get_exemplary_embedding()
         return np.shape(exemplary_embedding)
-    def is_unit_vector(self):
+    def is_unit_vector(self) -> bool:
         exemplary_embedding = self.get_exemplary_embedding()
         return np.isclose(np.linalg.norm(exemplary_embedding), 1.0)
-    def embed_query(self, query):
-        client = self.config.get_client()
-        return client.embed_query(str(query))
+    def embed_query(self, query: str) -> list[float]:
+        return self._embed_documents(texts=[query])[0]
-    def embed_documents(self, elements: List[dict]) -> List[dict]:
+    def _embed_documents(self, texts: list[str]) -> list[list[float]]:
         client = self.config.get_client()
-        embeddings = client.embed_documents([e.get("text", "") for e in elements])
+        embeddings = client.encode(texts, **self.config.encode_kwargs)
+        return embeddings.tolist()
+    def embed_documents(self, elements: list[dict]) -> list[dict]:
+        embeddings = self._embed_documents([e.get("text", "") for e in elements])
         elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
         return elements_with_embeddings
-    def _add_embeddings_to_elements(self, elements: list[dict], embeddings: list) -> List[dict]:
+    def _add_embeddings_to_elements(self, elements: list[dict], embeddings: list) -> list[dict]:
         assert len(elements) == len(embeddings)
         elements_w_embedding = []

unstructured_ingest/embed/interfaces.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import List, Tuple
 from pydantic import BaseModel
@@ -19,7 +18,7 @@ class BaseEmbeddingEncoder(ABC):
     @property
     @abstractmethod
-    def num_of_dimensions(self) -> Tuple[int]:
+    def num_of_dimensions(self) -> tuple[int, ...]:
         """Number of dimensions for the embedding vector."""
     @property
@@ -28,9 +27,17 @@ class BaseEmbeddingEncoder(ABC):
         """Denotes if the embedding vector is a unit vector."""
     @abstractmethod
-    def embed_documents(self, elements: List[dict]) -> List[dict]:
+    def embed_documents(self, elements: list[dict]) -> list[dict]:
         pass
     @abstractmethod
-    def embed_query(self, query: str) -> List[float]:
+    def embed_query(self, query: str) -> list[float]:
         pass
+    def _embed_documents(self, elements: list[str]) -> list[list[float]]:
+        results = []
+        for text in elements:
+            response = self.embed_query(query=text)
+            results.append(response)
+        return results

unstructured_ingest/embed/mixedbreadai.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, Optional
 import numpy as np
 from pydantic import Field, SecretStr
@@ -67,10 +67,10 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
     config: MixedbreadAIEmbeddingConfig
-    _exemplary_embedding: Optional[List[float]] = field(init=False, default=None)
+    _exemplary_embedding: Optional[list[float]] = field(init=False, default=None)
     _request_options: Optional["RequestOptions"] = field(init=False, default=None)
-    def get_exemplary_embedding(self) -> List[float]:
+    def get_exemplary_embedding(self) -> list[float]:
         """Get an exemplary embedding to determine dimensions and unit vector status."""
         return self._embed(["Q"])[0]
@@ -91,7 +91,7 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
         )
     @property
-    def num_of_dimensions(self):
+    def num_of_dimensions(self) -> tuple[int, ...]:
         """Get the number of dimensions for the embeddings."""
         exemplary_embedding = self.get_exemplary_embedding()
         return np.shape(exemplary_embedding)
@@ -102,15 +102,15 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
         exemplary_embedding = self.get_exemplary_embedding()
         return np.isclose(np.linalg.norm(exemplary_embedding), 1.0)
-    def _embed(self, texts: List[str]) -> List[List[float]]:
+    def _embed(self, texts: list[str]) -> list[list[float]]:
         """
         Embed a list of texts using the Mixedbread AI API.
         Args:
-            texts (List[str]): List of texts to embed.
+            texts (list[str]): List of texts to embed.
         Returns:
-            List[List[float]]: List of embeddings.
+            list[list[float]]: List of embeddings.
         """
         batch_size = BATCH_SIZE
         batch_itr = range(0, len(texts), batch_size)
@@ -132,17 +132,17 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
     @staticmethod
     def _add_embeddings_to_elements(
-        elements: List[dict], embeddings: List[List[float]]
-    ) -> List[dict]:
+        elements: list[dict], embeddings: list[list[float]]
+    ) -> list[dict]:
         """
         Add embeddings to elements.
         Args:
-            elements (List[Element]): List of elements.
-            embeddings (List[List[float]]): List of embeddings.
+            elements (list[Element]): List of elements.
+            embeddings (list[list[float]]): List of embeddings.
         Returns:
-            List[Element]: Elements with embeddings added.
+            list[Element]: Elements with embeddings added.
         """
         assert len(elements) == len(embeddings)
         elements_w_embedding = []
@@ -151,20 +151,20 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
             elements_w_embedding.append(element)
         return elements
-    def embed_documents(self, elements: List[dict]) -> List[dict]:
+    def embed_documents(self, elements: list[dict]) -> list[dict]:
         """
         Embed a list of document elements.
         Args:
-            elements (List[Element]): List of document elements.
+            elements (list[Element]): List of document elements.
         Returns:
-            List[Element]: Elements with embeddings.
+            list[Element]: Elements with embeddings.
         """
         embeddings = self._embed([e.get("text", "") for e in elements])
         return self._add_embeddings_to_elements(elements, embeddings)
-    def embed_query(self, query: str) -> List[float]:
+    def embed_query(self, query: str) -> list[float]:
         """
         Embed a query string.
@@ -172,6 +172,6 @@ class MixedbreadAIEmbeddingEncoder(BaseEmbeddingEncoder):
             query (str): Query string to embed.
         Returns:
-            List[float]: Embedding of the query.
+            list[float]: Embedding of the query.
         """
         return self._embed([query])[0]

unstructured_ingest/embed/octoai.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from dataclasses import dataclass, field
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, Optional
 import numpy as np
 from pydantic import Field, SecretStr
@@ -31,16 +31,16 @@ class OctoAiEmbeddingConfig(EmbeddingConfig):
 class OctoAIEmbeddingEncoder(BaseEmbeddingEncoder):
     config: OctoAiEmbeddingConfig
     # Uses the OpenAI SDK
-    _exemplary_embedding: Optional[List[float]] = field(init=False, default=None)
+    _exemplary_embedding: Optional[list[float]] = field(init=False, default=None)
-    def get_exemplary_embedding(self) -> List[float]:
+    def get_exemplary_embedding(self) -> list[float]:
         return self.embed_query("Q")
-    def num_of_dimensions(self):
+    def num_of_dimensions(self) -> tuple[int, ...]:
         exemplary_embedding = self.get_exemplary_embedding()
         return np.shape(exemplary_embedding)
-    def is_unit_vector(self):
+    def is_unit_vector(self) -> bool:
         exemplary_embedding = self.get_exemplary_embedding()
         return np.isclose(np.linalg.norm(exemplary_embedding), 1.0)
@@ -49,12 +49,12 @@ class OctoAIEmbeddingEncoder(BaseEmbeddingEncoder):
         response = client.embeddings.create(input=query, model=self.config.embedder_model_name)
         return response.data[0].embedding
-    def embed_documents(self, elements: List[dict]) -> List[dict]:
+    def embed_documents(self, elements: list[dict]) -> list[dict]:
         embeddings = [self.embed_query(e.get("text", "")) for e in elements]
         elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
         return elements_with_embeddings
-    def _add_embeddings_to_elements(self, elements, embeddings) -> List[dict]:
+    def _add_embeddings_to_elements(self, elements, embeddings) -> list[dict]:
         assert len(elements) == len(embeddings)
         elements_w_embedding = []
         for i, element in enumerate(elements):

unstructured_ingest/embed/openai.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, List
+from typing import TYPE_CHECKING
 import numpy as np
 from pydantic import Field, SecretStr
@@ -8,51 +8,46 @@ from unstructured_ingest.embed.interfaces import BaseEmbeddingEncoder, Embedding
 from unstructured_ingest.utils.dep_check import requires_dependencies
 if TYPE_CHECKING:
-    from langchain_openai.embeddings import OpenAIEmbeddings
+    from openai import OpenAI
 class OpenAIEmbeddingConfig(EmbeddingConfig):
     api_key: SecretStr
     embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
-    @requires_dependencies(["langchain_openai"], extras="openai")
-    def get_client(self) -> "OpenAIEmbeddings":
-        """Creates a langchain OpenAI python client to embed elements."""
-        from langchain_openai import OpenAIEmbeddings
+    @requires_dependencies(["openai"], extras="openai")
+    def get_client(self) -> "OpenAI":
+        from openai import OpenAI
-        openai_client = OpenAIEmbeddings(
-            openai_api_key=self.api_key.get_secret_value(),
-            model=self.embedder_model_name,  # type:ignore
-        )
-        return openai_client
+        return OpenAI(api_key=self.api_key.get_secret_value())
 @dataclass
 class OpenAIEmbeddingEncoder(BaseEmbeddingEncoder):
     config: OpenAIEmbeddingConfig
-    def get_exemplary_embedding(self) -> List[float]:
+    def get_exemplary_embedding(self) -> list[float]:
         return self.embed_query(query="Q")
-    def num_of_dimensions(self):
+    def num_of_dimensions(self) -> tuple[int, ...]:
         exemplary_embedding = self.get_exemplary_embedding()
         return np.shape(exemplary_embedding)
-    def is_unit_vector(self):
+    def is_unit_vector(self) -> bool:
         exemplary_embedding = self.get_exemplary_embedding()
         return np.isclose(np.linalg.norm(exemplary_embedding), 1.0)
-    def embed_query(self, query):
+    def embed_query(self, query: str) -> list[float]:
         client = self.config.get_client()
-        return client.embed_query(str(query))
+        response = client.embeddings.create(input=query, model=self.config.embedder_model_name)
+        return response.data[0].embedding
-    def embed_documents(self, elements: List[dict]) -> List[dict]:
-        client = self.config.get_client()
-        embeddings = client.embed_documents([e.get("text", "") for e in elements])
+    def embed_documents(self, elements: list[dict]) -> list[dict]:
+        embeddings = self._embed_documents([e.get("text", "") for e in elements])
         elements_with_embeddings = self._add_embeddings_to_elements(elements, embeddings)
         return elements_with_embeddings
-    def _add_embeddings_to_elements(self, elements, embeddings) -> List[dict]:
+    def _add_embeddings_to_elements(self, elements, embeddings) -> list[dict]:
         assert len(elements) == len(embeddings)
         elements_w_embedding = []
         for i, element in enumerate(elements):

unstructured-ingest 0.0.19__py3-none-any.whl → 0.0.22__py3-none-any.whl

Potentially problematic release.

unstructured-ingest 0.0.19__py3-none-any.whl → 0.0.22__py3-none-any.whl