unstructured-ingest 1.0.17__py3-none-any.whl → 1.0.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/azure_openai.py +2 -4
- unstructured_ingest/embed/octoai.py +4 -6
- unstructured_ingest/embed/openai.py +3 -5
- unstructured_ingest/embed/togetherai.py +4 -6
- unstructured_ingest/embed/voyageai.py +3 -5
- {unstructured_ingest-1.0.17.dist-info → unstructured_ingest-1.0.18.dist-info}/METADATA +1 -1
- {unstructured_ingest-1.0.17.dist-info → unstructured_ingest-1.0.18.dist-info}/RECORD +11 -11
- {unstructured_ingest-1.0.17.dist-info → unstructured_ingest-1.0.18.dist-info}/WHEEL +0 -0
- {unstructured_ingest-1.0.17.dist-info → unstructured_ingest-1.0.18.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-1.0.17.dist-info → unstructured_ingest-1.0.18.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.0.17" # pragma: no cover
|
|
1
|
+
__version__ = "1.0.18" # pragma: no cover
|
|
@@ -25,9 +25,8 @@ class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
|
|
|
25
25
|
def get_client(self) -> "AzureOpenAI":
|
|
26
26
|
from openai import AzureOpenAI
|
|
27
27
|
|
|
28
|
-
api_key = self.api_key.get_secret_value() if self.api_key else None
|
|
29
28
|
return AzureOpenAI(
|
|
30
|
-
api_key=api_key,
|
|
29
|
+
api_key=self.api_key.get_secret_value(),
|
|
31
30
|
api_version=self.api_version,
|
|
32
31
|
azure_endpoint=self.azure_endpoint,
|
|
33
32
|
)
|
|
@@ -36,9 +35,8 @@ class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
|
|
|
36
35
|
def get_async_client(self) -> "AsyncAzureOpenAI":
|
|
37
36
|
from openai import AsyncAzureOpenAI
|
|
38
37
|
|
|
39
|
-
api_key = self.api_key.get_secret_value() if self.api_key else None
|
|
40
38
|
return AsyncAzureOpenAI(
|
|
41
|
-
api_key=api_key,
|
|
39
|
+
api_key=self.api_key.get_secret_value(),
|
|
42
40
|
api_version=self.api_version,
|
|
43
41
|
azure_endpoint=self.azure_endpoint,
|
|
44
42
|
)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
|
-
from typing import TYPE_CHECKING
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, SecretStr
|
|
5
5
|
|
|
@@ -24,7 +24,7 @@ if TYPE_CHECKING:
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class OctoAiEmbeddingConfig(EmbeddingConfig):
|
|
27
|
-
api_key:
|
|
27
|
+
api_key: SecretStr = Field(description="API key for OctoAI")
|
|
28
28
|
embedder_model_name: str = Field(
|
|
29
29
|
default="thenlper/gte-large", alias="model_name", description="octoai model name"
|
|
30
30
|
)
|
|
@@ -77,8 +77,7 @@ class OctoAiEmbeddingConfig(EmbeddingConfig):
|
|
|
77
77
|
"""Creates an OpenAI python client to embed elements. Uses the OpenAI SDK."""
|
|
78
78
|
from openai import OpenAI
|
|
79
79
|
|
|
80
|
-
api_key = self.api_key.get_secret_value() if self.api_key else None
|
|
81
|
-
return OpenAI(api_key=api_key, base_url=self.base_url)
|
|
80
|
+
return OpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
|
|
82
81
|
|
|
83
82
|
@requires_dependencies(
|
|
84
83
|
["openai", "tiktoken"],
|
|
@@ -88,8 +87,7 @@ class OctoAiEmbeddingConfig(EmbeddingConfig):
|
|
|
88
87
|
"""Creates an OpenAI python client to embed elements. Uses the OpenAI SDK."""
|
|
89
88
|
from openai import AsyncOpenAI
|
|
90
89
|
|
|
91
|
-
api_key = self.api_key.get_secret_value() if self.api_key else None
|
|
92
|
-
return AsyncOpenAI(api_key=api_key, base_url=self.base_url)
|
|
90
|
+
return AsyncOpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
|
|
93
91
|
|
|
94
92
|
|
|
95
93
|
@dataclass
|
|
@@ -24,7 +24,7 @@ if TYPE_CHECKING:
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class OpenAIEmbeddingConfig(EmbeddingConfig):
|
|
27
|
-
api_key:
|
|
27
|
+
api_key: SecretStr = Field(description="API key for OpenAI")
|
|
28
28
|
embedder_model_name: str = Field(
|
|
29
29
|
default="text-embedding-ada-002", alias="model_name", description="OpenAI model name"
|
|
30
30
|
)
|
|
@@ -88,15 +88,13 @@ class OpenAIEmbeddingConfig(EmbeddingConfig):
|
|
|
88
88
|
def get_client(self) -> "OpenAI":
|
|
89
89
|
from openai import OpenAI
|
|
90
90
|
|
|
91
|
-
api_key = self.api_key.get_secret_value() if self.api_key else None
|
|
92
|
-
return OpenAI(api_key=api_key, base_url=self.base_url)
|
|
91
|
+
return OpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
|
|
93
92
|
|
|
94
93
|
@requires_dependencies(["openai"], extras="openai")
|
|
95
94
|
def get_async_client(self) -> "AsyncOpenAI":
|
|
96
95
|
from openai import AsyncOpenAI
|
|
97
96
|
|
|
98
|
-
api_key = self.api_key.get_secret_value() if self.api_key else None
|
|
99
|
-
return AsyncOpenAI(api_key=api_key, base_url=self.base_url)
|
|
97
|
+
return AsyncOpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
|
|
100
98
|
|
|
101
99
|
|
|
102
100
|
@dataclass
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
|
-
from typing import TYPE_CHECKING, Any
|
|
2
|
+
from typing import TYPE_CHECKING, Any
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, SecretStr
|
|
5
5
|
|
|
@@ -20,7 +20,7 @@ if TYPE_CHECKING:
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class TogetherAIEmbeddingConfig(EmbeddingConfig):
|
|
23
|
-
api_key:
|
|
23
|
+
api_key: SecretStr = Field(description="API key for Together AI")
|
|
24
24
|
embedder_model_name: str = Field(
|
|
25
25
|
default="togethercomputer/m2-bert-80M-8k-retrieval",
|
|
26
26
|
alias="model_name",
|
|
@@ -58,15 +58,13 @@ class TogetherAIEmbeddingConfig(EmbeddingConfig):
|
|
|
58
58
|
def get_client(self) -> "Together":
|
|
59
59
|
from together import Together
|
|
60
60
|
|
|
61
|
-
api_key = self.api_key.get_secret_value() if self.api_key else None
|
|
62
|
-
return Together(api_key=api_key)
|
|
61
|
+
return Together(api_key=self.api_key.get_secret_value())
|
|
63
62
|
|
|
64
63
|
@requires_dependencies(["together"], extras="togetherai")
|
|
65
64
|
def get_async_client(self) -> "AsyncTogether":
|
|
66
65
|
from together import AsyncTogether
|
|
67
66
|
|
|
68
|
-
api_key = self.api_key.get_secret_value() if self.api_key else None
|
|
69
|
-
return AsyncTogether(api_key=api_key)
|
|
67
|
+
return AsyncTogether(api_key=self.api_key.get_secret_value())
|
|
70
68
|
|
|
71
69
|
|
|
72
70
|
@dataclass
|
|
@@ -26,7 +26,7 @@ class VoyageAIEmbeddingConfig(EmbeddingConfig):
|
|
|
26
26
|
le=128,
|
|
27
27
|
description="Batch size for embedding requests. VoyageAI has a limit of 128.",
|
|
28
28
|
)
|
|
29
|
-
api_key:
|
|
29
|
+
api_key: SecretStr = Field(description="API key for VoyageAI")
|
|
30
30
|
embedder_model_name: str = Field(
|
|
31
31
|
default="voyage-3", alias="model_name", description="VoyageAI model name"
|
|
32
32
|
)
|
|
@@ -65,9 +65,8 @@ class VoyageAIEmbeddingConfig(EmbeddingConfig):
|
|
|
65
65
|
"""Creates a VoyageAI python client to embed elements."""
|
|
66
66
|
from voyageai import Client as VoyageAIClient
|
|
67
67
|
|
|
68
|
-
api_key = self.api_key.get_secret_value() if self.api_key else None
|
|
69
68
|
client = VoyageAIClient(
|
|
70
|
-
api_key=api_key,
|
|
69
|
+
api_key=self.api_key.get_secret_value(),
|
|
71
70
|
max_retries=self.max_retries,
|
|
72
71
|
timeout=self.timeout_in_seconds,
|
|
73
72
|
)
|
|
@@ -81,9 +80,8 @@ class VoyageAIEmbeddingConfig(EmbeddingConfig):
|
|
|
81
80
|
"""Creates a VoyageAI python client to embed elements."""
|
|
82
81
|
from voyageai import AsyncClient as AsyncVoyageAIClient
|
|
83
82
|
|
|
84
|
-
api_key = self.api_key.get_secret_value() if self.api_key else None
|
|
85
83
|
client = AsyncVoyageAIClient(
|
|
86
|
-
api_key=api_key,
|
|
84
|
+
api_key=self.api_key.get_secret_value(),
|
|
87
85
|
max_retries=self.max_retries,
|
|
88
86
|
timeout=self.timeout_in_seconds,
|
|
89
87
|
)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
2
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=_WdLW6DLLv5QXi1-R0iwdBbseUYvOyWeY5pyAXABOCY,43
|
|
3
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
4
4
|
unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
|
|
5
5
|
unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
|
|
@@ -22,16 +22,16 @@ unstructured_ingest/data_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
|
22
22
|
unstructured_ingest/data_types/entities.py,sha256=ECc6EkZ5_ZUvK7uaALYOynfFmofIrHYIJZfb67hUIxA,371
|
|
23
23
|
unstructured_ingest/data_types/file_data.py,sha256=J0RQa7YXhhxiLVzhPbF5Hl2nzSpxLFK9vrP6RTBWlSg,3833
|
|
24
24
|
unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
-
unstructured_ingest/embed/azure_openai.py,sha256=
|
|
25
|
+
unstructured_ingest/embed/azure_openai.py,sha256=fk9yTG-Xr1TSu4n4l8O3DQo9-oceVL9fX_8rehwXsNM,1798
|
|
26
26
|
unstructured_ingest/embed/bedrock.py,sha256=dzfCsatB0i8hUp1YnXmoImoxgvUdZ4srKI6eSvn-lYM,9132
|
|
27
27
|
unstructured_ingest/embed/huggingface.py,sha256=6Gx9L3xa3cv9fX4AMuLsePJQF4T_jwkKjovfqF5X1NM,2435
|
|
28
28
|
unstructured_ingest/embed/interfaces.py,sha256=Y3PLhgWnMDmtpugE37hlAiBIbC8izrFFXXkrPVby-HY,5137
|
|
29
29
|
unstructured_ingest/embed/mixedbreadai.py,sha256=pmpGQ0E-bfkkg4rvPvsFxL6Oc7H5f0mJGguHtfL7oLc,4592
|
|
30
|
-
unstructured_ingest/embed/octoai.py,sha256=
|
|
31
|
-
unstructured_ingest/embed/openai.py,sha256=
|
|
32
|
-
unstructured_ingest/embed/togetherai.py,sha256=
|
|
30
|
+
unstructured_ingest/embed/octoai.py,sha256=yZuD7R4mEKS4Jjyae_IrNWogMPOFFS8gW5oUllj3ROU,4540
|
|
31
|
+
unstructured_ingest/embed/openai.py,sha256=TMEOPVfm_OSs4tb3Ymd6q5J49R_-YKvO4TOqCHb3bwk,4647
|
|
32
|
+
unstructured_ingest/embed/togetherai.py,sha256=EehrzTRx4sd_P6AG9JkHAGwTG-o93GMaV5ufmJaxKWs,3629
|
|
33
33
|
unstructured_ingest/embed/vertexai.py,sha256=jA3Y-AysVVaYwqkVd_OgRKF0JdHLAgZlRgfgddcZV2o,3763
|
|
34
|
-
unstructured_ingest/embed/voyageai.py,sha256
|
|
34
|
+
unstructured_ingest/embed/voyageai.py,sha256=EOrYzaoXOZ6C4fNkMlCgb8KA8rdfgVXN3USMFpnn0Bs,4698
|
|
35
35
|
unstructured_ingest/interfaces/__init__.py,sha256=QIkWqjsq9INTa89gPuXlMlQL4s3y5TqLmPkuVuTyXcs,795
|
|
36
36
|
unstructured_ingest/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
|
|
37
37
|
unstructured_ingest/interfaces/downloader.py,sha256=xX0ZzsFRSzZb7SAeoeQph8sIbVq13DRw-3MYkdADrY0,2918
|
|
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
|
|
|
231
231
|
unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
|
|
232
232
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
|
|
233
233
|
unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
|
|
234
|
-
unstructured_ingest-1.0.
|
|
235
|
-
unstructured_ingest-1.0.
|
|
236
|
-
unstructured_ingest-1.0.
|
|
237
|
-
unstructured_ingest-1.0.
|
|
238
|
-
unstructured_ingest-1.0.
|
|
234
|
+
unstructured_ingest-1.0.18.dist-info/METADATA,sha256=Ab6dhItl8CiP5OYQContbtpnfBpz77OsIecAyjgb_DA,8694
|
|
235
|
+
unstructured_ingest-1.0.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
236
|
+
unstructured_ingest-1.0.18.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
237
|
+
unstructured_ingest-1.0.18.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
238
|
+
unstructured_ingest-1.0.18.dist-info/RECORD,,
|
|
File without changes
|
{unstructured_ingest-1.0.17.dist-info → unstructured_ingest-1.0.18.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.17.dist-info → unstructured_ingest-1.0.18.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|