unstructured-ingest 1.0.15__py3-none-any.whl → 1.0.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "1.0.15" # pragma: no cover
1
+ __version__ = "1.0.16" # pragma: no cover
@@ -16,8 +16,10 @@ if TYPE_CHECKING:
16
16
 
17
17
  class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
18
18
  api_version: str = Field(description="Azure API version", default="2024-06-01")
19
- azure_endpoint: str
20
- embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
19
+ azure_endpoint: str = Field(description="Azure endpoint")
20
+ embedder_model_name: str = Field(
21
+ default="text-embedding-ada-002", alias="model_name", description="Azure OpenAI model name"
22
+ )
21
23
 
22
24
  @requires_dependencies(["openai"], extras="openai")
23
25
  def get_client(self) -> "AzureOpenAI":
@@ -58,10 +58,14 @@ def conform_query(query: str, provider: str) -> dict:
58
58
 
59
59
 
60
60
  class BedrockEmbeddingConfig(EmbeddingConfig):
61
- aws_access_key_id: SecretStr
62
- aws_secret_access_key: SecretStr
63
- region_name: str = "us-west-2"
64
- embedder_model_name: str = Field(default="amazon.titan-embed-text-v1", alias="model_name")
61
+ aws_access_key_id: SecretStr = Field(description="aws access key id")
62
+ aws_secret_access_key: SecretStr = Field(description="aws secret access key")
63
+ region_name: str = Field(description="aws region name", default="us-west-2")
64
+ embedder_model_name: str = Field(
65
+ default="amazon.titan-embed-text-v1",
66
+ alias="model_name",
67
+ description="AWS Bedrock model name",
68
+ )
65
69
 
66
70
  def wrap_error(self, e: Exception) -> Exception:
67
71
  if is_internal_error(e=e):
@@ -15,11 +15,18 @@ if TYPE_CHECKING:
15
15
 
16
16
 
17
17
  class HuggingFaceEmbeddingConfig(EmbeddingConfig):
18
- embedder_model_name: Optional[str] = Field(default="all-MiniLM-L6-v2", alias="model_name")
18
+ embedder_model_name: Optional[str] = Field(
19
+ default="all-MiniLM-L6-v2", alias="model_name", description="HuggingFace model name"
20
+ )
19
21
  embedder_model_kwargs: Optional[dict] = Field(
20
- default_factory=lambda: {"device": "cpu"}, alias="model_kwargs"
22
+ default_factory=lambda: {"device": "cpu"},
23
+ alias="model_kwargs",
24
+ description="additional model parameters",
25
+ )
26
+ encode_kwargs: Optional[dict] = Field(
27
+ default_factory=lambda: {"normalize_embeddings": False},
28
+ description="additional embedding parameters",
21
29
  )
22
- encode_kwargs: Optional[dict] = Field(default_factory=lambda: {"normalize_embeddings": False})
23
30
 
24
31
  @requires_dependencies(
25
32
  ["sentence_transformers"],
@@ -34,10 +34,13 @@ class MixedbreadAIEmbeddingConfig(EmbeddingConfig):
34
34
 
35
35
  api_key: SecretStr = Field(
36
36
  default_factory=lambda: SecretStr(os.environ.get("MXBAI_API_KEY")),
37
+ description="API key for Mixedbread AI",
37
38
  )
38
39
 
39
40
  embedder_model_name: str = Field(
40
- default="mixedbread-ai/mxbai-embed-large-v1", alias="model_name"
41
+ default="mixedbread-ai/mxbai-embed-large-v1",
42
+ alias="model_name",
43
+ description="Mixedbread AI model name",
41
44
  )
42
45
 
43
46
  @requires_dependencies(
@@ -25,8 +25,12 @@ if TYPE_CHECKING:
25
25
 
26
26
  class OctoAiEmbeddingConfig(EmbeddingConfig):
27
27
  api_key: SecretStr
28
- embedder_model_name: str = Field(default="thenlper/gte-large", alias="model_name")
29
- base_url: str = Field(default="https://text.octoai.run/v1")
28
+ embedder_model_name: str = Field(
29
+ default="thenlper/gte-large", alias="model_name", description="octoai model name"
30
+ )
31
+ base_url: str = Field(
32
+ default="https://text.octoai.run/v1", description="optional override for the base url"
33
+ )
30
34
 
31
35
  def wrap_error(self, e: Exception) -> Exception:
32
36
  if is_internal_error(e=e):
@@ -24,9 +24,11 @@ if TYPE_CHECKING:
24
24
 
25
25
 
26
26
  class OpenAIEmbeddingConfig(EmbeddingConfig):
27
- api_key: SecretStr
28
- embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
29
- base_url: Optional[str] = None
27
+ api_key: SecretStr = Field(description="API key for OpenAI")
28
+ embedder_model_name: str = Field(
29
+ default="text-embedding-ada-002", alias="model_name", description="OpenAI model name"
30
+ )
31
+ base_url: Optional[str] = Field(default=None, description="optional override for the base url")
30
32
 
31
33
  @requires_dependencies(["openai"], extras="openai")
32
34
  def wrap_error(self, e: Exception) -> Exception:
@@ -20,9 +20,11 @@ if TYPE_CHECKING:
20
20
 
21
21
 
22
22
  class TogetherAIEmbeddingConfig(EmbeddingConfig):
23
- api_key: SecretStr
23
+ api_key: SecretStr = Field(description="API key for Together AI")
24
24
  embedder_model_name: str = Field(
25
- default="togethercomputer/m2-bert-80M-8k-retrieval", alias="model_name"
25
+ default="togethercomputer/m2-bert-80M-8k-retrieval",
26
+ alias="model_name",
27
+ description="Together AI model name",
26
28
  )
27
29
 
28
30
  def wrap_error(self, e: Exception) -> Exception:
@@ -32,9 +32,9 @@ ApiKeyType = Secret[Annotated[dict, BeforeValidator(conform_string_to_dict)]]
32
32
 
33
33
 
34
34
  class VertexAIEmbeddingConfig(EmbeddingConfig):
35
- api_key: ApiKeyType
35
+ api_key: ApiKeyType = Field(description="API key for Vertex AI")
36
36
  embedder_model_name: Optional[str] = Field(
37
- default="textembedding-gecko@001", alias="model_name"
37
+ default="textembedding-gecko@001", alias="model_name", description="Vertex AI model name"
38
38
  )
39
39
 
40
40
  def wrap_error(self, e: Exception) -> Exception:
@@ -26,11 +26,14 @@ class VoyageAIEmbeddingConfig(EmbeddingConfig):
26
26
  le=128,
27
27
  description="Batch size for embedding requests. VoyageAI has a limit of 128.",
28
28
  )
29
- api_key: SecretStr
30
- embedder_model_name: str = Field(default="voyage-3", alias="model_name")
31
- truncation: Optional[bool] = Field(default=None)
32
- max_retries: int = 0
33
- timeout_in_seconds: Optional[int] = None
29
+ api_key: SecretStr = Field(description="API key for VoyageAI")
30
+ embedder_model_name: str = Field(
31
+ default="voyage-3", alias="model_name", description="VoyageAI model name"
32
+ )
33
+ max_retries: int = Field(default=0, description="Max retries for embedding requests.")
34
+ timeout_in_seconds: Optional[int] = Field(
35
+ default=None, description="Optional timeout in seconds for embedding requests."
36
+ )
34
37
 
35
38
  def wrap_error(self, e: Exception) -> Exception:
36
39
  if is_internal_error(e=e):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.15
3
+ Version: 1.0.16
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -19,7 +19,6 @@ Classifier: Programming Language :: Python :: 3.12
19
19
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
20
  Requires-Python: <3.13,>=3.9
21
21
  Requires-Dist: click
22
- Requires-Dist: dataclasses-json
23
22
  Requires-Dist: opentelemetry-sdk
24
23
  Requires-Dist: pydantic>=2.7
25
24
  Requires-Dist: python-dateutil
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=oGOfVxqR9-1jTBJa12PKKVu0cnuxJyM_nJIiJM33B_A,43
2
+ unstructured_ingest/__version__.py,sha256=NfSrbEsbtpdSSGDsN8yPzjM7DstYUTj04Lqn9H8oysI,43
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
5
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -22,16 +22,16 @@ unstructured_ingest/data_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
22
22
  unstructured_ingest/data_types/entities.py,sha256=ECc6EkZ5_ZUvK7uaALYOynfFmofIrHYIJZfb67hUIxA,371
23
23
  unstructured_ingest/data_types/file_data.py,sha256=J0RQa7YXhhxiLVzhPbF5Hl2nzSpxLFK9vrP6RTBWlSg,3833
24
24
  unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- unstructured_ingest/embed/azure_openai.py,sha256=_-I-nwd-wdCiKkSdYBL4UKrTZ2UPWsM_0T69fcObs_I,1707
26
- unstructured_ingest/embed/bedrock.py,sha256=AjOMSZakPrGvVUEOacnTbhuE1Ka5MdRZgOnK7Qb2lhY,8931
27
- unstructured_ingest/embed/huggingface.py,sha256=vJeTVeLzPU0mksmZHz-4v9TCtdiNt-d9bsHPbzhpyV8,2253
25
+ unstructured_ingest/embed/azure_openai.py,sha256=fk9yTG-Xr1TSu4n4l8O3DQo9-oceVL9fX_8rehwXsNM,1798
26
+ unstructured_ingest/embed/bedrock.py,sha256=dzfCsatB0i8hUp1YnXmoImoxgvUdZ4srKI6eSvn-lYM,9132
27
+ unstructured_ingest/embed/huggingface.py,sha256=6Gx9L3xa3cv9fX4AMuLsePJQF4T_jwkKjovfqF5X1NM,2435
28
28
  unstructured_ingest/embed/interfaces.py,sha256=Y3PLhgWnMDmtpugE37hlAiBIbC8izrFFXXkrPVby-HY,5137
29
- unstructured_ingest/embed/mixedbreadai.py,sha256=_rqH5sZspKbMM1naSU5zYFgEULzZivtc3-5RrgZErpE,4486
30
- unstructured_ingest/embed/octoai.py,sha256=RWMcoYvp14SPBRoqfPRl0ny_dmt8nVbCW02UGcYetbY,4387
31
- unstructured_ingest/embed/openai.py,sha256=09Lx9lqlDXTHWXRHNiUerK-g3bhK8hSuW9RLzglHWCA,4493
32
- unstructured_ingest/embed/togetherai.py,sha256=a1ko06oSgkUgQahYNtPt6pU8v8bmNkRl7WTEK5foBvc,3527
33
- unstructured_ingest/embed/vertexai.py,sha256=CjSkXYo9ZT590eM3MU3tdDa4p5gHn4HW0bAlm70MjOo,3682
34
- unstructured_ingest/embed/voyageai.py,sha256=84O49p-vQuQzhNndPIfwCYcCNBoHFIX9QHZvc8m5DdA,4496
29
+ unstructured_ingest/embed/mixedbreadai.py,sha256=pmpGQ0E-bfkkg4rvPvsFxL6Oc7H5f0mJGguHtfL7oLc,4592
30
+ unstructured_ingest/embed/octoai.py,sha256=3_xcD0jWfcMzn8OoOgHaIo8SWjeErrZH2DQ4Q9UVwOA,4498
31
+ unstructured_ingest/embed/openai.py,sha256=TMEOPVfm_OSs4tb3Ymd6q5J49R_-YKvO4TOqCHb3bwk,4647
32
+ unstructured_ingest/embed/togetherai.py,sha256=EehrzTRx4sd_P6AG9JkHAGwTG-o93GMaV5ufmJaxKWs,3629
33
+ unstructured_ingest/embed/vertexai.py,sha256=jA3Y-AysVVaYwqkVd_OgRKF0JdHLAgZlRgfgddcZV2o,3763
34
+ unstructured_ingest/embed/voyageai.py,sha256=EOrYzaoXOZ6C4fNkMlCgb8KA8rdfgVXN3USMFpnn0Bs,4698
35
35
  unstructured_ingest/interfaces/__init__.py,sha256=QIkWqjsq9INTa89gPuXlMlQL4s3y5TqLmPkuVuTyXcs,795
36
36
  unstructured_ingest/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
37
37
  unstructured_ingest/interfaces/downloader.py,sha256=xX0ZzsFRSzZb7SAeoeQph8sIbVq13DRw-3MYkdADrY0,2918
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
231
231
  unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
232
232
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
233
233
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
234
- unstructured_ingest-1.0.15.dist-info/METADATA,sha256=FLLtR4NPVBbafV9aTVcwguRub-D_xwmHwQmPWDAGmXU,8720
235
- unstructured_ingest-1.0.15.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
- unstructured_ingest-1.0.15.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
- unstructured_ingest-1.0.15.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
- unstructured_ingest-1.0.15.dist-info/RECORD,,
234
+ unstructured_ingest-1.0.16.dist-info/METADATA,sha256=VKRwEluZjChUvNhdtBQrtuFdgfTzVusL_BGbr-fUprM,8688
235
+ unstructured_ingest-1.0.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
+ unstructured_ingest-1.0.16.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
+ unstructured_ingest-1.0.16.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
+ unstructured_ingest-1.0.16.dist-info/RECORD,,