unstructured-ingest 1.0.17__py3-none-any.whl → 1.0.19__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "1.0.17" # pragma: no cover
1
+ __version__ = "1.0.19" # pragma: no cover
@@ -25,9 +25,8 @@ class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
25
25
  def get_client(self) -> "AzureOpenAI":
26
26
  from openai import AzureOpenAI
27
27
 
28
- api_key = self.api_key.get_secret_value() if self.api_key else None
29
28
  return AzureOpenAI(
30
- api_key=api_key,
29
+ api_key=self.api_key.get_secret_value(),
31
30
  api_version=self.api_version,
32
31
  azure_endpoint=self.azure_endpoint,
33
32
  )
@@ -36,9 +35,8 @@ class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
36
35
  def get_async_client(self) -> "AsyncAzureOpenAI":
37
36
  from openai import AsyncAzureOpenAI
38
37
 
39
- api_key = self.api_key.get_secret_value() if self.api_key else None
40
38
  return AsyncAzureOpenAI(
41
- api_key=api_key,
39
+ api_key=self.api_key.get_secret_value(),
42
40
  api_version=self.api_version,
43
41
  azure_endpoint=self.azure_endpoint,
44
42
  )
@@ -1,5 +1,5 @@
1
1
  from dataclasses import dataclass
2
- from typing import TYPE_CHECKING, Optional
2
+ from typing import TYPE_CHECKING
3
3
 
4
4
  from pydantic import Field, SecretStr
5
5
 
@@ -24,7 +24,7 @@ if TYPE_CHECKING:
24
24
 
25
25
 
26
26
  class OctoAiEmbeddingConfig(EmbeddingConfig):
27
- api_key: Optional[SecretStr] = Field(description="API key for OctoAI", default=None)
27
+ api_key: SecretStr = Field(description="API key for OctoAI")
28
28
  embedder_model_name: str = Field(
29
29
  default="thenlper/gte-large", alias="model_name", description="octoai model name"
30
30
  )
@@ -77,8 +77,7 @@ class OctoAiEmbeddingConfig(EmbeddingConfig):
77
77
  """Creates an OpenAI python client to embed elements. Uses the OpenAI SDK."""
78
78
  from openai import OpenAI
79
79
 
80
- api_key = self.api_key.get_secret_value() if self.api_key else None
81
- return OpenAI(api_key=api_key, base_url=self.base_url)
80
+ return OpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
82
81
 
83
82
  @requires_dependencies(
84
83
  ["openai", "tiktoken"],
@@ -88,8 +87,7 @@ class OctoAiEmbeddingConfig(EmbeddingConfig):
88
87
  """Creates an OpenAI python client to embed elements. Uses the OpenAI SDK."""
89
88
  from openai import AsyncOpenAI
90
89
 
91
- api_key = self.api_key.get_secret_value() if self.api_key else None
92
- return AsyncOpenAI(api_key=api_key, base_url=self.base_url)
90
+ return AsyncOpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
93
91
 
94
92
 
95
93
  @dataclass
@@ -24,7 +24,7 @@ if TYPE_CHECKING:
24
24
 
25
25
 
26
26
  class OpenAIEmbeddingConfig(EmbeddingConfig):
27
- api_key: Optional[SecretStr] = Field(description="API key for OpenAI", default=None)
27
+ api_key: SecretStr = Field(description="API key for OpenAI")
28
28
  embedder_model_name: str = Field(
29
29
  default="text-embedding-ada-002", alias="model_name", description="OpenAI model name"
30
30
  )
@@ -88,15 +88,13 @@ class OpenAIEmbeddingConfig(EmbeddingConfig):
88
88
  def get_client(self) -> "OpenAI":
89
89
  from openai import OpenAI
90
90
 
91
- api_key = self.api_key.get_secret_value() if self.api_key else None
92
- return OpenAI(api_key=api_key, base_url=self.base_url)
91
+ return OpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
93
92
 
94
93
  @requires_dependencies(["openai"], extras="openai")
95
94
  def get_async_client(self) -> "AsyncOpenAI":
96
95
  from openai import AsyncOpenAI
97
96
 
98
- api_key = self.api_key.get_secret_value() if self.api_key else None
99
- return AsyncOpenAI(api_key=api_key, base_url=self.base_url)
97
+ return AsyncOpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
100
98
 
101
99
 
102
100
  @dataclass
@@ -1,5 +1,5 @@
1
1
  from dataclasses import dataclass
2
- from typing import TYPE_CHECKING, Any, Optional
2
+ from typing import TYPE_CHECKING, Any
3
3
 
4
4
  from pydantic import Field, SecretStr
5
5
 
@@ -20,7 +20,7 @@ if TYPE_CHECKING:
20
20
 
21
21
 
22
22
  class TogetherAIEmbeddingConfig(EmbeddingConfig):
23
- api_key: Optional[SecretStr] = Field(description="API key for Together AI", default=None)
23
+ api_key: SecretStr = Field(description="API key for Together AI")
24
24
  embedder_model_name: str = Field(
25
25
  default="togethercomputer/m2-bert-80M-8k-retrieval",
26
26
  alias="model_name",
@@ -58,15 +58,13 @@ class TogetherAIEmbeddingConfig(EmbeddingConfig):
58
58
  def get_client(self) -> "Together":
59
59
  from together import Together
60
60
 
61
- api_key = self.api_key.get_secret_value() if self.api_key else None
62
- return Together(api_key=api_key)
61
+ return Together(api_key=self.api_key.get_secret_value())
63
62
 
64
63
  @requires_dependencies(["together"], extras="togetherai")
65
64
  def get_async_client(self) -> "AsyncTogether":
66
65
  from together import AsyncTogether
67
66
 
68
- api_key = self.api_key.get_secret_value() if self.api_key else None
69
- return AsyncTogether(api_key=api_key)
67
+ return AsyncTogether(api_key=self.api_key.get_secret_value())
70
68
 
71
69
 
72
70
  @dataclass
@@ -26,7 +26,7 @@ class VoyageAIEmbeddingConfig(EmbeddingConfig):
26
26
  le=128,
27
27
  description="Batch size for embedding requests. VoyageAI has a limit of 128.",
28
28
  )
29
- api_key: Optional[SecretStr] = Field(description="API key for VoyageAI", default=None)
29
+ api_key: SecretStr = Field(description="API key for VoyageAI")
30
30
  embedder_model_name: str = Field(
31
31
  default="voyage-3", alias="model_name", description="VoyageAI model name"
32
32
  )
@@ -65,9 +65,8 @@ class VoyageAIEmbeddingConfig(EmbeddingConfig):
65
65
  """Creates a VoyageAI python client to embed elements."""
66
66
  from voyageai import Client as VoyageAIClient
67
67
 
68
- api_key = self.api_key.get_secret_value() if self.api_key else None
69
68
  client = VoyageAIClient(
70
- api_key=api_key,
69
+ api_key=self.api_key.get_secret_value(),
71
70
  max_retries=self.max_retries,
72
71
  timeout=self.timeout_in_seconds,
73
72
  )
@@ -81,9 +80,8 @@ class VoyageAIEmbeddingConfig(EmbeddingConfig):
81
80
  """Creates a VoyageAI python client to embed elements."""
82
81
  from voyageai import AsyncClient as AsyncVoyageAIClient
83
82
 
84
- api_key = self.api_key.get_secret_value() if self.api_key else None
85
83
  client = AsyncVoyageAIClient(
86
- api_key=api_key,
84
+ api_key=self.api_key.get_secret_value(),
87
85
  max_retries=self.max_retries,
88
86
  timeout=self.timeout_in_seconds,
89
87
  )
@@ -1,4 +1,4 @@
1
- CREATE TABLE elements (
1
+ CREATE TABLE IF NOT EXISTS `elements` (
2
2
  id STRING NOT NULL PRIMARY KEY,
3
3
  record_id STRING NOT NULL,
4
4
  element_id STRING NOT NULL,
@@ -7,4 +7,3 @@ CREATE TABLE elements (
7
7
  type STRING,
8
8
  metadata VARIANT
9
9
  );
10
-
@@ -136,7 +136,7 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
136
136
  def get_table_columns(self) -> dict[str, str]:
137
137
  if self._columns is None:
138
138
  with self.get_cursor() as cursor:
139
- cursor.execute(f"SELECT * from {self.upload_config.table_name} LIMIT 1")
139
+ cursor.execute(f"SELECT * from `{self.upload_config.table_name}` LIMIT 1")
140
140
  self._columns = {desc[0]: desc[1] for desc in cursor.description}
141
141
  return self._columns
142
142
 
@@ -152,7 +152,7 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
152
152
  )
153
153
  with self.get_cursor() as cursor:
154
154
  cursor.execute(
155
- f"DELETE FROM {self.upload_config.table_name} WHERE {RECORD_ID_LABEL} = '{file_data.identifier}'" # noqa: E501
155
+ f"DELETE FROM `{self.upload_config.table_name}` WHERE {RECORD_ID_LABEL} = '{file_data.identifier}'" # noqa: E501
156
156
  )
157
157
  results = cursor.fetchall()
158
158
  deleted_rows = results[0][0]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.17
3
+ Version: 1.0.19
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=HJycVzTiDHeRdW4JUDAnGRPwiviRO2FPCxl56CUWKGY,43
2
+ unstructured_ingest/__version__.py,sha256=QEY4I6tpDtP0kidFO1nzGaJrkm23PnuMCi1-QfQdUBQ,43
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
5
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -22,16 +22,16 @@ unstructured_ingest/data_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
22
22
  unstructured_ingest/data_types/entities.py,sha256=ECc6EkZ5_ZUvK7uaALYOynfFmofIrHYIJZfb67hUIxA,371
23
23
  unstructured_ingest/data_types/file_data.py,sha256=J0RQa7YXhhxiLVzhPbF5Hl2nzSpxLFK9vrP6RTBWlSg,3833
24
24
  unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- unstructured_ingest/embed/azure_openai.py,sha256=nHEkvWH7mETrJB-C9qlci6bEpSEPiW9peG82EUDQ954,1902
25
+ unstructured_ingest/embed/azure_openai.py,sha256=fk9yTG-Xr1TSu4n4l8O3DQo9-oceVL9fX_8rehwXsNM,1798
26
26
  unstructured_ingest/embed/bedrock.py,sha256=dzfCsatB0i8hUp1YnXmoImoxgvUdZ4srKI6eSvn-lYM,9132
27
27
  unstructured_ingest/embed/huggingface.py,sha256=6Gx9L3xa3cv9fX4AMuLsePJQF4T_jwkKjovfqF5X1NM,2435
28
28
  unstructured_ingest/embed/interfaces.py,sha256=Y3PLhgWnMDmtpugE37hlAiBIbC8izrFFXXkrPVby-HY,5137
29
29
  unstructured_ingest/embed/mixedbreadai.py,sha256=pmpGQ0E-bfkkg4rvPvsFxL6Oc7H5f0mJGguHtfL7oLc,4592
30
- unstructured_ingest/embed/octoai.py,sha256=imuH_vLlmDd3GgAgiA0AaXB1fGjaI9lPpCCBG5HbpU8,4678
31
- unstructured_ingest/embed/openai.py,sha256=yYqpSZcD8kUZOT36oj39hN8sCFpPKShTHVyV4dI3Bbg,4775
32
- unstructured_ingest/embed/togetherai.py,sha256=19Le-SdMLp2U1qy5mTk_kO90b-AbOG_-a7Fslp1caJA,3767
30
+ unstructured_ingest/embed/octoai.py,sha256=yZuD7R4mEKS4Jjyae_IrNWogMPOFFS8gW5oUllj3ROU,4540
31
+ unstructured_ingest/embed/openai.py,sha256=TMEOPVfm_OSs4tb3Ymd6q5J49R_-YKvO4TOqCHb3bwk,4647
32
+ unstructured_ingest/embed/togetherai.py,sha256=EehrzTRx4sd_P6AG9JkHAGwTG-o93GMaV5ufmJaxKWs,3629
33
33
  unstructured_ingest/embed/vertexai.py,sha256=jA3Y-AysVVaYwqkVd_OgRKF0JdHLAgZlRgfgddcZV2o,3763
34
- unstructured_ingest/embed/voyageai.py,sha256=-aKSxZm6G5NcKlloA6je70HmT30WSgcnZWCGEoz9PPo,4826
34
+ unstructured_ingest/embed/voyageai.py,sha256=EOrYzaoXOZ6C4fNkMlCgb8KA8rdfgVXN3USMFpnn0Bs,4698
35
35
  unstructured_ingest/interfaces/__init__.py,sha256=QIkWqjsq9INTa89gPuXlMlQL4s3y5TqLmPkuVuTyXcs,795
36
36
  unstructured_ingest/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
37
37
  unstructured_ingest/interfaces/downloader.py,sha256=xX0ZzsFRSzZb7SAeoeQph8sIbVq13DRw-3MYkdADrY0,2918
@@ -89,7 +89,7 @@ unstructured_ingest/processes/connectors/slack.py,sha256=EkFj9PcAu5_gF2xLogikKDA
89
89
  unstructured_ingest/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
90
90
  unstructured_ingest/processes/connectors/vectara.py,sha256=xrC6jkgW8BII4UjdzUelDu122xT484cpfMTK2wl-sko,12292
91
91
  unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
92
- unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=dUZZDNkyvQXKqoAThRz3ek7zaUE2l_LAQimlG5WZhH4,211
92
+ unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=8a9HTcRWA6IuswSD632b_uZSO6Dax_0rUYnflqktcek,226
93
93
  unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
94
94
  unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
95
95
  unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=OWQrne9-5hPzc-kxGa2P53M3DoksDzMDyjLhQyihdCo,8020
@@ -97,7 +97,7 @@ unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=WhGTp6
97
97
  unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=pF2d6uAIbwJJUeOIG5xknUMCGc5d9Aztmc2776wp-a0,3740
98
98
  unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=y9AvVl6PtnIxlTlrPj_wyHBDBRJNq3uoTOuZwTryNg8,2994
99
99
  unstructured_ingest/processes/connectors/databricks/volumes_native.py,sha256=pivySGMmFSsyuB42ARAWAPXFQ7qTQxO3dfEoE23pBNM,3104
100
- unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=tqi6PpYpIBMTZcYZXl5Lw0YuawyDvjHI08TKPFFTTr0,8194
100
+ unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=K-EBsV99I9ubD3A0cqAJTC4vpSwrnBeACFGWbgGCSsY,8198
101
101
  unstructured_ingest/processes/connectors/duckdb/__init__.py,sha256=Dr6BRJJGefJnnp_vn5W5gBd7vrCCXTMLweuDIqTP-fM,558
102
102
  unstructured_ingest/processes/connectors/duckdb/base.py,sha256=bTLhilg6mgERNCpeeNNl7wxy3xkOt23O9XpCyD0WVY4,2945
103
103
  unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256=jsmibTd_yvYzkCT05HhCJvplyobtjfNILC3zyTuCcVY,4464
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
231
231
  unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
232
232
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
233
233
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
234
- unstructured_ingest-1.0.17.dist-info/METADATA,sha256=XvwbN72yhaJKn5uYKyWePqbH5ZmptqS9I0RP___NbXQ,8694
235
- unstructured_ingest-1.0.17.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
- unstructured_ingest-1.0.17.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
- unstructured_ingest-1.0.17.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
- unstructured_ingest-1.0.17.dist-info/RECORD,,
234
+ unstructured_ingest-1.0.19.dist-info/METADATA,sha256=KYCpNnFQGIb6yuOkgP9qKKvLxkQ0Mw2qdWz_I124nYM,8694
235
+ unstructured_ingest-1.0.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
+ unstructured_ingest-1.0.19.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
+ unstructured_ingest-1.0.19.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
+ unstructured_ingest-1.0.19.dist-info/RECORD,,