unstructured-ingest 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/embedders/test_azure_openai.py +59 -0
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/azure_openai.py +31 -0
- unstructured_ingest/v2/processes/connectors/couchbase.py +4 -1
- unstructured_ingest/v2/processes/embedder.py +30 -0
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.4.dist-info}/METADATA +19 -19
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.4.dist-info}/RECORD +11 -9
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.4.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.4.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.4.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.4.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from test.integration.embedders.utils import validate_embedding_output, validate_raw_embedder
|
|
7
|
+
from test.integration.utils import requires_env
|
|
8
|
+
from unstructured_ingest.embed.azure_openai import (
|
|
9
|
+
AzureOpenAIEmbeddingConfig,
|
|
10
|
+
AzureOpenAIEmbeddingEncoder,
|
|
11
|
+
)
|
|
12
|
+
from unstructured_ingest.v2.processes.embedder import Embedder, EmbedderConfig
|
|
13
|
+
|
|
14
|
+
API_KEY = "AZURE_OPENAI_API_KEY"
|
|
15
|
+
ENDPOINT = "AZURE_OPENAI_ENDPOINT"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True)
|
|
19
|
+
class AzureData:
|
|
20
|
+
api_key: str
|
|
21
|
+
endpoint: str
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_azure_data() -> AzureData:
|
|
25
|
+
api_key = os.getenv(API_KEY, None)
|
|
26
|
+
assert api_key
|
|
27
|
+
endpoint = os.getenv(ENDPOINT, None)
|
|
28
|
+
assert endpoint
|
|
29
|
+
return AzureData(api_key, endpoint)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@requires_env(API_KEY, ENDPOINT)
|
|
33
|
+
def test_azure_openai_embedder(embedder_file: Path):
|
|
34
|
+
azure_data = get_azure_data()
|
|
35
|
+
embedder_config = EmbedderConfig(
|
|
36
|
+
embedding_provider="azure-openai",
|
|
37
|
+
embedding_api_key=azure_data.api_key,
|
|
38
|
+
embedding_azure_endpoint=azure_data.endpoint,
|
|
39
|
+
)
|
|
40
|
+
embedder = Embedder(config=embedder_config)
|
|
41
|
+
results = embedder.run(elements_filepath=embedder_file)
|
|
42
|
+
assert results
|
|
43
|
+
with embedder_file.open("r") as f:
|
|
44
|
+
original_elements = json.load(f)
|
|
45
|
+
validate_embedding_output(original_elements=original_elements, output_elements=results)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@requires_env(API_KEY, ENDPOINT)
|
|
49
|
+
def test_raw_azure_openai_embedder(embedder_file: Path):
|
|
50
|
+
azure_data = get_azure_data()
|
|
51
|
+
embedder = AzureOpenAIEmbeddingEncoder(
|
|
52
|
+
config=AzureOpenAIEmbeddingConfig(
|
|
53
|
+
api_key=azure_data.api_key,
|
|
54
|
+
azure_endpoint=azure_data.endpoint,
|
|
55
|
+
)
|
|
56
|
+
)
|
|
57
|
+
validate_raw_embedder(
|
|
58
|
+
embedder=embedder, embedder_file=embedder_file, expected_dimensions=(1536,)
|
|
59
|
+
)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.3.3" # pragma: no cover
|
|
1
|
+
__version__ = "0.3.4" # pragma: no cover
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
|
+
|
|
4
|
+
from pydantic import Field
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
|
|
7
|
+
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from openai import AzureOpenAI
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
|
|
14
|
+
api_version: str = Field(description="Azure API version", default="2024-06-01")
|
|
15
|
+
azure_endpoint: str
|
|
16
|
+
embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
|
|
17
|
+
|
|
18
|
+
@requires_dependencies(["openai"], extras="openai")
|
|
19
|
+
def get_client(self) -> "AzureOpenAI":
|
|
20
|
+
from openai import AzureOpenAI
|
|
21
|
+
|
|
22
|
+
return AzureOpenAI(
|
|
23
|
+
api_key=self.api_key.get_secret_value(),
|
|
24
|
+
api_version=self.api_version,
|
|
25
|
+
azure_endpoint=self.azure_endpoint,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
|
|
30
|
+
class AzureOpenAIEmbeddingEncoder(OpenAIEmbeddingEncoder):
|
|
31
|
+
config: AzureOpenAIEmbeddingConfig
|
|
@@ -219,6 +219,9 @@ class CouchbaseIndexer(Indexer):
|
|
|
219
219
|
|
|
220
220
|
|
|
221
221
|
class CouchbaseDownloaderConfig(DownloaderConfig):
|
|
222
|
+
collection_id: str = Field(
|
|
223
|
+
default="id", description="The unique key of the id field in the collection"
|
|
224
|
+
)
|
|
222
225
|
fields: list[str] = field(default_factory=list)
|
|
223
226
|
|
|
224
227
|
|
|
@@ -250,7 +253,7 @@ class CouchbaseDownloader(Downloader):
|
|
|
250
253
|
def generate_download_response(
|
|
251
254
|
self, result: dict, bucket: str, file_data: FileData
|
|
252
255
|
) -> DownloadResponse:
|
|
253
|
-
record_id = result["id"]
|
|
256
|
+
record_id = result[self.download_config.collection_id]
|
|
254
257
|
filename_id = self.get_identifier(bucket=bucket, record_id=record_id)
|
|
255
258
|
filename = f"{filename_id}.txt"
|
|
256
259
|
download_path = self.download_dir / Path(filename)
|
|
@@ -16,6 +16,7 @@ class EmbedderConfig(BaseModel):
|
|
|
16
16
|
embedding_provider: Optional[
|
|
17
17
|
Literal[
|
|
18
18
|
"openai",
|
|
19
|
+
"azure-openai",
|
|
19
20
|
"huggingface",
|
|
20
21
|
"aws-bedrock",
|
|
21
22
|
"vertexai",
|
|
@@ -43,6 +44,14 @@ class EmbedderConfig(BaseModel):
|
|
|
43
44
|
embedding_aws_region: Optional[str] = Field(
|
|
44
45
|
default="us-west-2", description="AWS region used for AWS-based embedders, such as bedrock"
|
|
45
46
|
)
|
|
47
|
+
embedding_azure_endpoint: Optional[str] = Field(
|
|
48
|
+
default=None,
|
|
49
|
+
description="Your Azure endpoint, including the resource, "
|
|
50
|
+
"e.g. `https://example-resource.azure.openai.com/`",
|
|
51
|
+
)
|
|
52
|
+
embedding_azure_api_version: Optional[str] = Field(
|
|
53
|
+
description="Azure API version", default=None
|
|
54
|
+
)
|
|
46
55
|
|
|
47
56
|
def get_huggingface_embedder(self, embedding_kwargs: dict) -> "BaseEmbeddingEncoder":
|
|
48
57
|
from unstructured_ingest.embed.huggingface import (
|
|
@@ -59,6 +68,25 @@ class EmbedderConfig(BaseModel):
|
|
|
59
68
|
|
|
60
69
|
return OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig.model_validate(embedding_kwargs))
|
|
61
70
|
|
|
71
|
+
def get_azure_openai_embedder(self, embedding_kwargs: dict) -> "BaseEmbeddingEncoder":
|
|
72
|
+
from unstructured_ingest.embed.azure_openai import (
|
|
73
|
+
AzureOpenAIEmbeddingConfig,
|
|
74
|
+
AzureOpenAIEmbeddingEncoder,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
config_kwargs = {
|
|
78
|
+
"api_key": self.embedding_api_key,
|
|
79
|
+
"azure_endpoint": self.embedding_azure_endpoint,
|
|
80
|
+
}
|
|
81
|
+
if api_version := self.embedding_azure_api_version:
|
|
82
|
+
config_kwargs["api_version"] = api_version
|
|
83
|
+
if model_name := self.embedding_model_name:
|
|
84
|
+
config_kwargs["model_name"] = model_name
|
|
85
|
+
|
|
86
|
+
return AzureOpenAIEmbeddingEncoder(
|
|
87
|
+
config=AzureOpenAIEmbeddingConfig.model_validate(config_kwargs)
|
|
88
|
+
)
|
|
89
|
+
|
|
62
90
|
def get_octoai_embedder(self, embedding_kwargs: dict) -> "BaseEmbeddingEncoder":
|
|
63
91
|
from unstructured_ingest.embed.octoai import OctoAiEmbeddingConfig, OctoAIEmbeddingEncoder
|
|
64
92
|
|
|
@@ -146,6 +174,8 @@ class EmbedderConfig(BaseModel):
|
|
|
146
174
|
return self.get_mixedbread_embedder(embedding_kwargs=kwargs)
|
|
147
175
|
if self.embedding_provider == "togetherai":
|
|
148
176
|
return self.get_togetherai_embedder(embedding_kwargs=kwargs)
|
|
177
|
+
if self.embedding_provider == "azure-openai":
|
|
178
|
+
return self.get_azure_openai_embedder(embedding_kwargs=kwargs)
|
|
149
179
|
|
|
150
180
|
raise ValueError(f"{self.embedding_provider} not a recognized encoder")
|
|
151
181
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.3.3
|
|
3
|
+
Version: 0.3.4
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.13
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
-
Requires-Dist: dataclasses-json
|
|
26
|
-
Requires-Dist: pydantic>=2.7
|
|
25
|
+
Requires-Dist: opentelemetry-sdk
|
|
27
26
|
Requires-Dist: pandas
|
|
28
|
-
Requires-Dist: tqdm
|
|
29
27
|
Requires-Dist: python-dateutil
|
|
28
|
+
Requires-Dist: pydantic>=2.7
|
|
29
|
+
Requires-Dist: dataclasses-json
|
|
30
|
+
Requires-Dist: tqdm
|
|
30
31
|
Requires-Dist: click
|
|
31
|
-
Requires-Dist: opentelemetry-sdk
|
|
32
32
|
Provides-Extra: airtable
|
|
33
33
|
Requires-Dist: pyairtable; extra == "airtable"
|
|
34
34
|
Provides-Extra: astradb
|
|
@@ -44,15 +44,15 @@ Provides-Extra: biomed
|
|
|
44
44
|
Requires-Dist: bs4; extra == "biomed"
|
|
45
45
|
Requires-Dist: requests; extra == "biomed"
|
|
46
46
|
Provides-Extra: box
|
|
47
|
-
Requires-Dist: boxfs; extra == "box"
|
|
48
47
|
Requires-Dist: fsspec; extra == "box"
|
|
48
|
+
Requires-Dist: boxfs; extra == "box"
|
|
49
49
|
Provides-Extra: chroma
|
|
50
50
|
Requires-Dist: chromadb; extra == "chroma"
|
|
51
51
|
Provides-Extra: clarifai
|
|
52
52
|
Requires-Dist: clarifai; extra == "clarifai"
|
|
53
53
|
Provides-Extra: confluence
|
|
54
|
-
Requires-Dist: atlassian-python-api; extra == "confluence"
|
|
55
54
|
Requires-Dist: requests; extra == "confluence"
|
|
55
|
+
Requires-Dist: atlassian-python-api; extra == "confluence"
|
|
56
56
|
Provides-Extra: couchbase
|
|
57
57
|
Requires-Dist: couchbase; extra == "couchbase"
|
|
58
58
|
Provides-Extra: csv
|
|
@@ -60,8 +60,8 @@ Requires-Dist: unstructured[tsv]; extra == "csv"
|
|
|
60
60
|
Provides-Extra: databricks-volumes
|
|
61
61
|
Requires-Dist: databricks-sdk; extra == "databricks-volumes"
|
|
62
62
|
Provides-Extra: delta-table
|
|
63
|
-
Requires-Dist: boto3; extra == "delta-table"
|
|
64
63
|
Requires-Dist: deltalake; extra == "delta-table"
|
|
64
|
+
Requires-Dist: boto3; extra == "delta-table"
|
|
65
65
|
Provides-Extra: discord
|
|
66
66
|
Requires-Dist: discord-py; extra == "discord"
|
|
67
67
|
Provides-Extra: doc
|
|
@@ -69,8 +69,8 @@ Requires-Dist: unstructured[docx]; extra == "doc"
|
|
|
69
69
|
Provides-Extra: docx
|
|
70
70
|
Requires-Dist: unstructured[docx]; extra == "docx"
|
|
71
71
|
Provides-Extra: dropbox
|
|
72
|
-
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
73
72
|
Requires-Dist: fsspec; extra == "dropbox"
|
|
73
|
+
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
74
74
|
Provides-Extra: elasticsearch
|
|
75
75
|
Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
|
|
76
76
|
Provides-Extra: embed-huggingface
|
|
@@ -78,8 +78,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
|
|
|
78
78
|
Provides-Extra: embed-mixedbreadai
|
|
79
79
|
Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
|
|
80
80
|
Provides-Extra: embed-octoai
|
|
81
|
-
Requires-Dist: openai; extra == "embed-octoai"
|
|
82
81
|
Requires-Dist: tiktoken; extra == "embed-octoai"
|
|
82
|
+
Requires-Dist: openai; extra == "embed-octoai"
|
|
83
83
|
Provides-Extra: embed-vertexai
|
|
84
84
|
Requires-Dist: vertexai; extra == "embed-vertexai"
|
|
85
85
|
Provides-Extra: embed-voyageai
|
|
@@ -88,18 +88,18 @@ Provides-Extra: epub
|
|
|
88
88
|
Requires-Dist: unstructured[epub]; extra == "epub"
|
|
89
89
|
Provides-Extra: gcs
|
|
90
90
|
Requires-Dist: bs4; extra == "gcs"
|
|
91
|
-
Requires-Dist: gcsfs; extra == "gcs"
|
|
92
91
|
Requires-Dist: fsspec; extra == "gcs"
|
|
92
|
+
Requires-Dist: gcsfs; extra == "gcs"
|
|
93
93
|
Provides-Extra: github
|
|
94
|
-
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
95
94
|
Requires-Dist: requests; extra == "github"
|
|
95
|
+
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
96
96
|
Provides-Extra: gitlab
|
|
97
97
|
Requires-Dist: python-gitlab; extra == "gitlab"
|
|
98
98
|
Provides-Extra: google-drive
|
|
99
99
|
Requires-Dist: google-api-python-client; extra == "google-drive"
|
|
100
100
|
Provides-Extra: hubspot
|
|
101
|
-
Requires-Dist: urllib3; extra == "hubspot"
|
|
102
101
|
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
102
|
+
Requires-Dist: urllib3; extra == "hubspot"
|
|
103
103
|
Provides-Extra: jira
|
|
104
104
|
Requires-Dist: atlassian-python-api; extra == "jira"
|
|
105
105
|
Provides-Extra: kafka
|
|
@@ -117,19 +117,19 @@ Requires-Dist: pymongo; extra == "mongodb"
|
|
|
117
117
|
Provides-Extra: msg
|
|
118
118
|
Requires-Dist: unstructured[msg]; extra == "msg"
|
|
119
119
|
Provides-Extra: notion
|
|
120
|
-
Requires-Dist: backoff; extra == "notion"
|
|
121
120
|
Requires-Dist: htmlBuilder; extra == "notion"
|
|
122
|
-
Requires-Dist: notion-client; extra == "notion"
|
|
121
|
+
Requires-Dist: backoff; extra == "notion"
|
|
123
122
|
Requires-Dist: httpx; extra == "notion"
|
|
123
|
+
Requires-Dist: notion-client; extra == "notion"
|
|
124
124
|
Provides-Extra: odt
|
|
125
125
|
Requires-Dist: unstructured[odt]; extra == "odt"
|
|
126
126
|
Provides-Extra: onedrive
|
|
127
|
+
Requires-Dist: bs4; extra == "onedrive"
|
|
127
128
|
Requires-Dist: msal; extra == "onedrive"
|
|
128
129
|
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
129
|
-
Requires-Dist: bs4; extra == "onedrive"
|
|
130
130
|
Provides-Extra: openai
|
|
131
|
-
Requires-Dist: openai; extra == "openai"
|
|
132
131
|
Requires-Dist: tiktoken; extra == "openai"
|
|
132
|
+
Requires-Dist: openai; extra == "openai"
|
|
133
133
|
Provides-Extra: opensearch
|
|
134
134
|
Requires-Dist: opensearch-py; extra == "opensearch"
|
|
135
135
|
Provides-Extra: org
|
|
@@ -163,8 +163,8 @@ Requires-Dist: s3fs; extra == "s3"
|
|
|
163
163
|
Provides-Extra: salesforce
|
|
164
164
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
165
165
|
Provides-Extra: sftp
|
|
166
|
-
Requires-Dist: paramiko; extra == "sftp"
|
|
167
166
|
Requires-Dist: fsspec; extra == "sftp"
|
|
167
|
+
Requires-Dist: paramiko; extra == "sftp"
|
|
168
168
|
Provides-Extra: sharepoint
|
|
169
169
|
Requires-Dist: msal; extra == "sharepoint"
|
|
170
170
|
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
@@ -173,8 +173,8 @@ Requires-Dist: singlestoredb; extra == "singlestore"
|
|
|
173
173
|
Provides-Extra: slack
|
|
174
174
|
Requires-Dist: slack-sdk[optional]; extra == "slack"
|
|
175
175
|
Provides-Extra: snowflake
|
|
176
|
-
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
177
176
|
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
177
|
+
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
178
178
|
Provides-Extra: togetherai
|
|
179
179
|
Requires-Dist: together; extra == "togetherai"
|
|
180
180
|
Provides-Extra: tsv
|
|
@@ -39,6 +39,7 @@ test/integration/connectors/weaviate/test_cloud.py,sha256=07VxNRxWWcgTstFfpoZ1Fl
|
|
|
39
39
|
test/integration/connectors/weaviate/test_local.py,sha256=SK6iEwQUKiCd0X99BEk8GlQoLaCcJcFPt09NN526Ct0,4508
|
|
40
40
|
test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
41
|
test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
|
|
42
|
+
test/integration/embedders/test_azure_openai.py,sha256=6tFpKFBFRXD49imhhRzsvy3MPtuZ4L1PtnKyMVBRAqc,1808
|
|
42
43
|
test/integration/embedders/test_bedrock.py,sha256=0oBRNS_DtFDGQ22Z1T3t6VOJ31PrItgvnJpqcLe9Fg4,1903
|
|
43
44
|
test/integration/embedders/test_huggingface.py,sha256=0mMTOO-Nh7KB70AGs_7LLQIxMYrnSPqyihriUeqACbM,1007
|
|
44
45
|
test/integration/embedders/test_mixedbread.py,sha256=RrLv8SByMNXsgrlh94RbaT-VyxZ4-DILO-OPpmOwvSI,1441
|
|
@@ -82,7 +83,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
82
83
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
83
84
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
84
85
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
85
|
-
unstructured_ingest/__version__.py,sha256=
|
|
86
|
+
unstructured_ingest/__version__.py,sha256=0rNziXrR8RxleBY3pKm77TbOCJ0CwApHiLqXBAViUAo,42
|
|
86
87
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
87
88
|
unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
|
|
88
89
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -250,6 +251,7 @@ unstructured_ingest/connector/notion/types/database_properties/unique_id.py,sha2
|
|
|
250
251
|
unstructured_ingest/connector/notion/types/database_properties/url.py,sha256=iXQ2tVUm9UlKVtDA0NQiFIRJ5PHYW9wOaWt2vFfSVCg,862
|
|
251
252
|
unstructured_ingest/connector/notion/types/database_properties/verification.py,sha256=J_DLjY-v2T6xDGMQ7FkI0YMKMA6SG6Y3yYW7qUD1hKA,2334
|
|
252
253
|
unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
254
|
+
unstructured_ingest/embed/azure_openai.py,sha256=4YBOIxv66wVZ5EqNNC4uCDPNJ3VrsLPe5wwagT6zqe0,1001
|
|
253
255
|
unstructured_ingest/embed/bedrock.py,sha256=-PRdZsF44vwi6G4G75gdO31AJKfZWClOXkJQAk7rEO8,3096
|
|
254
256
|
unstructured_ingest/embed/huggingface.py,sha256=2cBiQhOhfWHX3hS-eKjocysOkUaRlyRfUj9Kxjrp6cE,1934
|
|
255
257
|
unstructured_ingest/embed/interfaces.py,sha256=au4Xp8ciDvo4bidlUbazFW2aC7NZW5-UDLKXBFVzAX4,2025
|
|
@@ -389,7 +391,7 @@ unstructured_ingest/v2/pipeline/steps/upload.py,sha256=zlgXgwReX9TBOdfTpS9hETah4
|
|
|
389
391
|
unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
|
|
390
392
|
unstructured_ingest/v2/processes/chunker.py,sha256=31-7ojsM2coIt2rMR0KOb82IxLVJfNHbqYUOsDkhxN8,5491
|
|
391
393
|
unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
|
|
392
|
-
unstructured_ingest/v2/processes/embedder.py,sha256=
|
|
394
|
+
unstructured_ingest/v2/processes/embedder.py,sha256=xCBpaL07WnVUOUW8SHktaf1vwBGZxl3Nf8-99509ClQ,7721
|
|
393
395
|
unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
|
|
394
396
|
unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcDPCOPA_BZjLzLF71nOY,8194
|
|
395
397
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
@@ -399,7 +401,7 @@ unstructured_ingest/v2/processes/connectors/astradb.py,sha256=QTUQ-cv_iZi9eaXRRH
|
|
|
399
401
|
unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=-6IijSWGqj-85vD0c4l5wdMHp-LF371jO8j53PPRB4I,12002
|
|
400
402
|
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=skrxRPHZ8y3JxNa0dt5SVitHiDQ5WVxLvY_kh2-QUrQ,8029
|
|
401
403
|
unstructured_ingest/v2/processes/connectors/confluence.py,sha256=qQApDcmPBGg4tHXwSOj4JPkAbrO9GQ4NRlaETjhp25U,7003
|
|
402
|
-
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=
|
|
404
|
+
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=LbUJLt6fqaNYSmy9vUiovG-UOALMcvh8OD-gZAaf-f4,12333
|
|
403
405
|
unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=1yS7ivEyiucwd_kv6LL5HQdGabT43yeG6XCdwiz89hc,8019
|
|
404
406
|
unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=yBgCeLy9iCVI8bBDcHHuHB0H3BO05e9E1OccbHwvKAo,9724
|
|
405
407
|
unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=EEwXK1Anlu-eXl5qxmdDIqPYW7eMSez6WGlTPG2vSn8,13121
|
|
@@ -459,9 +461,9 @@ unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-
|
|
|
459
461
|
unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
|
|
460
462
|
unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
|
|
461
463
|
unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=ln1p9ahFTaT-qsL7p4bgw_IqnU60As_l6vVAqUWyQVE,11655
|
|
462
|
-
unstructured_ingest-0.3.3.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
463
|
-
unstructured_ingest-0.3.
|
|
464
|
-
unstructured_ingest-0.3.3.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
465
|
-
unstructured_ingest-0.3.3.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
466
|
-
unstructured_ingest-0.3.3.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
467
|
-
unstructured_ingest-0.3.3.dist-info/RECORD,,
|
|
464
|
+
unstructured_ingest-0.3.4.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
465
|
+
unstructured_ingest-0.3.4.dist-info/METADATA,sha256=6Nj2KHvch7j5QLfahz5NcFHmmNq9vNixTfZSDUEQPjo,7393
|
|
466
|
+
unstructured_ingest-0.3.4.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
467
|
+
unstructured_ingest-0.3.4.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
468
|
+
unstructured_ingest-0.3.4.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
469
|
+
unstructured_ingest-0.3.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.3.3.dist-info → unstructured_ingest-0.3.4.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|