unstructured-ingest 0.0.22__py3-none-any.whl → 0.0.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/interfaces.py +5 -5
- unstructured_ingest/embed/__init__.py +0 -17
- unstructured_ingest/embed/vertexai.py +1 -1
- unstructured_ingest/embed/voyageai.py +2 -2
- unstructured_ingest/interfaces.py +5 -5
- unstructured_ingest/v2/cli/utils/model_conversion.py +3 -3
- unstructured_ingest/v2/processes/connectors/pinecone.py +32 -21
- unstructured_ingest/v2/processes/embedder.py +10 -10
- {unstructured_ingest-0.0.22.dist-info → unstructured_ingest-0.0.24.dist-info}/METADATA +10 -10
- {unstructured_ingest-0.0.22.dist-info → unstructured_ingest-0.0.24.dist-info}/RECORD +15 -15
- {unstructured_ingest-0.0.22.dist-info → unstructured_ingest-0.0.24.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.0.22.dist-info → unstructured_ingest-0.0.24.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.0.22.dist-info → unstructured_ingest-0.0.24.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.0.22.dist-info → unstructured_ingest-0.0.24.dist-info}/top_level.txt +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.22" # pragma: no cover
|
|
1
|
+
__version__ = "0.0.24" # pragma: no cover
|
|
@@ -415,11 +415,11 @@ class CliEmbeddingConfig(EmbeddingConfig, CliMixin):
|
|
|
415
415
|
@staticmethod
|
|
416
416
|
def get_cli_options() -> t.List[click.Option]:
|
|
417
417
|
embed_providers = [
|
|
418
|
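-
"langchain-openai",
|
|
419
|
-
"langchain-huggingface",
|
|
420
|
-
"langchain-aws-bedrock",
|
|
421
|
-
"langchain-vertexai",
|
|
422
|
-
"langchain-voyageai",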
|
|
418
|
+
"openai",
|
|
419
|
+
"huggingface",
|
|
420
|
+
"aws-bedrock",
|
|
421
|
+
"vertexai",
|
|
422
|
+
"voyageai",
|
|
423
423
|
"octoai",
|
|
424
424
|
]
|
|
425
425
|
options = [
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from unstructured_ingest.embed.bedrock import BedrockEmbeddingEncoder
|
|
2
|
-
from unstructured_ingest.embed.huggingface import HuggingFaceEmbeddingEncoder
|
|
3
|
-
from unstructured_ingest.embed.mixedbreadai import MixedbreadAIEmbeddingEncoder
|
|
4
|
-
from unstructured_ingest.embed.octoai import OctoAIEmbeddingEncoder
|
|
5
|
-
from unstructured_ingest.embed.openai import OpenAIEmbeddingEncoder
|
|
6
|
-
from unstructured_ingest.embed.vertexai import VertexAIEmbeddingEncoder
|
|
7
|
-
from unstructured_ingest.embed.voyageai import VoyageAIEmbeddingEncoder
|
|
8
|
-
|
|
9
|
-
EMBEDDING_PROVIDER_TO_CLASS_MAP = {
|
|
10
|
-
"langchain-openai": OpenAIEmbeddingEncoder,
|
|
11
|
-
"langchain-huggingface": HuggingFaceEmbeddingEncoder,
|
|
12
|
-
"langchain-aws-bedrock": BedrockEmbeddingEncoder,
|
|
13
|
-
"langchain-vertexai": VertexAIEmbeddingEncoder,
|
|
14
|
-
"langchain-voyageai": VoyageAIEmbeddingEncoder,
|
|
15
|
-
"mixedbread-ai": MixedbreadAIEmbeddingEncoder,
|
|
16
|
-
"octoai": OctoAIEmbeddingEncoder,
|
|
17
|
-
}
|
|
@@ -45,7 +45,7 @@ class VertexAIEmbeddingConfig(EmbeddingConfig):
|
|
|
45
45
|
extras="embed-vertexai",
|
|
46
46
|
)
|
|
47
47
|
def get_client(self) -> "TextEmbeddingModel":
|
|
48
|
-
"""Creates a
|
|
48
|
+
"""Creates a VertexAI python client to embed elements."""
|
|
49
49
|
from vertexai.language_models import TextEmbeddingModel
|
|
50
50
|
|
|
51
51
|
self.register_application_credentials()
|
|
@@ -20,11 +20,11 @@ class VoyageAIEmbeddingConfig(EmbeddingConfig):
|
|
|
20
20
|
timeout_in_seconds: Optional[int] = None
|
|
21
21
|
|
|
22
22
|
@requires_dependencies(
|
|
23
|
-
["
|
|
23
|
+
["voyageai"],
|
|
24
24
|
extras="embed-voyageai",
|
|
25
25
|
)
|
|
26
26
|
def get_client(self) -> "VoyageAIClient":
|
|
27
|
-
"""Creates a
|
|
27
|
+
"""Creates a VoyageAI python client to embed elements."""
|
|
28
28
|
from voyageai import Client as VoyageAIClient
|
|
29
29
|
|
|
30
30
|
client = VoyageAIClient(
|
|
@@ -204,14 +204,14 @@ class EmbeddingConfig(BaseConfig):
|
|
|
204
204
|
if self.model_name:
|
|
205
205
|
kwargs["model_name"] = self.model_name
|
|
206
206
|
# TODO make this more dynamic to map to encoder configs
|
|
207
|
-
if self.provider == "langchain-openai":
|
|
207
|
+
if self.provider == "openai":
|
|
208
208
|
from unstructured_ingest.embed.openai import (
|
|
209
209
|
OpenAIEmbeddingConfig,
|
|
210
210
|
OpenAIEmbeddingEncoder,
|
|
211
211
|
)
|
|
212
212
|
|
|
213
213
|
return OpenAIEmbeddingEncoder(config=OpenAIEmbeddingConfig(**kwargs))
|
|
214
|
-
elif self.provider == "langchain-huggingface":
|
|
214
|
+
elif self.provider == "huggingface":
|
|
215
215
|
from unstructured_ingest.embed.huggingface import (
|
|
216
216
|
HuggingFaceEmbeddingConfig,
|
|
217
217
|
HuggingFaceEmbeddingEncoder,
|
|
@@ -225,7 +225,7 @@ class EmbeddingConfig(BaseConfig):
|
|
|
225
225
|
)
|
|
226
226
|
|
|
227
227
|
return OctoAIEmbeddingEncoder(config=OctoAiEmbeddingConfig(**kwargs))
|
|
228
|
-
elif self.provider == "langchain-aws-bedrock":
|
|
228
|
+
elif self.provider == "aws-bedrock":
|
|
229
229
|
from unstructured_ingest.embed.bedrock import (
|
|
230
230
|
BedrockEmbeddingConfig,
|
|
231
231
|
BedrockEmbeddingEncoder,
|
|
@@ -238,14 +238,14 @@ class EmbeddingConfig(BaseConfig):
|
|
|
238
238
|
region_name=self.aws_region,
|
|
239
239
|
)
|
|
240
240
|
)
|
|
241
|
-
elif self.provider == "langchain-vertexai":
|
|
241
|
+
elif self.provider == "vertexai":
|
|
242
242
|
from unstructured_ingest.embed.vertexai import (
|
|
243
243
|
VertexAIEmbeddingConfig,
|
|
244
244
|
VertexAIEmbeddingEncoder,
|
|
245
245
|
)
|
|
246
246
|
|
|
247
247
|
return VertexAIEmbeddingEncoder(config=VertexAIEmbeddingConfig(**kwargs))
|
|
248
|
-
elif self.provider == "langchain-voyageai":
|
|
248
|
+
elif self.provider == "voyageai":
|
|
249
249
|
from unstructured_ingest.embed.voyageai import (
|
|
250
250
|
VoyageAIEmbeddingConfig,
|
|
251
251
|
VoyageAIEmbeddingEncoder,
|
|
@@ -155,14 +155,14 @@ def _get_type_from_field(field: FieldInfo) -> click.ParamType:
|
|
|
155
155
|
|
|
156
156
|
def get_option_from_field(option_name: str, field_info: FieldInfo) -> Option:
|
|
157
157
|
param_decls = [option_name]
|
|
158
|
-
|
|
158
|
+
help_text = field_info.description or ""
|
|
159
159
|
if examples := field_info.examples:
|
|
160
|
-
|
|
160
|
+
help_text += f" [Examples: {', '.join(examples)}]"
|
|
161
161
|
option_kwargs = {
|
|
162
162
|
"type": _get_type_from_field(field_info),
|
|
163
163
|
"default": get_default_value_from_field(field_info),
|
|
164
164
|
"required": field_info.is_required(),
|
|
165
|
-
"help":
|
|
165
|
+
"help": str(help_text),
|
|
166
166
|
"is_flag": is_boolean_flag(field_info),
|
|
167
167
|
"show_default": field_info.default is not PydanticUndefined,
|
|
168
168
|
}
|
|
@@ -58,20 +58,6 @@ class PineconeConnectionConfig(ConnectionConfig):
|
|
|
58
58
|
return index
|
|
59
59
|
|
|
60
60
|
|
|
61
|
-
class PineconeUploadStagerConfig(UploadStagerConfig):
|
|
62
|
-
pass
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
class PineconeUploaderConfig(UploaderConfig):
|
|
66
|
-
batch_size: Optional[int] = Field(
|
|
67
|
-
default=None,
|
|
68
|
-
description="Optional number of records per batch. Will otherwise limit by size.",
|
|
69
|
-
)
|
|
70
|
-
pool_threads: Optional[int] = Field(
|
|
71
|
-
default=1, description="Optional limit on number of threads to use for upload"
|
|
72
|
-
)
|
|
73
|
-
|
|
74
|
-
|
|
75
61
|
ALLOWED_FIELDS = (
|
|
76
62
|
"element_id",
|
|
77
63
|
"text",
|
|
@@ -86,31 +72,56 @@ ALLOWED_FIELDS = (
|
|
|
86
72
|
"is_continuation",
|
|
87
73
|
"link_urls",
|
|
88
74
|
"link_texts",
|
|
75
|
+
"text_as_html",
|
|
89
76
|
)
|
|
90
77
|
|
|
91
78
|
|
|
79
|
+
class PineconeUploadStagerConfig(UploadStagerConfig):
|
|
80
|
+
metadata_fields: list[str] = Field(
|
|
81
|
+
default=str(ALLOWED_FIELDS),
|
|
82
|
+
description=(
|
|
83
|
+
"which metadata from the source element to map to the payload metadata being sent to "
|
|
84
|
+
"Pinecone."
|
|
85
|
+
),
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class PineconeUploaderConfig(UploaderConfig):
|
|
90
|
+
batch_size: Optional[int] = Field(
|
|
91
|
+
default=None,
|
|
92
|
+
description="Optional number of records per batch. Will otherwise limit by size.",
|
|
93
|
+
)
|
|
94
|
+
pool_threads: Optional[int] = Field(
|
|
95
|
+
default=1, description="Optional limit on number of threads to use for upload"
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
|
|
92
99
|
@dataclass
|
|
93
100
|
class PineconeUploadStager(UploadStager):
|
|
94
101
|
upload_stager_config: PineconeUploadStagerConfig = field(
|
|
95
102
|
default_factory=lambda: PineconeUploadStagerConfig()
|
|
96
103
|
)
|
|
97
104
|
|
|
98
|
-
|
|
99
|
-
def conform_dict(element_dict: dict) -> dict:
|
|
105
|
+
def conform_dict(self, element_dict: dict) -> dict:
|
|
100
106
|
embeddings = element_dict.pop("embeddings", None)
|
|
101
107
|
metadata: dict[str, Any] = element_dict.pop("metadata", {})
|
|
102
108
|
data_source = metadata.pop("data_source", {})
|
|
103
109
|
coordinates = metadata.pop("coordinates", {})
|
|
104
|
-
|
|
105
|
-
element_dict
|
|
106
|
-
|
|
107
|
-
|
|
110
|
+
pinecone_metadata = {}
|
|
111
|
+
for possible_meta in [element_dict, metadata, data_source, coordinates]:
|
|
112
|
+
pinecone_metadata.update(
|
|
113
|
+
{
|
|
114
|
+
k: v
|
|
115
|
+
for k, v in possible_meta.items()
|
|
116
|
+
if k in self.upload_stager_config.metadata_fields
|
|
117
|
+
}
|
|
118
|
+
)
|
|
108
119
|
|
|
109
120
|
return {
|
|
110
121
|
"id": str(uuid.uuid4()),
|
|
111
122
|
"values": embeddings,
|
|
112
123
|
"metadata": flatten_dict(
|
|
113
|
-
|
|
124
|
+
pinecone_metadata,
|
|
114
125
|
separator="-",
|
|
115
126
|
flatten_lists=True,
|
|
116
127
|
remove_none=True,
|
|
@@ -15,11 +15,11 @@ if TYPE_CHECKING:
|
|
|
15
15
|
class EmbedderConfig(BaseModel):
|
|
16
16
|
embedding_provider: Optional[
|
|
17
17
|
Literal[
|
|
18
|
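-
"langchain-openai",
|
|
19
|
-
"langchain-huggingface",
|
|
20
|
-
"langchain-aws-bedrock",
|
|
21
|
-
"langchain-vertexai",
|
|
22
|
-
"langchain-voyageai",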
|
|
18
|
+
"openai",
|
|
19
|
+
"huggingface",
|
|
20
|
+
"aws-bedrock",
|
|
21
|
+
"vertexai",
|
|
22
|
+
"voyageai",
|
|
23
23
|
"octoai",
|
|
24
24
|
"mixedbread-ai",
|
|
25
25
|
]
|
|
@@ -114,22 +114,22 @@ class EmbedderConfig(BaseModel):
|
|
|
114
114
|
if self.embedding_model_name:
|
|
115
115
|
kwargs["model_name"] = self.embedding_model_name
|
|
116
116
|
# TODO make this more dynamic to map to encoder configs
|
|
117
|
-
if self.embedding_provider == "langchain-openai":
|
|
117
|
+
if self.embedding_provider == "openai":
|
|
118
118
|
return self.get_openai_embedder(embedding_kwargs=kwargs)
|
|
119
119
|
|
|
120
|
-
if self.embedding_provider == "langchain-huggingface":
|
|
120
|
+
if self.embedding_provider == "huggingface":
|
|
121
121
|
return self.get_huggingface_embedder(embedding_kwargs=kwargs)
|
|
122
122
|
|
|
123
123
|
if self.embedding_provider == "octoai":
|
|
124
124
|
return self.get_octoai_embedder(embedding_kwargs=kwargs)
|
|
125
125
|
|
|
126
|
-
if self.embedding_provider == "langchain-aws-bedrock":
|
|
126
|
+
if self.embedding_provider == "aws-bedrock":
|
|
127
127
|
return self.get_bedrock_embedder()
|
|
128
128
|
|
|
129
|
-
if self.embedding_provider == "langchain-vertexai":
|
|
129
|
+
if self.embedding_provider == "vertexai":
|
|
130
130
|
return self.get_vertexai_embedder(embedding_kwargs=kwargs)
|
|
131
131
|
|
|
132
|
-
if self.embedding_provider == "langchain-voyageai":
|
|
132
|
+
if self.embedding_provider == "voyageai":
|
|
133
133
|
return self.get_voyageai_embedder(embedding_kwargs=kwargs)
|
|
134
134
|
if self.embedding_provider == "mixedbread-ai":
|
|
135
135
|
return self.get_mixedbread_embedder(embedding_kwargs=kwargs)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.0.22
|
|
3
|
+
Version: 0.0.24
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,13 +22,13 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.13
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
+
Requires-Dist: pydantic>=2.7
|
|
26
|
+
Requires-Dist: tqdm
|
|
27
|
+
Requires-Dist: click
|
|
28
|
+
Requires-Dist: python-dateutil
|
|
29
|
+
Requires-Dist: opentelemetry-sdk
|
|
25
30
|
Requires-Dist: pandas
|
|
26
31
|
Requires-Dist: dataclasses-json
|
|
27
|
-
Requires-Dist: opentelemetry-sdk
|
|
28
|
-
Requires-Dist: python-dateutil
|
|
29
|
-
Requires-Dist: click
|
|
30
|
-
Requires-Dist: tqdm
|
|
31
|
-
Requires-Dist: pydantic>=2.7
|
|
32
32
|
Provides-Extra: airtable
|
|
33
33
|
Requires-Dist: pyairtable; extra == "airtable"
|
|
34
34
|
Provides-Extra: astradb
|
|
@@ -87,9 +87,9 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
|
|
|
87
87
|
Provides-Extra: epub
|
|
88
88
|
Requires-Dist: unstructured[epub]; extra == "epub"
|
|
89
89
|
Provides-Extra: gcs
|
|
90
|
-
Requires-Dist: bs4; extra == "gcs"
|
|
91
90
|
Requires-Dist: gcsfs; extra == "gcs"
|
|
92
91
|
Requires-Dist: fsspec; extra == "gcs"
|
|
92
|
+
Requires-Dist: bs4; extra == "gcs"
|
|
93
93
|
Provides-Extra: github
|
|
94
94
|
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
95
95
|
Requires-Dist: requests; extra == "github"
|
|
@@ -115,10 +115,10 @@ Requires-Dist: pymongo; extra == "mongodb"
|
|
|
115
115
|
Provides-Extra: msg
|
|
116
116
|
Requires-Dist: unstructured[msg]; extra == "msg"
|
|
117
117
|
Provides-Extra: notion
|
|
118
|
-
Requires-Dist: httpx; extra == "notion"
|
|
119
118
|
Requires-Dist: notion-client; extra == "notion"
|
|
120
|
-
Requires-Dist: htmlBuilder; extra == "notion"
|
|
121
119
|
Requires-Dist: backoff; extra == "notion"
|
|
120
|
+
Requires-Dist: htmlBuilder; extra == "notion"
|
|
121
|
+
Requires-Dist: httpx; extra == "notion"
|
|
122
122
|
Provides-Extra: odt
|
|
123
123
|
Requires-Dist: unstructured[odt]; extra == "odt"
|
|
124
124
|
Provides-Extra: onedrive
|
|
@@ -161,8 +161,8 @@ Requires-Dist: fsspec; extra == "s3"
|
|
|
161
161
|
Provides-Extra: salesforce
|
|
162
162
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
163
163
|
Provides-Extra: sftp
|
|
164
|
-
Requires-Dist: fsspec; extra == "sftp"
|
|
165
164
|
Requires-Dist: paramiko; extra == "sftp"
|
|
165
|
+
Requires-Dist: fsspec; extra == "sftp"
|
|
166
166
|
Provides-Extra: sharepoint
|
|
167
167
|
Requires-Dist: msal; extra == "sharepoint"
|
|
168
168
|
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
2
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=i77-gjXpw3EQpetJm6qwuhTR53KoBsdCYSBjHDaGJUQ,43
|
|
3
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
4
|
-
unstructured_ingest/interfaces.py,sha256=
|
|
4
|
+
unstructured_ingest/interfaces.py,sha256=0r0gQoHJQ4DVSQEVbUPBA3N6WyvGMkR1u6U2SwUvoAQ,31361
|
|
5
5
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
6
6
|
unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
|
|
7
7
|
unstructured_ingest/processor.py,sha256=XKKrvbxsb--5cDzz4hB3-GfWZYyIjJ2ah8FpzQKF_DM,2760
|
|
@@ -9,7 +9,7 @@ unstructured_ingest/cli/__init__.py,sha256=9kNcBOHuXON5lB1MJU9QewEhwPmId56vXqB29
|
|
|
9
9
|
unstructured_ingest/cli/cli.py,sha256=LutBTBYMqboKw8cputHVszpenyfnySzcUC15ifwuYyg,1049
|
|
10
10
|
unstructured_ingest/cli/cmd_factory.py,sha256=UdHm1KacTombpF6DxyTSwTCuApsKHUYw_kVu5Nhcy3Y,364
|
|
11
11
|
unstructured_ingest/cli/common.py,sha256=I0El08FHz5kxw7iz0VWOWPrvcJD1rBgXJSwVIpVmmwU,204
|
|
12
|
-
unstructured_ingest/cli/interfaces.py,sha256=
|
|
12
|
+
unstructured_ingest/cli/interfaces.py,sha256=nWZVXAoLEP08eDPj10c2nwHNbd-HXOHFa4YvEdUJ8y8,24084
|
|
13
13
|
unstructured_ingest/cli/utils.py,sha256=KNhkFNKOeEihc8HlvMz_MTbYVQNFklrBKbC8xg9h1xE,7982
|
|
14
14
|
unstructured_ingest/cli/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
unstructured_ingest/cli/base/cmd.py,sha256=BbfjA2v203Jh-7DL6bzxQ7fOeNixd5BsBMuzXz6h5IQ,583
|
|
@@ -166,15 +166,15 @@ unstructured_ingest/connector/notion/types/database_properties/title.py,sha256=O
|
|
|
166
166
|
unstructured_ingest/connector/notion/types/database_properties/unique_id.py,sha256=H9lKi8rCDPtKmuu7j9CnJoTUr6YmzIF4oXbv_OxuN9k,1162
|
|
167
167
|
unstructured_ingest/connector/notion/types/database_properties/url.py,sha256=iXQ2tVUm9UlKVtDA0NQiFIRJ5PHYW9wOaWt2vFfSVCg,862
|
|
168
168
|
unstructured_ingest/connector/notion/types/database_properties/verification.py,sha256=J_DLjY-v2T6xDGMQ7FkI0YMKMA6SG6Y3yYW7qUD1hKA,2334
|
|
169
|
-
unstructured_ingest/embed/__init__.py,sha256=
|
|
169
|
+
unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
170
170
|
unstructured_ingest/embed/bedrock.py,sha256=5-pKWwOEGHKOHa06wYuKOhvT8Xu72ke6nrpCnRtkAaU,3872
|
|
171
171
|
unstructured_ingest/embed/huggingface.py,sha256=ku_JQr72KBG8n5b6KRkXIbeBGzdgLw_KKIEm1dFK3oM,2729
|
|
172
172
|
unstructured_ingest/embed/interfaces.py,sha256=L5WimR69bmEvliIBlZ8wOCH_YDA9DWteCu6QEsKCV5I,1113
|
|
173
173
|
unstructured_ingest/embed/mixedbreadai.py,sha256=NSrAt1_bjphTHLUnlzzWSBU25UBCZlpYaLdWSRSGyqs,5504
|
|
174
174
|
unstructured_ingest/embed/octoai.py,sha256=0zxAUAMzodGkqMwqMkEvSfgWLNHtEnhdvUofvJDQD1A,2368
|
|
175
175
|
unstructured_ingest/embed/openai.py,sha256=4Ee4A2rQ8OlSh_yiJSFmok_qqRDi1A3KyayB5YiPLFw,2058
|
|
176
|
-
unstructured_ingest/embed/vertexai.py,sha256=
|
|
177
|
-
unstructured_ingest/embed/voyageai.py,sha256=
|
|
176
|
+
unstructured_ingest/embed/vertexai.py,sha256=cgyRyTm_dO_qyedwbIhOQIFvKjCqZBoDh606ykzTYHI,3598
|
|
177
|
+
unstructured_ingest/embed/voyageai.py,sha256=6BWNJUZOqkHSMaO2XPVZVYAVRrAtpMWQZEKp0qgp20Q,2631
|
|
178
178
|
unstructured_ingest/enhanced_dataclass/__init__.py,sha256=gDZOUsv5eo-8jm4Yu7DdDwi101aGbfG7JctTdOYnTOM,151
|
|
179
179
|
unstructured_ingest/enhanced_dataclass/core.py,sha256=d6aUkDynuKX87cHx9_N5UDUWrvISR4jYRFRTvd_avlI,3038
|
|
180
180
|
unstructured_ingest/enhanced_dataclass/dataclasses.py,sha256=aZMsoCzAGRb8Rmh3BTSBFtNr6FmFTY93KYGLk3gYJKQ,1949
|
|
@@ -277,7 +277,7 @@ unstructured_ingest/v2/cli/base/importer.py,sha256=nRt0QQ3qpi264-n_mR0l55C2ddM8n
|
|
|
277
277
|
unstructured_ingest/v2/cli/base/src.py,sha256=cpQ43qQju4e5s_YSaPxUtA70BaisRkTBdjtlPhqn5Mg,2872
|
|
278
278
|
unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
279
279
|
unstructured_ingest/v2/cli/utils/click.py,sha256=Wn2s3PuvBCKB0lsK-W7X_Y0eYyWnS6Y9wWo1OhVBOzY,6344
|
|
280
|
-
unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=
|
|
280
|
+
unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=uJQKpbTC5ysOdVaRq2SWEjG8btBimVZYzX9NVL7xnzs,7500
|
|
281
281
|
unstructured_ingest/v2/interfaces/__init__.py,sha256=Rfa8crx6De7WNOK-EjsWWwFVpsUfCc6gY8B8tQ3ae9I,899
|
|
282
282
|
unstructured_ingest/v2/interfaces/connector.py,sha256=KG0pHdAcpuO5h72xrAkJzADmjxbav31TZ2Wo3PBvwT0,765
|
|
283
283
|
unstructured_ingest/v2/interfaces/downloader.py,sha256=PKT1kr79Mz1urW_8xCyq9sBuK93gDvyTXg5e4ma4htU,2871
|
|
@@ -304,7 +304,7 @@ unstructured_ingest/v2/pipeline/steps/upload.py,sha256=zlgXgwReX9TBOdfTpS9hETah4
|
|
|
304
304
|
unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
|
|
305
305
|
unstructured_ingest/v2/processes/chunker.py,sha256=76PrpCSd8k3DpfdZcl8I10u7vciKzhSV9ZByrrp302g,5476
|
|
306
306
|
unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
|
|
307
|
-
unstructured_ingest/v2/processes/embedder.py,sha256=
|
|
307
|
+
unstructured_ingest/v2/processes/embedder.py,sha256=nFYiOmIJwWLodBt_cC-E5h7zmYB9t3hLu2BWtBStm3g,5977
|
|
308
308
|
unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
|
|
309
309
|
unstructured_ingest/v2/processes/partitioner.py,sha256=bpqmZDsKKi6qtxNWdIWBfQmr1ccQUhU0axecpGAUf_4,7739
|
|
310
310
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
@@ -323,7 +323,7 @@ unstructured_ingest/v2/processes/connectors/milvus.py,sha256=ZUlyAQyTt0U1JoapFYH
|
|
|
323
323
|
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=2_R_hrEAaTU4vJTCK9oKblWTgv6BKjyUhFtC7uq3q2w,4859
|
|
324
324
|
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=ZiUo-dFo1LMOvFwphSLRZiR1PcrN8GWLTHhsh4TU6n0,9207
|
|
325
325
|
unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=dfDSNrWIEk19wuHdlMJpp_SLMOteNPlkDBPlAwu1LVY,6767
|
|
326
|
-
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=
|
|
326
|
+
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=8St-JaVrDdQEVZpRS_TfjFusfjg0bAg3IYyykGFyWdw,7169
|
|
327
327
|
unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
|
|
328
328
|
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=hOaV5gBcHFc6N5Rbu3MgM-5Aol1ht-QkNIN4PqjvfxE,19665
|
|
329
329
|
unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=4rVvWKK2iQr03Ff6cB5zjfE1MpN0JyIGpCxxFCDI6hc,5563
|
|
@@ -339,9 +339,9 @@ unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=-_pYHbsBG9FyRyN
|
|
|
339
339
|
unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=je1BDqFWlyMfPa4oAMMNFQLLQtCY9quuqx3xjTwF8OQ,6251
|
|
340
340
|
unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=dwpyqDq0qceCBWX3zM1hiUlgXB4hzX6ObOr-sh-5CJs,6926
|
|
341
341
|
unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
|
|
342
|
-
unstructured_ingest-0.0.
|
|
343
|
-
unstructured_ingest-0.0.
|
|
344
|
-
unstructured_ingest-0.0.
|
|
345
|
-
unstructured_ingest-0.0.
|
|
346
|
-
unstructured_ingest-0.0.
|
|
347
|
-
unstructured_ingest-0.0.
|
|
342
|
+
unstructured_ingest-0.0.24.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
343
|
+
unstructured_ingest-0.0.24.dist-info/METADATA,sha256=rHTF8fy1vNg5NmCBNVdobYWeGgpn_PBKao2z54UbgnE,7108
|
|
344
|
+
unstructured_ingest-0.0.24.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
345
|
+
unstructured_ingest-0.0.24.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
346
|
+
unstructured_ingest-0.0.24.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
|
|
347
|
+
unstructured_ingest-0.0.24.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.0.22.dist-info → unstructured_ingest-0.0.24.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|