unstructured-ingest 0.0.21__py3-none-any.whl → 0.0.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/cli/interfaces.py +5 -5
- unstructured_ingest/embed/__init__.py +0 -17
- unstructured_ingest/embed/bedrock.py +56 -19
- unstructured_ingest/embed/huggingface.py +22 -22
- unstructured_ingest/embed/interfaces.py +11 -4
- unstructured_ingest/embed/mixedbreadai.py +17 -17
- unstructured_ingest/embed/octoai.py +7 -7
- unstructured_ingest/embed/openai.py +15 -20
- unstructured_ingest/embed/vertexai.py +26 -18
- unstructured_ingest/embed/voyageai.py +25 -20
- unstructured_ingest/interfaces.py +5 -5
- unstructured_ingest/v2/cli/base/cmd.py +1 -1
- unstructured_ingest/v2/interfaces/connector.py +1 -1
- unstructured_ingest/v2/pipeline/pipeline.py +3 -1
- unstructured_ingest/v2/pipeline/steps/chunk.py +1 -1
- unstructured_ingest/v2/pipeline/steps/download.py +6 -2
- unstructured_ingest/v2/pipeline/steps/embed.py +1 -1
- unstructured_ingest/v2/pipeline/steps/filter.py +1 -1
- unstructured_ingest/v2/pipeline/steps/index.py +4 -2
- unstructured_ingest/v2/pipeline/steps/partition.py +1 -1
- unstructured_ingest/v2/pipeline/steps/stage.py +3 -1
- unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -1
- unstructured_ingest/v2/pipeline/steps/upload.py +6 -2
- unstructured_ingest/v2/processes/connectors/airtable.py +1 -1
- unstructured_ingest/v2/processes/connectors/databricks_volumes.py +1 -1
- unstructured_ingest/v2/processes/connectors/elasticsearch.py +2 -2
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +31 -5
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +31 -2
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +36 -8
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +25 -77
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +30 -1
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +15 -18
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +22 -1
- unstructured_ingest/v2/processes/connectors/milvus.py +2 -2
- unstructured_ingest/v2/processes/connectors/opensearch.py +2 -2
- unstructured_ingest/v2/processes/embedder.py +10 -10
- unstructured_ingest/v2/utils.py +1 -1
- unstructured_ingest-0.0.23.dist-info/METADATA +186 -0
- {unstructured_ingest-0.0.21.dist-info → unstructured_ingest-0.0.23.dist-info}/RECORD +44 -44
- {unstructured_ingest-0.0.21.dist-info → unstructured_ingest-0.0.23.dist-info}/WHEEL +1 -1
- unstructured_ingest-0.0.21.dist-info/METADATA +0 -639
- {unstructured_ingest-0.0.21.dist-info → unstructured_ingest-0.0.23.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.0.21.dist-info → unstructured_ingest-0.0.23.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.0.21.dist-info → unstructured_ingest-0.0.23.dist-info}/top_level.txt +0 -0
|
@@ -2,13 +2,15 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
4
|
from pathlib import Path
|
|
5
|
+
from time import time
|
|
5
6
|
from typing import Any, Generator, Optional, Union
|
|
6
7
|
|
|
8
|
+
from dateutil import parser
|
|
7
9
|
from pydantic import Field, Secret
|
|
8
10
|
|
|
9
11
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
10
12
|
from unstructured_ingest.utils.string_and_date_utils import json_to_dict
|
|
11
|
-
from unstructured_ingest.v2.interfaces import DownloadResponse, FileData
|
|
13
|
+
from unstructured_ingest.v2.interfaces import DownloadResponse, FileData, FileDataSourceMetadata
|
|
12
14
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
13
15
|
DestinationRegistryEntry,
|
|
14
16
|
SourceRegistryEntry,
|
|
@@ -106,6 +108,33 @@ class GcsIndexer(FsspecIndexer):
|
|
|
106
108
|
def precheck(self) -> None:
|
|
107
109
|
super().precheck()
|
|
108
110
|
|
|
111
|
+
def get_metadata(self, file_data: dict) -> FileDataSourceMetadata:
|
|
112
|
+
path = file_data["name"]
|
|
113
|
+
date_created = None
|
|
114
|
+
date_modified = None
|
|
115
|
+
if modified_at_str := file_data.get("updated"):
|
|
116
|
+
date_modified = parser.parse(modified_at_str).timestamp()
|
|
117
|
+
if created_at_str := file_data.get("timeCreated"):
|
|
118
|
+
date_created = parser.parse(created_at_str).timestamp()
|
|
119
|
+
|
|
120
|
+
file_size = file_data.get("size") if "size" in file_data else None
|
|
121
|
+
|
|
122
|
+
version = file_data.get("etag")
|
|
123
|
+
record_locator = {
|
|
124
|
+
"protocol": self.index_config.protocol,
|
|
125
|
+
"remote_file_path": self.index_config.remote_url,
|
|
126
|
+
"file_id": file_data.get("id"),
|
|
127
|
+
}
|
|
128
|
+
return FileDataSourceMetadata(
|
|
129
|
+
date_created=date_created,
|
|
130
|
+
date_modified=date_modified,
|
|
131
|
+
date_processed=str(time()),
|
|
132
|
+
version=version,
|
|
133
|
+
url=f"{self.index_config.protocol}://{path}",
|
|
134
|
+
record_locator=record_locator,
|
|
135
|
+
filesize_bytes=file_size,
|
|
136
|
+
)
|
|
137
|
+
|
|
109
138
|
|
|
110
139
|
class GcsDownloaderConfig(FsspecDownloaderConfig):
|
|
111
140
|
pass
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import contextlib
|
|
2
2
|
from dataclasses import dataclass, field
|
|
3
|
-
from datetime import datetime
|
|
4
3
|
from pathlib import Path
|
|
5
4
|
from time import time
|
|
6
5
|
from typing import Any, Generator, Optional
|
|
@@ -69,7 +68,7 @@ class S3ConnectionConfig(FsspecConnectionConfig):
|
|
|
69
68
|
|
|
70
69
|
# Avoid injecting None by filtering out k,v pairs where the value is None
|
|
71
70
|
access_configs.update(
|
|
72
|
-
{k: v for k, v in self.access_config.get_secret_value().
|
|
71
|
+
{k: v for k, v in self.access_config.get_secret_value().model_dump().items() if v}
|
|
73
72
|
)
|
|
74
73
|
return access_configs
|
|
75
74
|
|
|
@@ -80,27 +79,25 @@ class S3Indexer(FsspecIndexer):
|
|
|
80
79
|
index_config: S3IndexerConfig
|
|
81
80
|
connector_type: str = CONNECTOR_TYPE
|
|
82
81
|
|
|
83
|
-
def
|
|
82
|
+
def get_path(self, file_data: dict) -> str:
|
|
83
|
+
return file_data["Key"]
|
|
84
|
+
|
|
85
|
+
def get_metadata(self, file_data: dict) -> FileDataSourceMetadata:
|
|
86
|
+
path = file_data["Key"]
|
|
84
87
|
date_created = None
|
|
85
88
|
date_modified = None
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
pass
|
|
94
|
-
with contextlib.suppress(AttributeError):
|
|
95
|
-
file_size = self.fs.size(path)
|
|
89
|
+
modified = file_data.get("LastModified")
|
|
90
|
+
if modified:
|
|
91
|
+
date_created = str(modified.timestamp())
|
|
92
|
+
date_modified = str(modified.timestamp())
|
|
93
|
+
|
|
94
|
+
file_size = file_data.get("size") if "size" in file_data else None
|
|
95
|
+
file_size = file_size or file_data.get("Size")
|
|
96
96
|
|
|
97
|
-
version = None
|
|
98
|
-
info: dict[str, Any] = self.fs.info(path)
|
|
99
|
-
if etag := info.get("ETag"):
|
|
100
|
-
version = str(etag).rstrip('"').lstrip('"')
|
|
97
|
+
version = file_data.get("ETag").rstrip('"').lstrip('"') if "ETag" in file_data else None
|
|
101
98
|
metadata: dict[str, str] = {}
|
|
102
99
|
with contextlib.suppress(AttributeError):
|
|
103
|
-
metadata = self.fs.metadata(path)
|
|
100
|
+
metadata = self.fs.metadata(path=path)
|
|
104
101
|
record_locator = {
|
|
105
102
|
"protocol": self.index_config.protocol,
|
|
106
103
|
"remote_file_path": self.index_config.remote_url,
|
|
@@ -3,13 +3,14 @@ from __future__ import annotations
|
|
|
3
3
|
import os
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
5
|
from pathlib import Path
|
|
6
|
+
from time import time
|
|
6
7
|
from typing import Any, Generator, Optional
|
|
7
8
|
from urllib.parse import urlparse
|
|
8
9
|
|
|
9
10
|
from pydantic import Field, Secret
|
|
10
11
|
|
|
11
12
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
12
|
-
from unstructured_ingest.v2.interfaces import DownloadResponse, FileData
|
|
13
|
+
from unstructured_ingest.v2.interfaces import DownloadResponse, FileData, FileDataSourceMetadata
|
|
13
14
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
14
15
|
DestinationRegistryEntry,
|
|
15
16
|
SourceRegistryEntry,
|
|
@@ -96,6 +97,26 @@ class SftpIndexer(FsspecIndexer):
|
|
|
96
97
|
def precheck(self) -> None:
|
|
97
98
|
super().precheck()
|
|
98
99
|
|
|
100
|
+
def get_metadata(self, file_data: dict) -> FileDataSourceMetadata:
|
|
101
|
+
path = file_data["name"]
|
|
102
|
+
date_created = file_data.get("time").timestamp() if "time" in file_data else None
|
|
103
|
+
date_modified = file_data.get("mtime").timestamp() if "mtime" in file_data else None
|
|
104
|
+
|
|
105
|
+
file_size = file_data.get("size") if "size" in file_data else None
|
|
106
|
+
|
|
107
|
+
record_locator = {
|
|
108
|
+
"protocol": self.index_config.protocol,
|
|
109
|
+
"remote_file_path": self.index_config.remote_url,
|
|
110
|
+
}
|
|
111
|
+
return FileDataSourceMetadata(
|
|
112
|
+
date_created=date_created,
|
|
113
|
+
date_modified=date_modified,
|
|
114
|
+
date_processed=str(time()),
|
|
115
|
+
url=f"{self.index_config.protocol}://{path}",
|
|
116
|
+
record_locator=record_locator,
|
|
117
|
+
filesize_bytes=file_size,
|
|
118
|
+
)
|
|
119
|
+
|
|
99
120
|
|
|
100
121
|
class SftpDownloaderConfig(FsspecDownloaderConfig):
|
|
101
122
|
remote_url: str = Field(description="Remote fsspec URL formatted as `protocol://dir/path`")
|
|
@@ -48,8 +48,8 @@ class MilvusConnectionConfig(ConnectionConfig):
|
|
|
48
48
|
|
|
49
49
|
def get_connection_kwargs(self) -> dict[str, Any]:
|
|
50
50
|
access_config = self.access_config.get_secret_value()
|
|
51
|
-
access_config_dict = access_config.
|
|
52
|
-
connection_config_dict = self.
|
|
51
|
+
access_config_dict = access_config.model_dump()
|
|
52
|
+
connection_config_dict = self.model_dump()
|
|
53
53
|
connection_config_dict.pop("access_config", None)
|
|
54
54
|
connection_config_dict.update(access_config_dict)
|
|
55
55
|
# Drop any that were not set explicitly
|
|
@@ -101,8 +101,8 @@ class OpenSearchConnectionConfig(ConnectionConfig):
|
|
|
101
101
|
if self.username and access_config.password:
|
|
102
102
|
client_input_kwargs["http_auth"] = (self.username, access_config.password)
|
|
103
103
|
client_input = OpenSearchClientInput(**client_input_kwargs)
|
|
104
|
-
logger.debug(f"opensearch client inputs mapped to: {client_input.
|
|
105
|
-
client_kwargs = client_input.
|
|
104
|
+
logger.debug(f"opensearch client inputs mapped to: {client_input.model_dump()}")
|
|
105
|
+
client_kwargs = client_input.model_dump()
|
|
106
106
|
if client_input.http_auth is not None:
|
|
107
107
|
client_kwargs["http_auth"] = client_input.http_auth.get_secret_value()
|
|
108
108
|
client_kwargs = {k: v for k, v in client_kwargs.items() if v is not None}
|
|
@@ -15,11 +15,11 @@ if TYPE_CHECKING:
|
|
|
15
15
|
class EmbedderConfig(BaseModel):
|
|
16
16
|
embedding_provider: Optional[
|
|
17
17
|
Literal[
|
|
18
|
-
"
|
|
19
|
-
"
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
18
|
+
"openai",
|
|
19
|
+
"huggingface",
|
|
20
|
+
"aws-bedrock",
|
|
21
|
+
"vertexai",
|
|
22
|
+
"voyageai",
|
|
23
23
|
"octoai",
|
|
24
24
|
"mixedbread-ai",
|
|
25
25
|
]
|
|
@@ -114,22 +114,22 @@ class EmbedderConfig(BaseModel):
|
|
|
114
114
|
if self.embedding_model_name:
|
|
115
115
|
kwargs["model_name"] = self.embedding_model_name
|
|
116
116
|
# TODO make this more dynamic to map to encoder configs
|
|
117
|
-
if self.embedding_provider == "
|
|
117
|
+
if self.embedding_provider == "openai":
|
|
118
118
|
return self.get_openai_embedder(embedding_kwargs=kwargs)
|
|
119
119
|
|
|
120
|
-
if self.embedding_provider == "
|
|
120
|
+
if self.embedding_provider == "huggingface":
|
|
121
121
|
return self.get_huggingface_embedder(embedding_kwargs=kwargs)
|
|
122
122
|
|
|
123
123
|
if self.embedding_provider == "octoai":
|
|
124
124
|
return self.get_octoai_embedder(embedding_kwargs=kwargs)
|
|
125
125
|
|
|
126
|
-
if self.embedding_provider == "
|
|
126
|
+
if self.embedding_provider == "aws-bedrock":
|
|
127
127
|
return self.get_bedrock_embedder()
|
|
128
128
|
|
|
129
|
-
if self.embedding_provider == "
|
|
129
|
+
if self.embedding_provider == "vertexai":
|
|
130
130
|
return self.get_vertexai_embedder(embedding_kwargs=kwargs)
|
|
131
131
|
|
|
132
|
-
if self.embedding_provider == "
|
|
132
|
+
if self.embedding_provider == "voyageai":
|
|
133
133
|
return self.get_voyageai_embedder(embedding_kwargs=kwargs)
|
|
134
134
|
if self.embedding_provider == "mixedbread-ai":
|
|
135
135
|
return self.get_mixedbread_embedder(embedding_kwargs=kwargs)
|
unstructured_ingest/v2/utils.py
CHANGED
|
@@ -19,7 +19,7 @@ def is_secret(value: Any) -> bool:
|
|
|
19
19
|
|
|
20
20
|
def serialize_base_model(model: BaseModel) -> dict:
|
|
21
21
|
# To get the full serialized dict regardless of if values are marked as Secret
|
|
22
|
-
model_dict = model.
|
|
22
|
+
model_dict = model.model_dump()
|
|
23
23
|
return serialize_base_dict(model_dict=model_dict)
|
|
24
24
|
|
|
25
25
|
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: unstructured-ingest
|
|
3
|
+
Version: 0.0.23
|
|
4
|
+
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
|
+
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
|
+
Author: Unstructured Technologies
|
|
7
|
+
Author-email: devops@unstructuredai.io
|
|
8
|
+
License: Apache-2.0
|
|
9
|
+
Keywords: NLP PDF HTML CV XML parsing preprocessing
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Education
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
|
+
Requires-Python: >=3.9.0,<3.13
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
License-File: LICENSE.md
|
|
25
|
+
Requires-Dist: pydantic>=2.7
|
|
26
|
+
Requires-Dist: python-dateutil
|
|
27
|
+
Requires-Dist: click
|
|
28
|
+
Requires-Dist: opentelemetry-sdk
|
|
29
|
+
Requires-Dist: pandas
|
|
30
|
+
Requires-Dist: dataclasses-json
|
|
31
|
+
Requires-Dist: tqdm
|
|
32
|
+
Provides-Extra: airtable
|
|
33
|
+
Requires-Dist: pyairtable; extra == "airtable"
|
|
34
|
+
Provides-Extra: astradb
|
|
35
|
+
Requires-Dist: astrapy; extra == "astradb"
|
|
36
|
+
Provides-Extra: azure
|
|
37
|
+
Requires-Dist: fsspec; extra == "azure"
|
|
38
|
+
Requires-Dist: adlfs; extra == "azure"
|
|
39
|
+
Provides-Extra: azure-cognitive-search
|
|
40
|
+
Requires-Dist: azure-search-documents; extra == "azure-cognitive-search"
|
|
41
|
+
Provides-Extra: bedrock
|
|
42
|
+
Requires-Dist: boto3; extra == "bedrock"
|
|
43
|
+
Provides-Extra: biomed
|
|
44
|
+
Requires-Dist: bs4; extra == "biomed"
|
|
45
|
+
Requires-Dist: requests; extra == "biomed"
|
|
46
|
+
Provides-Extra: box
|
|
47
|
+
Requires-Dist: fsspec; extra == "box"
|
|
48
|
+
Requires-Dist: boxfs; extra == "box"
|
|
49
|
+
Provides-Extra: chroma
|
|
50
|
+
Requires-Dist: chromadb; extra == "chroma"
|
|
51
|
+
Provides-Extra: clarifai
|
|
52
|
+
Requires-Dist: clarifai; extra == "clarifai"
|
|
53
|
+
Provides-Extra: confluence
|
|
54
|
+
Requires-Dist: atlassian-python-api; extra == "confluence"
|
|
55
|
+
Requires-Dist: requests; extra == "confluence"
|
|
56
|
+
Provides-Extra: couchbase
|
|
57
|
+
Requires-Dist: couchbase; extra == "couchbase"
|
|
58
|
+
Provides-Extra: csv
|
|
59
|
+
Requires-Dist: unstructured[tsv]; extra == "csv"
|
|
60
|
+
Provides-Extra: databricks-volumes
|
|
61
|
+
Requires-Dist: databricks-sdk; extra == "databricks-volumes"
|
|
62
|
+
Provides-Extra: delta-table
|
|
63
|
+
Requires-Dist: fsspec; extra == "delta-table"
|
|
64
|
+
Requires-Dist: deltalake; extra == "delta-table"
|
|
65
|
+
Provides-Extra: discord
|
|
66
|
+
Requires-Dist: discord-py; extra == "discord"
|
|
67
|
+
Provides-Extra: doc
|
|
68
|
+
Requires-Dist: unstructured[docx]; extra == "doc"
|
|
69
|
+
Provides-Extra: docx
|
|
70
|
+
Requires-Dist: unstructured[docx]; extra == "docx"
|
|
71
|
+
Provides-Extra: dropbox
|
|
72
|
+
Requires-Dist: fsspec; extra == "dropbox"
|
|
73
|
+
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
74
|
+
Provides-Extra: elasticsearch
|
|
75
|
+
Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
|
|
76
|
+
Provides-Extra: embed-huggingface
|
|
77
|
+
Requires-Dist: sentence-transformers; extra == "embed-huggingface"
|
|
78
|
+
Provides-Extra: embed-mixedbreadai
|
|
79
|
+
Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
|
|
80
|
+
Provides-Extra: embed-octoai
|
|
81
|
+
Requires-Dist: openai; extra == "embed-octoai"
|
|
82
|
+
Requires-Dist: tiktoken; extra == "embed-octoai"
|
|
83
|
+
Provides-Extra: embed-vertexai
|
|
84
|
+
Requires-Dist: vertexai; extra == "embed-vertexai"
|
|
85
|
+
Provides-Extra: embed-voyageai
|
|
86
|
+
Requires-Dist: voyageai; extra == "embed-voyageai"
|
|
87
|
+
Provides-Extra: epub
|
|
88
|
+
Requires-Dist: unstructured[epub]; extra == "epub"
|
|
89
|
+
Provides-Extra: gcs
|
|
90
|
+
Requires-Dist: bs4; extra == "gcs"
|
|
91
|
+
Requires-Dist: fsspec; extra == "gcs"
|
|
92
|
+
Requires-Dist: gcsfs; extra == "gcs"
|
|
93
|
+
Provides-Extra: github
|
|
94
|
+
Requires-Dist: requests; extra == "github"
|
|
95
|
+
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
96
|
+
Provides-Extra: gitlab
|
|
97
|
+
Requires-Dist: python-gitlab; extra == "gitlab"
|
|
98
|
+
Provides-Extra: google-drive
|
|
99
|
+
Requires-Dist: google-api-python-client; extra == "google-drive"
|
|
100
|
+
Provides-Extra: hubspot
|
|
101
|
+
Requires-Dist: urllib3; extra == "hubspot"
|
|
102
|
+
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
103
|
+
Provides-Extra: jira
|
|
104
|
+
Requires-Dist: atlassian-python-api; extra == "jira"
|
|
105
|
+
Provides-Extra: kafka
|
|
106
|
+
Requires-Dist: confluent-kafka; extra == "kafka"
|
|
107
|
+
Provides-Extra: kdbai
|
|
108
|
+
Requires-Dist: kdbai-client; extra == "kdbai"
|
|
109
|
+
Provides-Extra: md
|
|
110
|
+
Requires-Dist: unstructured[md]; extra == "md"
|
|
111
|
+
Provides-Extra: milvus
|
|
112
|
+
Requires-Dist: pymilvus; extra == "milvus"
|
|
113
|
+
Provides-Extra: mongodb
|
|
114
|
+
Requires-Dist: pymongo; extra == "mongodb"
|
|
115
|
+
Provides-Extra: msg
|
|
116
|
+
Requires-Dist: unstructured[msg]; extra == "msg"
|
|
117
|
+
Provides-Extra: notion
|
|
118
|
+
Requires-Dist: backoff; extra == "notion"
|
|
119
|
+
Requires-Dist: httpx; extra == "notion"
|
|
120
|
+
Requires-Dist: htmlBuilder; extra == "notion"
|
|
121
|
+
Requires-Dist: notion-client; extra == "notion"
|
|
122
|
+
Provides-Extra: odt
|
|
123
|
+
Requires-Dist: unstructured[odt]; extra == "odt"
|
|
124
|
+
Provides-Extra: onedrive
|
|
125
|
+
Requires-Dist: bs4; extra == "onedrive"
|
|
126
|
+
Requires-Dist: msal; extra == "onedrive"
|
|
127
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
128
|
+
Provides-Extra: openai
|
|
129
|
+
Requires-Dist: openai; extra == "openai"
|
|
130
|
+
Requires-Dist: tiktoken; extra == "openai"
|
|
131
|
+
Provides-Extra: opensearch
|
|
132
|
+
Requires-Dist: opensearch-py; extra == "opensearch"
|
|
133
|
+
Provides-Extra: org
|
|
134
|
+
Requires-Dist: unstructured[org]; extra == "org"
|
|
135
|
+
Provides-Extra: outlook
|
|
136
|
+
Requires-Dist: msal; extra == "outlook"
|
|
137
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
|
|
138
|
+
Provides-Extra: pdf
|
|
139
|
+
Requires-Dist: unstructured[pdf]; extra == "pdf"
|
|
140
|
+
Provides-Extra: pinecone
|
|
141
|
+
Requires-Dist: pinecone-client>=3.7.1; extra == "pinecone"
|
|
142
|
+
Provides-Extra: postgres
|
|
143
|
+
Requires-Dist: psycopg2-binary; extra == "postgres"
|
|
144
|
+
Provides-Extra: ppt
|
|
145
|
+
Requires-Dist: unstructured[pptx]; extra == "ppt"
|
|
146
|
+
Provides-Extra: pptx
|
|
147
|
+
Requires-Dist: unstructured[pptx]; extra == "pptx"
|
|
148
|
+
Provides-Extra: qdrant
|
|
149
|
+
Requires-Dist: qdrant-client; extra == "qdrant"
|
|
150
|
+
Provides-Extra: reddit
|
|
151
|
+
Requires-Dist: praw; extra == "reddit"
|
|
152
|
+
Provides-Extra: remote
|
|
153
|
+
Requires-Dist: unstructured-client>=0.25.8; extra == "remote"
|
|
154
|
+
Provides-Extra: rst
|
|
155
|
+
Requires-Dist: unstructured[rst]; extra == "rst"
|
|
156
|
+
Provides-Extra: rtf
|
|
157
|
+
Requires-Dist: unstructured[rtf]; extra == "rtf"
|
|
158
|
+
Provides-Extra: s3
|
|
159
|
+
Requires-Dist: fsspec; extra == "s3"
|
|
160
|
+
Requires-Dist: s3fs; extra == "s3"
|
|
161
|
+
Provides-Extra: salesforce
|
|
162
|
+
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
163
|
+
Provides-Extra: sftp
|
|
164
|
+
Requires-Dist: paramiko; extra == "sftp"
|
|
165
|
+
Requires-Dist: fsspec; extra == "sftp"
|
|
166
|
+
Provides-Extra: sharepoint
|
|
167
|
+
Requires-Dist: msal; extra == "sharepoint"
|
|
168
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
169
|
+
Provides-Extra: singlestore
|
|
170
|
+
Requires-Dist: singlestoredb; extra == "singlestore"
|
|
171
|
+
Provides-Extra: slack
|
|
172
|
+
Requires-Dist: slack-sdk; extra == "slack"
|
|
173
|
+
Provides-Extra: tsv
|
|
174
|
+
Requires-Dist: unstructured[tsv]; extra == "tsv"
|
|
175
|
+
Provides-Extra: vectara
|
|
176
|
+
Requires-Dist: requests; extra == "vectara"
|
|
177
|
+
Provides-Extra: weaviate
|
|
178
|
+
Requires-Dist: weaviate-client; extra == "weaviate"
|
|
179
|
+
Provides-Extra: wikipedia
|
|
180
|
+
Requires-Dist: wikipedia; extra == "wikipedia"
|
|
181
|
+
Provides-Extra: xlsx
|
|
182
|
+
Requires-Dist: unstructured[xlsx]; extra == "xlsx"
|
|
183
|
+
|
|
184
|
+
# Unstructured Ingest
|
|
185
|
+
|
|
186
|
+
For details, see the [Unstructured Ingest overview](https://docs.unstructured.io/ingestion/overview) in the Unstructured documentation.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
2
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=HgbcmBIk6mQp0Bz81M53L-kPIBJnMYIFOGkRL73EChs,43
|
|
3
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
4
|
-
unstructured_ingest/interfaces.py,sha256=
|
|
4
|
+
unstructured_ingest/interfaces.py,sha256=0r0gQoHJQ4DVSQEVbUPBA3N6WyvGMkR1u6U2SwUvoAQ,31361
|
|
5
5
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
6
6
|
unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
|
|
7
7
|
unstructured_ingest/processor.py,sha256=XKKrvbxsb--5cDzz4hB3-GfWZYyIjJ2ah8FpzQKF_DM,2760
|
|
@@ -9,7 +9,7 @@ unstructured_ingest/cli/__init__.py,sha256=9kNcBOHuXON5lB1MJU9QewEhwPmId56vXqB29
|
|
|
9
9
|
unstructured_ingest/cli/cli.py,sha256=LutBTBYMqboKw8cputHVszpenyfnySzcUC15ifwuYyg,1049
|
|
10
10
|
unstructured_ingest/cli/cmd_factory.py,sha256=UdHm1KacTombpF6DxyTSwTCuApsKHUYw_kVu5Nhcy3Y,364
|
|
11
11
|
unstructured_ingest/cli/common.py,sha256=I0El08FHz5kxw7iz0VWOWPrvcJD1rBgXJSwVIpVmmwU,204
|
|
12
|
-
unstructured_ingest/cli/interfaces.py,sha256=
|
|
12
|
+
unstructured_ingest/cli/interfaces.py,sha256=nWZVXAoLEP08eDPj10c2nwHNbd-HXOHFa4YvEdUJ8y8,24084
|
|
13
13
|
unstructured_ingest/cli/utils.py,sha256=KNhkFNKOeEihc8HlvMz_MTbYVQNFklrBKbC8xg9h1xE,7982
|
|
14
14
|
unstructured_ingest/cli/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
unstructured_ingest/cli/base/cmd.py,sha256=BbfjA2v203Jh-7DL6bzxQ7fOeNixd5BsBMuzXz6h5IQ,583
|
|
@@ -166,15 +166,15 @@ unstructured_ingest/connector/notion/types/database_properties/title.py,sha256=O
|
|
|
166
166
|
unstructured_ingest/connector/notion/types/database_properties/unique_id.py,sha256=H9lKi8rCDPtKmuu7j9CnJoTUr6YmzIF4oXbv_OxuN9k,1162
|
|
167
167
|
unstructured_ingest/connector/notion/types/database_properties/url.py,sha256=iXQ2tVUm9UlKVtDA0NQiFIRJ5PHYW9wOaWt2vFfSVCg,862
|
|
168
168
|
unstructured_ingest/connector/notion/types/database_properties/verification.py,sha256=J_DLjY-v2T6xDGMQ7FkI0YMKMA6SG6Y3yYW7qUD1hKA,2334
|
|
169
|
-
unstructured_ingest/embed/__init__.py,sha256=
|
|
170
|
-
unstructured_ingest/embed/bedrock.py,sha256=
|
|
171
|
-
unstructured_ingest/embed/huggingface.py,sha256=
|
|
172
|
-
unstructured_ingest/embed/interfaces.py,sha256=
|
|
173
|
-
unstructured_ingest/embed/mixedbreadai.py,sha256=
|
|
174
|
-
unstructured_ingest/embed/octoai.py,sha256=
|
|
175
|
-
unstructured_ingest/embed/openai.py,sha256=
|
|
176
|
-
unstructured_ingest/embed/vertexai.py,sha256=
|
|
177
|
-
unstructured_ingest/embed/voyageai.py,sha256=
|
|
169
|
+
unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
170
|
+
unstructured_ingest/embed/bedrock.py,sha256=5-pKWwOEGHKOHa06wYuKOhvT8Xu72ke6nrpCnRtkAaU,3872
|
|
171
|
+
unstructured_ingest/embed/huggingface.py,sha256=ku_JQr72KBG8n5b6KRkXIbeBGzdgLw_KKIEm1dFK3oM,2729
|
|
172
|
+
unstructured_ingest/embed/interfaces.py,sha256=L5WimR69bmEvliIBlZ8wOCH_YDA9DWteCu6QEsKCV5I,1113
|
|
173
|
+
unstructured_ingest/embed/mixedbreadai.py,sha256=NSrAt1_bjphTHLUnlzzWSBU25UBCZlpYaLdWSRSGyqs,5504
|
|
174
|
+
unstructured_ingest/embed/octoai.py,sha256=0zxAUAMzodGkqMwqMkEvSfgWLNHtEnhdvUofvJDQD1A,2368
|
|
175
|
+
unstructured_ingest/embed/openai.py,sha256=4Ee4A2rQ8OlSh_yiJSFmok_qqRDi1A3KyayB5YiPLFw,2058
|
|
176
|
+
unstructured_ingest/embed/vertexai.py,sha256=cgyRyTm_dO_qyedwbIhOQIFvKjCqZBoDh606ykzTYHI,3598
|
|
177
|
+
unstructured_ingest/embed/voyageai.py,sha256=6BWNJUZOqkHSMaO2XPVZVYAVRrAtpMWQZEKp0qgp20Q,2631
|
|
178
178
|
unstructured_ingest/enhanced_dataclass/__init__.py,sha256=gDZOUsv5eo-8jm4Yu7DdDwi101aGbfG7JctTdOYnTOM,151
|
|
179
179
|
unstructured_ingest/enhanced_dataclass/core.py,sha256=d6aUkDynuKX87cHx9_N5UDUWrvISR4jYRFRTvd_avlI,3038
|
|
180
180
|
unstructured_ingest/enhanced_dataclass/dataclasses.py,sha256=aZMsoCzAGRb8Rmh3BTSBFtNr6FmFTY93KYGLk3gYJKQ,1949
|
|
@@ -266,12 +266,12 @@ unstructured_ingest/v2/logger.py,sha256=wcln4s5Nyp2fjjJux9iM3d6t9aQFNJ2H1IAZXmIk
|
|
|
266
266
|
unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
|
|
267
267
|
unstructured_ingest/v2/otel.py,sha256=2fGj1c7cVcC3J8NwL6MNYhyPEAXiB33DsilvRDkrdLo,4130
|
|
268
268
|
unstructured_ingest/v2/unstructured_api.py,sha256=1EQVuL-TojmFxWoWFzXr1oCFPEC3IkCQqhjXM8spdTY,3373
|
|
269
|
-
unstructured_ingest/v2/utils.py,sha256=
|
|
269
|
+
unstructured_ingest/v2/utils.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
|
|
270
270
|
unstructured_ingest/v2/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
271
271
|
unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfiMULE,648
|
|
272
272
|
unstructured_ingest/v2/cli/cmds.py,sha256=wWUTbvvxEqKAy6bNE6XhPnj0ELMeSbb9_r1NZl58xMM,489
|
|
273
273
|
unstructured_ingest/v2/cli/base/__init__.py,sha256=zXCa7F4FMqItmzxfUIVmyI-CeGh8X85yF8lRxwX_OYQ,83
|
|
274
|
-
unstructured_ingest/v2/cli/base/cmd.py,sha256=
|
|
274
|
+
unstructured_ingest/v2/cli/base/cmd.py,sha256=fO1gXvMFgRMoXgVF4Nwk8J-MYU-U59ubchZU_Gx1mK4,11490
|
|
275
275
|
unstructured_ingest/v2/cli/base/dest.py,sha256=zDjqek7anr0JQ2ptEl8KIAsUXuCuHRnBQnJhoPj4NVM,3198
|
|
276
276
|
unstructured_ingest/v2/cli/base/importer.py,sha256=nRt0QQ3qpi264-n_mR0l55C2ddM8nowTNzT1jsWaam8,1128
|
|
277
277
|
unstructured_ingest/v2/cli/base/src.py,sha256=cpQ43qQju4e5s_YSaPxUtA70BaisRkTBdjtlPhqn5Mg,2872
|
|
@@ -279,7 +279,7 @@ unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
279
279
|
unstructured_ingest/v2/cli/utils/click.py,sha256=Wn2s3PuvBCKB0lsK-W7X_Y0eYyWnS6Y9wWo1OhVBOzY,6344
|
|
280
280
|
unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=73DKHQQ6Tm0Lz5NCRduDlyfOhY2KH-MZN1n6jUgrsuU,7480
|
|
281
281
|
unstructured_ingest/v2/interfaces/__init__.py,sha256=Rfa8crx6De7WNOK-EjsWWwFVpsUfCc6gY8B8tQ3ae9I,899
|
|
282
|
-
unstructured_ingest/v2/interfaces/connector.py,sha256=
|
|
282
|
+
unstructured_ingest/v2/interfaces/connector.py,sha256=KG0pHdAcpuO5h72xrAkJzADmjxbav31TZ2Wo3PBvwT0,765
|
|
283
283
|
unstructured_ingest/v2/interfaces/downloader.py,sha256=PKT1kr79Mz1urW_8xCyq9sBuK93gDvyTXg5e4ma4htU,2871
|
|
284
284
|
unstructured_ingest/v2/interfaces/file_data.py,sha256=ieJK-hqHCEOmoYNGoFbCHziSaZyMtRS9VpSoYbwoKCE,1944
|
|
285
285
|
unstructured_ingest/v2/interfaces/indexer.py,sha256=Bd1S-gTLsxhJBLEh1lYm_gXqwQLaEZMoqPq9yGxtN_E,713
|
|
@@ -290,39 +290,39 @@ unstructured_ingest/v2/interfaces/uploader.py,sha256=JmZDl1blJa5rS61YHCae3Hfet84
|
|
|
290
290
|
unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
291
291
|
unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
|
|
292
292
|
unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
|
|
293
|
-
unstructured_ingest/v2/pipeline/pipeline.py,sha256=
|
|
293
|
+
unstructured_ingest/v2/pipeline/pipeline.py,sha256=x6hanD7Cj7Wd5MBUvb33UwXQMZxubzwlAiYyBCMukuc,15693
|
|
294
294
|
unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
295
|
-
unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=
|
|
296
|
-
unstructured_ingest/v2/pipeline/steps/download.py,sha256=
|
|
297
|
-
unstructured_ingest/v2/pipeline/steps/embed.py,sha256
|
|
298
|
-
unstructured_ingest/v2/pipeline/steps/filter.py,sha256=
|
|
299
|
-
unstructured_ingest/v2/pipeline/steps/index.py,sha256=
|
|
300
|
-
unstructured_ingest/v2/pipeline/steps/partition.py,sha256
|
|
301
|
-
unstructured_ingest/v2/pipeline/steps/stage.py,sha256=
|
|
302
|
-
unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=
|
|
303
|
-
unstructured_ingest/v2/pipeline/steps/upload.py,sha256=
|
|
295
|
+
unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=rYVcHSXeQSzWszg6VmtYlNc66Gsx-22Ti0BxPyQaJak,3135
|
|
296
|
+
unstructured_ingest/v2/pipeline/steps/download.py,sha256=lzvOl5SoUK6OCCVVeG4CzdPIGj6eKKCGdciNo_0RMNk,8173
|
|
297
|
+
unstructured_ingest/v2/pipeline/steps/embed.py,sha256=-YFvmchdsonWiSXxaD7PJfuUUtMLklaQM_8kZCQxCdM,3113
|
|
298
|
+
unstructured_ingest/v2/pipeline/steps/filter.py,sha256=q7bNieaFMprqoF8Mx7w-ZN6jyA5peiGeTGyPtvcV-uw,1199
|
|
299
|
+
unstructured_ingest/v2/pipeline/steps/index.py,sha256=nfDo-wt5sooKtMHKG7sI42m1L44uw-pxErDlDB1engw,2678
|
|
300
|
+
unstructured_ingest/v2/pipeline/steps/partition.py,sha256=9MQViptxK3ALKco8uE4gK9PpEoGq5JjzyU14C_18blU,3193
|
|
301
|
+
unstructured_ingest/v2/pipeline/steps/stage.py,sha256=cphKgHScLz2rNLZRI5Olsb6dAH-MKGu3p6MYS1BEzkA,2246
|
|
302
|
+
unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=CFSy4tGp6BAvF0oIwWFN8v4zFzh5pRDeESjEn5iP9hE,1756
|
|
303
|
+
unstructured_ingest/v2/pipeline/steps/upload.py,sha256=zlgXgwReX9TBOdfTpS9hETah4SeSmzPB2g8dAGfLIvM,1987
|
|
304
304
|
unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
|
|
305
305
|
unstructured_ingest/v2/processes/chunker.py,sha256=76PrpCSd8k3DpfdZcl8I10u7vciKzhSV9ZByrrp302g,5476
|
|
306
306
|
unstructured_ingest/v2/processes/connector_registry.py,sha256=vkEe6jpgdYtZCxMj59s5atWGgmPuxAEXRUoTt-MJ7wc,2198
|
|
307
|
-
unstructured_ingest/v2/processes/embedder.py,sha256=
|
|
307
|
+
unstructured_ingest/v2/processes/embedder.py,sha256=nFYiOmIJwWLodBt_cC-E5h7zmYB9t3hLu2BWtBStm3g,5977
|
|
308
308
|
unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
|
|
309
309
|
unstructured_ingest/v2/processes/partitioner.py,sha256=bpqmZDsKKi6qtxNWdIWBfQmr1ccQUhU0axecpGAUf_4,7739
|
|
310
310
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
311
311
|
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=6iBdoH6BW8oMK1ZvEi0IgEchuk0cNUPoNIaikpzeML8,4992
|
|
312
|
-
unstructured_ingest/v2/processes/connectors/airtable.py,sha256=
|
|
312
|
+
unstructured_ingest/v2/processes/connectors/airtable.py,sha256=Yi7PEv_FejZ9_y3BPY3gu5YGVfeLh-9YX-qLyQHjJsY,8921
|
|
313
313
|
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=ZctZRfXcOAMBGPkKgHvhTmV_-2F0YN5vqwfY9UCHIlU,5791
|
|
314
314
|
unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=S55v7TXu30rEdgythMBB_2VcuomyMPmcPtLYykbhw_E,8466
|
|
315
315
|
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=skrxRPHZ8y3JxNa0dt5SVitHiDQ5WVxLvY_kh2-QUrQ,8029
|
|
316
316
|
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=SONLywyEfoAlLc-HPabXeGzoiwKnekMHIbRMXd4CGXs,12146
|
|
317
|
-
unstructured_ingest/v2/processes/connectors/databricks_volumes.py,sha256=
|
|
318
|
-
unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=
|
|
317
|
+
unstructured_ingest/v2/processes/connectors/databricks_volumes.py,sha256=E_4DzeemC4mhZsVuLmSXtfy4MR1MoU6CNyvpRqsKnJU,6030
|
|
318
|
+
unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=ojxMUHkLa6ZG50aTGn2YWhDHZ1n38uFRn5p8_ghAIvM,16762
|
|
319
319
|
unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=7xOQthcqBd9auJxB0nxZlhh1vdjXpMX_CtQZa6YfZz0,13088
|
|
320
320
|
unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=D71gt8fsPOXi2-Rir8mATw6dRM3BdzYGnn62qG1iaBw,5586
|
|
321
321
|
unstructured_ingest/v2/processes/connectors/local.py,sha256=a3stgnIkhBbXPIQD0O-RaRM-Eb-szHj9Yy4Fz881-9c,6723
|
|
322
|
-
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=
|
|
322
|
+
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=ZUlyAQyTt0U1JoapFYHQW3IIaGYY50b3URDSLEAFjtk,7687
|
|
323
323
|
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=2_R_hrEAaTU4vJTCK9oKblWTgv6BKjyUhFtC7uq3q2w,4859
|
|
324
324
|
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=ZiUo-dFo1LMOvFwphSLRZiR1PcrN8GWLTHhsh4TU6n0,9207
|
|
325
|
-
unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=
|
|
325
|
+
unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=dfDSNrWIEk19wuHdlMJpp_SLMOteNPlkDBPlAwu1LVY,6767
|
|
326
326
|
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=gCueI1Px7UkI1flNovLMRvcbPGczHI3IlYhOPYlb3WU,6748
|
|
327
327
|
unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
|
|
328
328
|
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=hOaV5gBcHFc6N5Rbu3MgM-5Aol1ht-QkNIN4PqjvfxE,19665
|
|
@@ -331,17 +331,17 @@ unstructured_ingest/v2/processes/connectors/sql.py,sha256=srj2ECKnkGR_iEFBdpa8sx
|
|
|
331
331
|
unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
|
|
332
332
|
unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=Ss0YyD5T6k-00eJ6dr5lSo2H0LcOjVTMmozehyTvnAo,8866
|
|
333
333
|
unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
|
|
334
|
-
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=
|
|
335
|
-
unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=
|
|
336
|
-
unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=
|
|
337
|
-
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=
|
|
338
|
-
unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256
|
|
339
|
-
unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=
|
|
340
|
-
unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=
|
|
334
|
+
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=Y01BuVRql0Kvzc_cdaZE9dDGYjJzrwJu-etfUrEGcUU,7061
|
|
335
|
+
unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=Cjk0LUxqOCDbme0GmnD_5_b1hfStjI23cKw6BquKNrg,5488
|
|
336
|
+
unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=NNAxIRdOQxUncfwhu7J7SnQRM6BSStNOyQZi-4E51iY,5816
|
|
337
|
+
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=usLzU2NA5D_a1juhja4jyJP_CzW4h-5rZ22bWVwvZGQ,10853
|
|
338
|
+
unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=-_pYHbsBG9FyRyNIaf_xyFbPiiR7pnWEEg_8mp0rIZ8,7053
|
|
339
|
+
unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=je1BDqFWlyMfPa4oAMMNFQLLQtCY9quuqx3xjTwF8OQ,6251
|
|
340
|
+
unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=dwpyqDq0qceCBWX3zM1hiUlgXB4hzX6ObOr-sh-5CJs,6926
|
|
341
341
|
unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
|
|
342
|
-
unstructured_ingest-0.0.
|
|
343
|
-
unstructured_ingest-0.0.
|
|
344
|
-
unstructured_ingest-0.0.
|
|
345
|
-
unstructured_ingest-0.0.
|
|
346
|
-
unstructured_ingest-0.0.
|
|
347
|
-
unstructured_ingest-0.0.
|
|
342
|
+
unstructured_ingest-0.0.23.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
343
|
+
unstructured_ingest-0.0.23.dist-info/METADATA,sha256=iWfV6hzGvmClCO7_huz8s-h9FST1mJsc-mUHZQaGQU4,7108
|
|
344
|
+
unstructured_ingest-0.0.23.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
345
|
+
unstructured_ingest-0.0.23.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
346
|
+
unstructured_ingest-0.0.23.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
|
|
347
|
+
unstructured_ingest-0.0.23.dist-info/RECORD,,
|