unstructured-ingest 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/cli/base/cmd.py +10 -0
- unstructured_ingest/v2/cli/base/src.py +2 -0
- unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py +1 -9
- unstructured_ingest/v2/cli/cmds/local.py +0 -8
- unstructured_ingest/v2/cli/configs/__init__.py +8 -1
- unstructured_ingest/v2/cli/configs/filter.py +28 -0
- unstructured_ingest/v2/interfaces/__init__.py +2 -1
- unstructured_ingest/v2/interfaces/downloader.py +9 -3
- unstructured_ingest/v2/interfaces/file_data.py +6 -1
- unstructured_ingest/v2/interfaces/process.py +3 -0
- unstructured_ingest/v2/pipeline/interfaces.py +3 -5
- unstructured_ingest/v2/pipeline/pipeline.py +72 -2
- unstructured_ingest/v2/pipeline/steps/download.py +77 -13
- unstructured_ingest/v2/pipeline/steps/filter.py +40 -0
- unstructured_ingest/v2/processes/connectors/astra.py +8 -0
- unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py +8 -0
- unstructured_ingest/v2/processes/connectors/chroma.py +8 -6
- unstructured_ingest/v2/processes/connectors/databricks_volumes.py +9 -0
- unstructured_ingest/v2/processes/connectors/elasticsearch.py +23 -9
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +22 -31
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +13 -5
- unstructured_ingest/v2/processes/connectors/google_drive.py +13 -9
- unstructured_ingest/v2/processes/connectors/local.py +15 -15
- unstructured_ingest/v2/processes/connectors/mongodb.py +10 -4
- unstructured_ingest/v2/processes/connectors/onedrive.py +14 -2
- unstructured_ingest/v2/processes/connectors/pinecone.py +6 -3
- unstructured_ingest/v2/processes/connectors/salesforce.py +10 -8
- unstructured_ingest/v2/processes/connectors/sharepoint.py +14 -8
- unstructured_ingest/v2/processes/connectors/sql.py +24 -9
- unstructured_ingest/v2/processes/connectors/weaviate.py +13 -5
- unstructured_ingest/v2/processes/filter.py +54 -0
- {unstructured_ingest-0.0.1.dist-info → unstructured_ingest-0.0.2.dist-info}/METADATA +13 -13
- {unstructured_ingest-0.0.1.dist-info → unstructured_ingest-0.0.2.dist-info}/RECORD +37 -34
- {unstructured_ingest-0.0.1.dist-info → unstructured_ingest-0.0.2.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.0.1.dist-info → unstructured_ingest-0.0.2.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.0.1.dist-info → unstructured_ingest-0.0.2.dist-info}/top_level.txt +0 -0
|
@@ -4,13 +4,14 @@ import uuid
|
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
5
|
from datetime import date, datetime
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Any, Optional, Union
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Callable, Optional, Union
|
|
8
8
|
|
|
9
9
|
import numpy as np
|
|
10
10
|
import pandas as pd
|
|
11
11
|
from dateutil import parser
|
|
12
12
|
|
|
13
13
|
from unstructured_ingest.enhanced_dataclass import enhanced_field
|
|
14
|
+
from unstructured_ingest.error import DestinationConnectionError
|
|
14
15
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
15
16
|
from unstructured_ingest.v2.interfaces import (
|
|
16
17
|
AccessConfig,
|
|
@@ -25,6 +26,11 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
25
26
|
from unstructured_ingest.v2.logger import logger
|
|
26
27
|
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
27
28
|
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from sqlite3 import Connection as SqliteConnection
|
|
31
|
+
|
|
32
|
+
from psycopg2.extensions import connection as PostgresConnection
|
|
33
|
+
|
|
28
34
|
CONNECTOR_TYPE = "sql"
|
|
29
35
|
ELEMENTS_TABLE_NAME = "elements"
|
|
30
36
|
|
|
@@ -41,7 +47,7 @@ class DatabaseType(str, enum.Enum):
|
|
|
41
47
|
|
|
42
48
|
|
|
43
49
|
@dataclass
|
|
44
|
-
class
|
|
50
|
+
class SQLConnectionConfig(ConnectionConfig):
|
|
45
51
|
db_type: DatabaseType = (
|
|
46
52
|
# required default value here because of parent class
|
|
47
53
|
DatabaseType.SQLITE
|
|
@@ -134,7 +140,7 @@ class SQLUploadStager(UploadStager):
|
|
|
134
140
|
**kwargs: Any,
|
|
135
141
|
) -> Path:
|
|
136
142
|
with open(elements_filepath) as elements_file:
|
|
137
|
-
elements_contents = json.load(elements_file)
|
|
143
|
+
elements_contents: list[dict] = json.load(elements_file)
|
|
138
144
|
output_path = Path(output_dir) / Path(f"{output_filename}.json")
|
|
139
145
|
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
140
146
|
|
|
@@ -151,7 +157,7 @@ class SQLUploadStager(UploadStager):
|
|
|
151
157
|
data["id"] = str(uuid.uuid4())
|
|
152
158
|
|
|
153
159
|
# remove extraneous, not supported columns
|
|
154
|
-
|
|
160
|
+
data = {k: v for k, v in data.items() if k in _COLUMNS}
|
|
155
161
|
|
|
156
162
|
output.append(data)
|
|
157
163
|
|
|
@@ -185,23 +191,32 @@ class SQLUploaderConfig(UploaderConfig):
|
|
|
185
191
|
class SQLUploader(Uploader):
|
|
186
192
|
connector_type: str = CONNECTOR_TYPE
|
|
187
193
|
upload_config: SQLUploaderConfig
|
|
188
|
-
connection_config:
|
|
194
|
+
connection_config: SQLConnectionConfig
|
|
195
|
+
|
|
196
|
+
def precheck(self) -> None:
    """Check that the configured database accepts a connection and a query.

    Opens a connection through the ``connection`` factory, runs ``SELECT 1;``
    and then releases both the cursor and the connection, so the probe does
    not leave an open handle behind whether it succeeds or fails.

    Raises:
        DestinationConnectionError: if connecting, running the probe query or
            closing the handles raises any exception. The original exception
            is attached as ``__cause__``.
    """
    try:
        # `self.connection` is a property returning a connection factory, so
        # calling it opens a fresh connection that this method owns.
        connection = self.connection()
        try:
            cursor = connection.cursor()
            try:
                cursor.execute("SELECT 1;")
            finally:
                cursor.close()
        finally:
            connection.close()
    except Exception as e:
        logger.error(f"failed to validate connection: {e}", exc_info=True)
        raise DestinationConnectionError(f"failed to validate connection: {e}") from e
|
|
189
204
|
|
|
190
205
|
@property
|
|
191
|
-
def connection(self):
|
|
206
|
+
def connection(self) -> Callable[[], Union["SqliteConnection", "PostgresConnection"]]:
|
|
192
207
|
if self.connection_config.db_type == DatabaseType.POSTGRESQL:
|
|
193
208
|
return self._make_psycopg_connection
|
|
194
209
|
elif self.connection_config.db_type == DatabaseType.SQLITE:
|
|
195
210
|
return self._make_sqlite_connection
|
|
196
211
|
raise ValueError(f"Unsupported database {self.connection_config.db_type} connection.")
|
|
197
212
|
|
|
198
|
-
def _make_sqlite_connection(self):
|
|
213
|
+
def _make_sqlite_connection(self) -> "SqliteConnection":
|
|
199
214
|
from sqlite3 import connect
|
|
200
215
|
|
|
201
216
|
return connect(database=self.connection_config.database)
|
|
202
217
|
|
|
203
218
|
@requires_dependencies(["psycopg2"], extras="postgres")
|
|
204
|
-
def _make_psycopg_connection(self):
|
|
219
|
+
def _make_psycopg_connection(self) -> "PostgresConnection":
|
|
205
220
|
from psycopg2 import connect
|
|
206
221
|
|
|
207
222
|
return connect(
|
|
@@ -261,7 +276,7 @@ class SQLUploader(Uploader):
|
|
|
261
276
|
|
|
262
277
|
|
|
263
278
|
sql_destination_entry = DestinationRegistryEntry(
|
|
264
|
-
connection_config=
|
|
279
|
+
connection_config=SQLConnectionConfig,
|
|
265
280
|
uploader=SQLUploader,
|
|
266
281
|
uploader_config=SQLUploaderConfig,
|
|
267
282
|
upload_stager=SQLUploadStager,
|
|
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING, Any, Optional
|
|
|
7
7
|
from dateutil import parser
|
|
8
8
|
|
|
9
9
|
from unstructured_ingest.enhanced_dataclass import enhanced_field
|
|
10
|
+
from unstructured_ingest.error import DestinationConnectionError
|
|
10
11
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
11
12
|
from unstructured_ingest.v2.interfaces import (
|
|
12
13
|
AccessConfig,
|
|
@@ -156,15 +157,21 @@ class WeaviateUploaderConfig(UploaderConfig):
|
|
|
156
157
|
class WeaviateUploader(Uploader):
|
|
157
158
|
upload_config: WeaviateUploaderConfig
|
|
158
159
|
connection_config: WeaviateConnectionConfig
|
|
159
|
-
client: Optional["Client"] = field(init=False)
|
|
160
160
|
connector_type: str = CONNECTOR_TYPE
|
|
161
161
|
|
|
162
162
|
@requires_dependencies(["weaviate"], extras="weaviate")
|
|
163
|
-
def
|
|
163
|
+
def get_client(self) -> "Client":
|
|
164
164
|
from weaviate import Client
|
|
165
165
|
|
|
166
166
|
auth = self._resolve_auth_method()
|
|
167
|
-
|
|
167
|
+
return Client(url=self.connection_config.host_url, auth_client_secret=auth)
|
|
168
|
+
|
|
169
|
+
def precheck(self) -> None:
    """Check that a Weaviate client can be built from the connection config.

    Calls ``get_client()`` and discards the result; only the absence of an
    exception matters here.
    NOTE(review): presumably the client constructor contacts the server, which
    is what makes this a connectivity check - confirm against the weaviate
    client version in use.

    Raises:
        DestinationConnectionError: if ``get_client()`` raises any exception.
            The original exception is attached as ``__cause__``.
    """
    try:
        self.get_client()
    except Exception as e:
        logger.error(f"Failed to validate connection {e}", exc_info=True)
        raise DestinationConnectionError(f"failed to validate connection: {e}") from e
|
|
168
175
|
|
|
169
176
|
@requires_dependencies(["weaviate"], extras="weaviate")
|
|
170
177
|
def _resolve_auth_method(self):
|
|
@@ -215,8 +222,9 @@ class WeaviateUploader(Uploader):
|
|
|
215
222
|
f"at {self.connection_config.host_url}",
|
|
216
223
|
)
|
|
217
224
|
|
|
218
|
-
|
|
219
|
-
|
|
225
|
+
client = self.get_client()
|
|
226
|
+
client.batch.configure(batch_size=self.upload_config.batch_size)
|
|
227
|
+
with client.batch as b:
|
|
220
228
|
for e in elements_dict:
|
|
221
229
|
vector = e.pop("embeddings", None)
|
|
222
230
|
b.add_data_object(
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import fnmatch
|
|
2
|
+
from abc import ABC
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any, Callable, Optional
|
|
5
|
+
|
|
6
|
+
from unstructured_ingest.enhanced_dataclass import EnhancedDataClassJsonMixin
|
|
7
|
+
from unstructured_ingest.v2.interfaces import FileData
|
|
8
|
+
from unstructured_ingest.v2.interfaces.process import BaseProcess
|
|
9
|
+
from unstructured_ingest.v2.logger import logger
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class FiltererConfig(EnhancedDataClassJsonMixin):
    """Settings that decide which filters a ``Filterer`` applies.

    Both settings are optional and default to ``None``.
    """

    # Glob patterns matched against a file's full path; a file is kept when
    # it matches at least one of them.
    file_glob: Optional[list[str]] = None

    # Upper bound on the file size in bytes; files that report a larger size
    # are dropped.
    max_file_size: Optional[int] = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class Filterer(BaseProcess, ABC):
    """Process step that drops file data which fails any configured filter.

    The active filters are derived from ``config`` once, at construction time,
    and are applied in the order they were registered.
    """

    config: FiltererConfig = field(default_factory=FiltererConfig)
    # Predicates returning True when the file should be kept. Not an init
    # argument: the list is filled in by __post_init__.
    filters: list[Callable[[FileData], bool]] = field(init=False, default_factory=list)

    def __post_init__(self):
        """Register one predicate per setting that is present in the config."""
        settings = self.config
        if settings.file_glob is not None:
            self.filters.append(self.glob_filter)
        # Truthiness check: both None and 0 leave the size filter disabled.
        if settings.max_file_size:
            self.filters.append(self.file_size_filter)

    def is_async(self) -> bool:
        """Report that this process runs synchronously."""
        return False

    def file_size_filter(self, file_data: FileData) -> bool:
        """Keep the file unless its reported size exceeds ``max_file_size``.

        A missing or zero ``filesize_bytes`` always passes.
        """
        reported_size = file_data.metadata.filesize_bytes
        if not reported_size:
            return True
        return reported_size <= self.config.max_file_size

    def glob_filter(self, file_data: FileData) -> bool:
        """Keep the file when its full path matches at least one glob pattern."""
        full_path = file_data.source_identifiers.fullpath
        if any(fnmatch.fnmatch(full_path, glob) for glob in self.config.file_glob):
            return True
        logger.debug(f"The file {full_path!r} is discarded as it does not match any given glob.")
        return False

    def run(self, file_data: FileData, **kwargs: Any) -> Optional[FileData]:
        """Return ``file_data`` if every filter accepts it, otherwise ``None``.

        Evaluation stops at the first filter that rejects the file, and that
        filter's name is written to the debug log.
        """
        rejected_by = next((check for check in self.filters if not check(file_data)), None)
        if rejected_by is None:
            return file_data
        logger.debug(
            f"filtered out file data due to {rejected_by.__name__}: {file_data.identifier}"
        )
        return None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.0.1
|
|
3
|
+
Version: 0.0.2
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -21,16 +21,16 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
21
21
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
22
22
|
Requires-Python: >=3.9.0,<3.13
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
|
-
Requires-Dist: pandas
|
|
25
24
|
Requires-Dist: unstructured
|
|
26
25
|
Requires-Dist: python-dateutil
|
|
26
|
+
Requires-Dist: pandas
|
|
27
27
|
Provides-Extra: airtable
|
|
28
28
|
Requires-Dist: pyairtable ; extra == 'airtable'
|
|
29
29
|
Provides-Extra: astra
|
|
30
30
|
Requires-Dist: astrapy ; extra == 'astra'
|
|
31
31
|
Provides-Extra: azure
|
|
32
|
-
Requires-Dist: adlfs ; extra == 'azure'
|
|
33
32
|
Requires-Dist: fsspec ; extra == 'azure'
|
|
33
|
+
Requires-Dist: adlfs ; extra == 'azure'
|
|
34
34
|
Provides-Extra: azure-cognitive-search
|
|
35
35
|
Requires-Dist: azure-search-documents ; extra == 'azure-cognitive-search'
|
|
36
36
|
Provides-Extra: bedrock
|
|
@@ -39,12 +39,12 @@ Requires-Dist: boto3 ; extra == 'bedrock'
|
|
|
39
39
|
Provides-Extra: biomed
|
|
40
40
|
Requires-Dist: bs4 ; extra == 'biomed'
|
|
41
41
|
Provides-Extra: box
|
|
42
|
-
Requires-Dist: fsspec ; extra == 'box'
|
|
43
42
|
Requires-Dist: boxfs ; extra == 'box'
|
|
43
|
+
Requires-Dist: fsspec ; extra == 'box'
|
|
44
44
|
Provides-Extra: chroma
|
|
45
|
+
Requires-Dist: importlib-metadata >=7.1.0 ; extra == 'chroma'
|
|
45
46
|
Requires-Dist: typer <=0.9.0 ; extra == 'chroma'
|
|
46
47
|
Requires-Dist: chromadb ; extra == 'chroma'
|
|
47
|
-
Requires-Dist: importlib-metadata >=7.1.0 ; extra == 'chroma'
|
|
48
48
|
Provides-Extra: clarifai
|
|
49
49
|
Requires-Dist: clarifai ; extra == 'clarifai'
|
|
50
50
|
Provides-Extra: confluence
|
|
@@ -54,8 +54,8 @@ Requires-Dist: unstructured[tsv] ; extra == 'csv'
|
|
|
54
54
|
Provides-Extra: databricks-volumes
|
|
55
55
|
Requires-Dist: databricks-sdk ; extra == 'databricks-volumes'
|
|
56
56
|
Provides-Extra: delta-table
|
|
57
|
-
Requires-Dist: fsspec ; extra == 'delta-table'
|
|
58
57
|
Requires-Dist: deltalake ; extra == 'delta-table'
|
|
58
|
+
Requires-Dist: fsspec ; extra == 'delta-table'
|
|
59
59
|
Provides-Extra: discord
|
|
60
60
|
Requires-Dist: discord-py ; extra == 'discord'
|
|
61
61
|
Provides-Extra: doc
|
|
@@ -69,8 +69,8 @@ Provides-Extra: elasticsearch
|
|
|
69
69
|
Requires-Dist: elasticsearch[async] ; extra == 'elasticsearch'
|
|
70
70
|
Provides-Extra: embed-huggingface
|
|
71
71
|
Requires-Dist: huggingface ; extra == 'embed-huggingface'
|
|
72
|
-
Requires-Dist: langchain-community ; extra == 'embed-huggingface'
|
|
73
72
|
Requires-Dist: sentence-transformers ; extra == 'embed-huggingface'
|
|
73
|
+
Requires-Dist: langchain-community ; extra == 'embed-huggingface'
|
|
74
74
|
Provides-Extra: embed-octoai
|
|
75
75
|
Requires-Dist: tiktoken ; extra == 'embed-octoai'
|
|
76
76
|
Requires-Dist: openai ; extra == 'embed-octoai'
|
|
@@ -79,8 +79,8 @@ Requires-Dist: langchain-community ; extra == 'embed-vertexai'
|
|
|
79
79
|
Requires-Dist: langchain ; extra == 'embed-vertexai'
|
|
80
80
|
Requires-Dist: langchain-google-vertexai ; extra == 'embed-vertexai'
|
|
81
81
|
Provides-Extra: embed-voyageai
|
|
82
|
-
Requires-Dist: langchain-voyageai ; extra == 'embed-voyageai'
|
|
83
82
|
Requires-Dist: langchain ; extra == 'embed-voyageai'
|
|
83
|
+
Requires-Dist: langchain-voyageai ; extra == 'embed-voyageai'
|
|
84
84
|
Provides-Extra: epub
|
|
85
85
|
Requires-Dist: unstructured[epub] ; extra == 'epub'
|
|
86
86
|
Provides-Extra: gcs
|
|
@@ -114,20 +114,20 @@ Requires-Dist: notion-client ; extra == 'notion'
|
|
|
114
114
|
Provides-Extra: odt
|
|
115
115
|
Requires-Dist: unstructured[odt] ; extra == 'odt'
|
|
116
116
|
Provides-Extra: onedrive
|
|
117
|
+
Requires-Dist: msal ; extra == 'onedrive'
|
|
117
118
|
Requires-Dist: bs4 ; extra == 'onedrive'
|
|
118
119
|
Requires-Dist: Office365-REST-Python-Client ; extra == 'onedrive'
|
|
119
|
-
Requires-Dist: msal ; extra == 'onedrive'
|
|
120
120
|
Provides-Extra: openai
|
|
121
121
|
Requires-Dist: tiktoken ; extra == 'openai'
|
|
122
|
-
Requires-Dist: langchain-community ; extra == 'openai'
|
|
123
122
|
Requires-Dist: openai ; extra == 'openai'
|
|
123
|
+
Requires-Dist: langchain-community ; extra == 'openai'
|
|
124
124
|
Provides-Extra: opensearch
|
|
125
125
|
Requires-Dist: opensearch-py ; extra == 'opensearch'
|
|
126
126
|
Provides-Extra: org
|
|
127
127
|
Requires-Dist: unstructured[org] ; extra == 'org'
|
|
128
128
|
Provides-Extra: outlook
|
|
129
|
-
Requires-Dist: Office365-REST-Python-Client ; extra == 'outlook'
|
|
130
129
|
Requires-Dist: msal ; extra == 'outlook'
|
|
130
|
+
Requires-Dist: Office365-REST-Python-Client ; extra == 'outlook'
|
|
131
131
|
Provides-Extra: pdf
|
|
132
132
|
Requires-Dist: unstructured[pdf] ; extra == 'pdf'
|
|
133
133
|
Provides-Extra: pinecone
|
|
@@ -147,16 +147,16 @@ Requires-Dist: unstructured[rst] ; extra == 'rst'
|
|
|
147
147
|
Provides-Extra: rtf
|
|
148
148
|
Requires-Dist: unstructured[rtf] ; extra == 'rtf'
|
|
149
149
|
Provides-Extra: s3
|
|
150
|
-
Requires-Dist: fsspec ; extra == 's3'
|
|
151
150
|
Requires-Dist: s3fs ; extra == 's3'
|
|
151
|
+
Requires-Dist: fsspec ; extra == 's3'
|
|
152
152
|
Provides-Extra: salesforce
|
|
153
153
|
Requires-Dist: simple-salesforce ; extra == 'salesforce'
|
|
154
154
|
Provides-Extra: sftp
|
|
155
155
|
Requires-Dist: paramiko ; extra == 'sftp'
|
|
156
156
|
Requires-Dist: fsspec ; extra == 'sftp'
|
|
157
157
|
Provides-Extra: sharepoint
|
|
158
|
-
Requires-Dist: Office365-REST-Python-Client ; extra == 'sharepoint'
|
|
159
158
|
Requires-Dist: msal ; extra == 'sharepoint'
|
|
159
|
+
Requires-Dist: Office365-REST-Python-Client ; extra == 'sharepoint'
|
|
160
160
|
Provides-Extra: singlestore
|
|
161
161
|
Requires-Dist: singlestoredb ; extra == 'singlestore'
|
|
162
162
|
Provides-Extra: slack
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
2
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=t0CFzEk7qlIWbgyEWA53ytTKmHbZ9ow2lAyjeP1bFqw,42
|
|
3
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
4
4
|
unstructured_ingest/evaluate.py,sha256=R-mKLFXbVX1xQ1tjGsLHjdP-TbSSV-925IHzggW_bIg,9793
|
|
5
5
|
unstructured_ingest/interfaces.py,sha256=uS8L5mS0mXD8I4XTfVlKZxAwqnpJ4yrRqn4vxWVRhQI,31107
|
|
@@ -260,10 +260,10 @@ unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfi
|
|
|
260
260
|
unstructured_ingest/v2/cli/interfaces.py,sha256=4Bbow6QHks2a1H56tmVQ4vG3sZy-577ZbwrPmDfizmE,829
|
|
261
261
|
unstructured_ingest/v2/cli/utils.py,sha256=QK-ee6FzxPf-IbaNXXWlH-GhvqeNnjK2m8ljBD1SusU,9075
|
|
262
262
|
unstructured_ingest/v2/cli/base/__init__.py,sha256=zXCa7F4FMqItmzxfUIVmyI-CeGh8X85yF8lRxwX_OYQ,83
|
|
263
|
-
unstructured_ingest/v2/cli/base/cmd.py,sha256=
|
|
263
|
+
unstructured_ingest/v2/cli/base/cmd.py,sha256=qi9N5rcyyE2nmswFaoKWbs1PonhHsMC5llqND9-rQso,9790
|
|
264
264
|
unstructured_ingest/v2/cli/base/dest.py,sha256=YMbVIHmYDqvOtxZeEY93stmF2p2ImjuJts7-u-NznYw,2887
|
|
265
265
|
unstructured_ingest/v2/cli/base/importer.py,sha256=nRt0QQ3qpi264-n_mR0l55C2ddM8nowTNzT1jsWaam8,1128
|
|
266
|
-
unstructured_ingest/v2/cli/base/src.py,sha256=
|
|
266
|
+
unstructured_ingest/v2/cli/base/src.py,sha256=oUPO9GPEbkYm1udfD4YQBYTfaefbhpoIN1HPnD672SQ,2460
|
|
267
267
|
unstructured_ingest/v2/cli/cmds/__init__.py,sha256=DWPMD6Wqus22sSoIEyTSiOJAm97aNjvdpdrXgsL4uQ0,2647
|
|
268
268
|
unstructured_ingest/v2/cli/cmds/astra.py,sha256=L-GR2KSP_cFQkQm0aVcdiXmgYMJZCVKIAH794y8qT1M,2590
|
|
269
269
|
unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py,sha256=VTCSUYeIYKnP60lC7DeBYqoqAJnWuBZrwevCXbeIEzw,2248
|
|
@@ -271,7 +271,7 @@ unstructured_ingest/v2/cli/cmds/chroma.py,sha256=RinNOPripk2zRYx1Rt-u-jywXbwh7Js
|
|
|
271
271
|
unstructured_ingest/v2/cli/cmds/databricks_volumes.py,sha256=53d9A7UunJLYZFwwwHEraVshFc3gSzUbmKjMOiv7hn4,5920
|
|
272
272
|
unstructured_ingest/v2/cli/cmds/elasticsearch.py,sha256=joUfnV992fAwEDCtFVJaABwgpyQiWeDl1ZCBEudRtnk,5258
|
|
273
273
|
unstructured_ingest/v2/cli/cmds/google_drive.py,sha256=mXozabpi8kjRFb0S7kw-xMGtEuFoVUxnvefwL5ZIPHc,2334
|
|
274
|
-
unstructured_ingest/v2/cli/cmds/local.py,sha256=
|
|
274
|
+
unstructured_ingest/v2/cli/cmds/local.py,sha256=UOTYjSdNqCFxhZfN6bdxm8jRp6Ijun2K-WpQq1X83OQ,1544
|
|
275
275
|
unstructured_ingest/v2/cli/cmds/milvus.py,sha256=PB1ib1rFGGH_-KDi1bSIO3BIiVcqSJEHCBFFrzQrnmI,1998
|
|
276
276
|
unstructured_ingest/v2/cli/cmds/mongodb.py,sha256=oyV6tacuuxm3dN-AXQgbxvYJiDYo2OOWQKRSBCUGj0E,1823
|
|
277
277
|
unstructured_ingest/v2/cli/cmds/onedrive.py,sha256=DKqhQyyF-swZxs3C9G5W8ECleq8sWpDbpTuiAHXukXQ,2781
|
|
@@ -286,32 +286,34 @@ unstructured_ingest/v2/cli/cmds/fsspec/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
|
|
|
286
286
|
unstructured_ingest/v2/cli/cmds/fsspec/azure.py,sha256=ZHfchzSpGkZ99Fq1050JvHP0-aG1pZsBZxxozcFfxwI,2784
|
|
287
287
|
unstructured_ingest/v2/cli/cmds/fsspec/box.py,sha256=kslkI-0-GyGSJOU7bKgrZeQRXh8HFexDq87ew8kT8kE,1338
|
|
288
288
|
unstructured_ingest/v2/cli/cmds/fsspec/dropbox.py,sha256=LtcR3rCQPgzJNbV3S90HlL0LPPbW9lYEfE8BG4F-dSI,1349
|
|
289
|
-
unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py,sha256=
|
|
289
|
+
unstructured_ingest/v2/cli/cmds/fsspec/fsspec.py,sha256=BlJLEu6YJUejLLFzlSCVSoZDp2pdjoTsdoFFHVwwkVY,2031
|
|
290
290
|
unstructured_ingest/v2/cli/cmds/fsspec/gcs.py,sha256=3-0LYnDs0fgNrDqnHpNZKj_6rwNj9wQVaV0lGOhVFPE,2737
|
|
291
291
|
unstructured_ingest/v2/cli/cmds/fsspec/s3.py,sha256=EXQzYkDtkFli2sfcj4cRDRPFac7b7z1DfQqYlGQcE6o,2279
|
|
292
292
|
unstructured_ingest/v2/cli/cmds/fsspec/sftp.py,sha256=YY2xKguawMyLdcG0qDYKUgk7DT0KgyZJlV17MfwIhpo,2036
|
|
293
|
-
unstructured_ingest/v2/cli/configs/__init__.py,sha256=
|
|
293
|
+
unstructured_ingest/v2/cli/configs/__init__.py,sha256=nAJ1gT3yCAzoZbEbYswE2SMfSk7TEPxa_1v3qEUsgIQ,336
|
|
294
294
|
unstructured_ingest/v2/cli/configs/chunk.py,sha256=KvIhmIRIZxazCumMztAKdWs-4MK7qzOb5h6Ned_2bdU,3547
|
|
295
295
|
unstructured_ingest/v2/cli/configs/embed.py,sha256=q_TwnkxKTKOsMgVYfW6xxbD8FWjU_Uh_X2BQ5-_VLGM,2725
|
|
296
|
+
unstructured_ingest/v2/cli/configs/filter.py,sha256=KYe65_au6m7H4VrjgugC2ain6vsUSWswNSEgcG66VPU,841
|
|
296
297
|
unstructured_ingest/v2/cli/configs/partition.py,sha256=7wdI18V6c4kaXuf50Lh66n9LbtrYHYd8ffEgDQLqvSk,3931
|
|
297
298
|
unstructured_ingest/v2/cli/configs/processor.py,sha256=ZHu2DBIuE8VgL3mEt73yYimw2k_PaOEtdxxFqzHfk84,3350
|
|
298
|
-
unstructured_ingest/v2/interfaces/__init__.py,sha256
|
|
299
|
+
unstructured_ingest/v2/interfaces/__init__.py,sha256=Rfa8crx6De7WNOK-EjsWWwFVpsUfCc6gY8B8tQ3ae9I,899
|
|
299
300
|
unstructured_ingest/v2/interfaces/connector.py,sha256=u4hE1DpTPDC04-n_IzYyn9w1gNCiPT81anrUoEh30Z8,855
|
|
300
|
-
unstructured_ingest/v2/interfaces/downloader.py,sha256=
|
|
301
|
-
unstructured_ingest/v2/interfaces/file_data.py,sha256=
|
|
301
|
+
unstructured_ingest/v2/interfaces/downloader.py,sha256=zs7cxhzbWVc5L0bV4gdCTexWGMVeXTQ9jJF6PCYSAss,2790
|
|
302
|
+
unstructured_ingest/v2/interfaces/file_data.py,sha256=PZrPJBkNC63lNO_1nwvnAeKRxjM3CsjIY6jSO8T9bVM,1665
|
|
302
303
|
unstructured_ingest/v2/interfaces/indexer.py,sha256=pMw0abNHk_tEuA4BkXX1BdAfIwHdytxj7s6tGxMvYRE,821
|
|
303
|
-
unstructured_ingest/v2/interfaces/process.py,sha256=
|
|
304
|
+
unstructured_ingest/v2/interfaces/process.py,sha256=_l4dyaM0u0XxTqQw1Ghr8k2QMpQJMFapLOLhWqSdTdo,512
|
|
304
305
|
unstructured_ingest/v2/interfaces/processor.py,sha256=uHVHeKo5Gt_zFkaEXw7xgaCBDTEl2-Amh-ByA07258o,1620
|
|
305
306
|
unstructured_ingest/v2/interfaces/upload_stager.py,sha256=SylhDl9pK6qa7hvfrhpabCkjwE03yIlI6oM-mQnqtho,1220
|
|
306
307
|
unstructured_ingest/v2/interfaces/uploader.py,sha256=bzfx3Ei4poXKu-hsgjAB4sj4jKij9CoaRSadUM5LtGk,1083
|
|
307
308
|
unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
308
|
-
unstructured_ingest/v2/pipeline/interfaces.py,sha256=
|
|
309
|
-
unstructured_ingest/v2/pipeline/pipeline.py,sha256=
|
|
309
|
+
unstructured_ingest/v2/pipeline/interfaces.py,sha256=Z50-6XFZNajfmJbLKunLxw3RuYMzCYiUp6F0jhQwERE,6441
|
|
310
|
+
unstructured_ingest/v2/pipeline/pipeline.py,sha256=dqn4_O4il6gZ33mE0DVC1wQKRcXMrD_jll999NoyQgw,14283
|
|
310
311
|
unstructured_ingest/v2/pipeline/utils.py,sha256=oPAitfdnITqh2O8Z0uf6VOHg9BTJhitRzNmKXqTwPxg,422
|
|
311
312
|
unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
312
313
|
unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=lfCsBo6A9u1cT57YaEjvNI79gc29nW8c-2_WZNjiO5Y,3275
|
|
313
|
-
unstructured_ingest/v2/pipeline/steps/download.py,sha256=
|
|
314
|
+
unstructured_ingest/v2/pipeline/steps/download.py,sha256=qYeuRU5jeICyuTN7E4YUdnbi6X1X2qKoooJMm4Orbdw,7499
|
|
314
315
|
unstructured_ingest/v2/pipeline/steps/embed.py,sha256=VCdDBUXK6Yx8RTvRBpEFdFE7n0izvkP73w6s8Tv2sgg,3253
|
|
316
|
+
unstructured_ingest/v2/pipeline/steps/filter.py,sha256=mYVccl_zp0CGYFWBrSrPelvSElrXhZahebuymGuirV8,1341
|
|
315
317
|
unstructured_ingest/v2/pipeline/steps/index.py,sha256=i4RcJ1oRqNp-rFdc6rvKVGcSzNhdB7woW7_W364uThQ,2269
|
|
316
318
|
unstructured_ingest/v2/pipeline/steps/partition.py,sha256=q7-rpCj5Vy4BXtd7T72gxGb3xg6lmVyNmTwUfHil7Rg,3199
|
|
317
319
|
unstructured_ingest/v2/pipeline/steps/stage.py,sha256=A8i6VAFY4_xFJR0uBEyBNJlQXmTMGaflXsa6Wa6U1wQ,2274
|
|
@@ -321,38 +323,39 @@ unstructured_ingest/v2/processes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
|
321
323
|
unstructured_ingest/v2/processes/chunker.py,sha256=U6zQhaUG_dii66zqL9iEEGodHENNxnpn6V3pC-e7MMI,4233
|
|
322
324
|
unstructured_ingest/v2/processes/connector_registry.py,sha256=KOrvJNNRdpBPyqFwmTm42kD1xXuo7fNS_5yXjtqAz-c,2100
|
|
323
325
|
unstructured_ingest/v2/processes/embedder.py,sha256=QjAsiXAjWtZzh6lJ4D5LsTMBD81zuMBkegXNWq-FZt0,3308
|
|
326
|
+
unstructured_ingest/v2/processes/filter.py,sha256=CfQihLV_r4yTJgAc66mmbP4_xo3wcDlro5oR_KR--bM,1986
|
|
324
327
|
unstructured_ingest/v2/processes/partitioner.py,sha256=f6UQoQHVKjl8rmM5J9EcuP30RTFLSLrArGdC6qh-ffE,7645
|
|
325
328
|
unstructured_ingest/v2/processes/uncompress.py,sha256=x-JZYNs1zJOtRS7xNgiMyrYoAbzKM0p18O8NAl7avCA,1631
|
|
326
329
|
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=7QMKd8gtEJTIuK352Ho6XyoFvLLhrWIzgdu0dXwXWOE,3960
|
|
327
|
-
unstructured_ingest/v2/processes/connectors/astra.py,sha256=
|
|
328
|
-
unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=
|
|
329
|
-
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=
|
|
330
|
-
unstructured_ingest/v2/processes/connectors/databricks_volumes.py,sha256=
|
|
331
|
-
unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=
|
|
332
|
-
unstructured_ingest/v2/processes/connectors/google_drive.py,sha256
|
|
333
|
-
unstructured_ingest/v2/processes/connectors/local.py,sha256=
|
|
330
|
+
unstructured_ingest/v2/processes/connectors/astra.py,sha256=m6A34wYjnctRfIF-14bnbGIFBwht5Y8UWZ4g8R9x6a8,5241
|
|
331
|
+
unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=N_--5t_hxFzFEK4vERzm46gfg-Ghozb71_NmUlEYIMA,8277
|
|
332
|
+
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=W995GLn7D85GoUhSqHQXP5QQ8OglgykA5rcNmg9Ruf4,7158
|
|
333
|
+
unstructured_ingest/v2/processes/connectors/databricks_volumes.py,sha256=8bVht450bxp0K4ub1XdCDmMKEooXVV4DNY5b5GWF0Ig,3636
|
|
334
|
+
unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=myY2FRXtlBYhH-kbTSsn7j9UDzh36NYHqFRP-ys8am4,15358
|
|
335
|
+
unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=-iYpwt4xxaICRlHD5Bpap7Ck5HRJcapa6uHl60E1uZ4,12702
|
|
336
|
+
unstructured_ingest/v2/processes/connectors/local.py,sha256=IJ5DjASp-5lPmb6J7Y8NROYjIS3sfdRhlcDAZEEGVAw,6573
|
|
334
337
|
unstructured_ingest/v2/processes/connectors/milvus.py,sha256=FWH4FH-zns7gh8sITg9pLYE9uKm_3GeOXJ4wjY6PMno,6776
|
|
335
|
-
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=
|
|
336
|
-
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=
|
|
338
|
+
unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=XZCgkF28HCR4DtMmr8jlxb59txXgEvfCabovROUrv6Y,4602
|
|
339
|
+
unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=_TFO-vlyCxIxMk6hv20CEsicrlh87wCrbi4I1chsMUw,8822
|
|
337
340
|
unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=HNRZVQsWnjLLm0yAGiIyHRbhAsBnGSXBO_VkUfIdwdE,5463
|
|
338
|
-
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=
|
|
339
|
-
unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=
|
|
340
|
-
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=
|
|
341
|
+
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=0rpOh_pi4GormyIQsnEJbKVb7FeizAbLcbljpnjtpeY,5908
|
|
342
|
+
unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=S0dEjT1UxReCC6qE9DlSQBgcSzQbOaIq7SMJqXUpNWQ,10858
|
|
343
|
+
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=NRn0lbOuXqIYqZT15IVFeFQCxpCKzZC_M8pVYZeeNfo,17933
|
|
341
344
|
unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=upF2O4hJ2uiBhDRrpQ8CSJUvzmqu2j5H1b_QbReHJpw,5168
|
|
342
|
-
unstructured_ingest/v2/processes/connectors/sql.py,sha256=
|
|
345
|
+
unstructured_ingest/v2/processes/connectors/sql.py,sha256=mbhBI2tcX4q1YJwR3Nr7HGbr-rb8ppUYq2JcN88We3U,9076
|
|
343
346
|
unstructured_ingest/v2/processes/connectors/utils.py,sha256=nmpZZCeX0O7rGrwHSWM_heBgpZK9tKT6EV1Moer-z40,576
|
|
344
|
-
unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=
|
|
347
|
+
unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=HtJuOUhBs_HA7uOXlEIuYtx0elb0ecsCvP8N822tOMQ,8564
|
|
345
348
|
unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
|
|
346
349
|
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=RN7zoifocIWVgoP9aMDMz4TP-Z9KhE-HbCCBq33fY90,4674
|
|
347
350
|
unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=UnD-F9g7yOOBStrAqeKq6GuQjEyHdwOA3jYLj8YZIRM,4088
|
|
348
351
|
unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=I6mPG9EIso9TcIczCw5Y14Yqd-EhTQ2CLw1MJx1V3dY,4420
|
|
349
|
-
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=
|
|
352
|
+
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=zKrwKTVGnhnitD8h_Url5HRFsJZjM66o3jWrzAm-_UA,12153
|
|
350
353
|
unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=RYZq_8hKF7bRxuB5Gozv5AzB3_nTuuooE4UfRjXwEFU,4443
|
|
351
|
-
unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=
|
|
354
|
+
unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=PXK9a5O3woDuBWSf4R5XLQI5mzHtap8wAKpHI8Rh5gQ,5462
|
|
352
355
|
unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=J7Ej-j7dtXAluHunwynUfHlNsYwymb-LsrGUFcljcsA,5700
|
|
353
356
|
unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
|
|
354
|
-
unstructured_ingest-0.0.
|
|
355
|
-
unstructured_ingest-0.0.
|
|
356
|
-
unstructured_ingest-0.0.
|
|
357
|
-
unstructured_ingest-0.0.
|
|
358
|
-
unstructured_ingest-0.0.
|
|
357
|
+
unstructured_ingest-0.0.2.dist-info/METADATA,sha256=a68Sz8-m1-ZRFz0p4yic64BhgwTuMdIMmCuPECdhWwA,21568
|
|
358
|
+
unstructured_ingest-0.0.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
359
|
+
unstructured_ingest-0.0.2.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
360
|
+
unstructured_ingest-0.0.2.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
|
|
361
|
+
unstructured_ingest-0.0.2.dist-info/RECORD,,
|
|
File without changes
|
{unstructured_ingest-0.0.1.dist-info → unstructured_ingest-0.0.2.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|