unstructured-ingest 0.4.5__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/integration/connectors/sql/test_databricks_delta_tables.py +29 -1
- test/integration/connectors/test_onedrive.py +6 -8
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/azure_openai.py +21 -2
- unstructured_ingest/v2/pipeline/pipeline.py +3 -3
- unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json +23 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +29 -26
- unstructured_ingest/v2/processes/connectors/sharepoint.py +67 -389
- unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py +8 -3
- {unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/METADATA +23 -23
- {unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/RECORD +15 -14
- {unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/top_level.txt +0 -0
test/integration/connectors/sql/test_databricks_delta_tables.py

@@ -9,7 +9,8 @@ import pytest
 from databricks.sql import connect
 from databricks.sql.client import Connection as DeltaTableConnection
 from databricks.sql.client import Cursor as DeltaTableCursor
-from pydantic import BaseModel, SecretStr
+from pydantic import BaseModel, Secret, SecretStr
+from pytest_mock import MockerFixture
 
 from test.integration.connectors.utils.constants import DESTINATION_TAG, SQL_TAG, env_setup_path
 from test.integration.utils import requires_env
@@ -140,3 +141,30 @@ async def test_databricks_delta_tables_destination(
     uploader.precheck()
     uploader.run(path=staged_path, file_data=mock_file_data)
     validate_destination(expected_num_elements=expected_num_elements, table_name=destination_table)
+
+
+def test_get_credentials_provider_with_client_id_and_secret(mocker: MockerFixture):
+    access_config = DatabricksDeltaTablesAccessConfig(
+        client_id="test_client_id", client_secret="test_client_secret"
+    )
+    connection_config = DatabricksDeltaTablesConnectionConfig(
+        access_config=Secret(access_config),
+        server_hostname="test_server_hostname",
+        http_path="test_http_path",
+    )
+
+    credentials_provider = connection_config.get_credentials_provider()
+    assert credentials_provider is not False
+    assert type(credentials_provider).__name__ == "function"
+
+
+def test_get_credentials_provider_with_token(mocker: MockerFixture):
+    access_config = DatabricksDeltaTablesAccessConfig(token="test_token")
+    connection_config = DatabricksDeltaTablesConnectionConfig(
+        access_config=Secret(access_config),
+        server_hostname="test_server_hostname",
+        http_path="test_http_path",
+    )
+
+    credentials_provider = connection_config.get_credentials_provider()
+    assert credentials_provider is False
test/integration/connectors/test_onedrive.py

@@ -18,9 +18,6 @@ from unstructured_ingest.v2.processes.connectors.onedrive import (
 
 
 @pytest.fixture
-@pytest.mark.xfail(
-    reason="Issues with test setup on the provider side."
-)  # TODO: remove line when issues are addressed
 def onedrive_test_folder() -> str:
     """
     Pytest fixture that creates a test folder in OneDrive and deletes it after test run.
@@ -67,9 +64,6 @@ def get_connection_config():
 
 @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, BLOB_STORAGE_TAG)
 @requires_env("MS_CLIENT_CRED", "MS_CLIENT_ID", "MS_TENANT_ID", "MS_USER_PNAME")
-@pytest.mark.xfail(
-    reason="Issues with test setup on the provider side."
-)  # TODO: remove line when issues are addressed
 def test_onedrive_destination(upload_file: Path, onedrive_test_folder: str):
     """
     Integration test for the OneDrive destination connector.
@@ -107,10 +101,14 @@ def test_onedrive_destination(upload_file: Path, onedrive_test_folder: str):
     client = connection_config.get_client()
     drive = client.users[user_pname].drive
 
+    # Workaround: the file should not have .json in metadata.filename; it comes from the embedder
     uploaded_file = (
-        drive.root.get_by_path(destination_fullpath).select(["id", "name"]).get().execute_query()
+        drive.root.get_by_path(f"{destination_fullpath}.json")
+        .select(["id", "name"])
+        .get()
+        .execute_query()
     )
 
     # Check if the file exists
     assert uploaded_file is not None
-    assert uploaded_file.name == upload_file.name
+    assert uploaded_file.name == f"{upload_file.name}.json"
unstructured_ingest/__version__.py

@@ -1 +1 @@
-__version__ = "0.4.5"  # pragma: no cover
+__version__ = "0.4.7"  # pragma: no cover
unstructured_ingest/embed/azure_openai.py

@@ -3,11 +3,15 @@ from typing import TYPE_CHECKING
 
 from pydantic import Field
 
-from unstructured_ingest.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
+from unstructured_ingest.embed.openai import (
+    AsyncOpenAIEmbeddingEncoder,
+    OpenAIEmbeddingConfig,
+    OpenAIEmbeddingEncoder,
+)
 from unstructured_ingest.utils.dep_check import requires_dependencies
 
 if TYPE_CHECKING:
-    from openai import AzureOpenAI
+    from openai import AsyncAzureOpenAI, AzureOpenAI
 
 
 class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
@@ -25,7 +29,22 @@ class AzureOpenAIEmbeddingConfig(OpenAIEmbeddingConfig):
             azure_endpoint=self.azure_endpoint,
         )
 
+    @requires_dependencies(["openai"], extras="openai")
+    def get_async_client(self) -> "AsyncAzureOpenAI":
+        from openai import AsyncAzureOpenAI
+
+        return AsyncAzureOpenAI(
+            api_key=self.api_key.get_secret_value(),
+            api_version=self.api_version,
+            azure_endpoint=self.azure_endpoint,
+        )
+
 
 @dataclass
 class AzureOpenAIEmbeddingEncoder(OpenAIEmbeddingEncoder):
     config: AzureOpenAIEmbeddingConfig
+
+
+@dataclass
+class AsyncAzureOpenAIEmbeddingEncoder(AsyncOpenAIEmbeddingEncoder):
+    config: AzureOpenAIEmbeddingConfig
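The second hunk introduces an async client factory and a matching async encoder class. A minimal usage sketch, assuming only the names visible in the diff; the endpoint and API-version strings are placeholders, and the embedding-call API itself is not shown in this diff:

    from unstructured_ingest.embed.azure_openai import (
        AsyncAzureOpenAIEmbeddingEncoder,
        AzureOpenAIEmbeddingConfig,
    )

    # Placeholder values; api_key is a secret and shown redacted.
    config = AzureOpenAIEmbeddingConfig(
        api_key="<secret>",
        api_version="2024-02-01",  # hypothetical API version
        azure_endpoint="https://my-resource.openai.azure.com",  # hypothetical endpoint
    )
    encoder = AsyncAzureOpenAIEmbeddingEncoder(config=config)
    # Per the diff, this returns an openai.AsyncAzureOpenAI instance:
    async_client = config.get_async_client()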
unstructured_ingest/v2/pipeline/pipeline.py

@@ -329,9 +329,9 @@ class Pipeline:
         source_entry = {
             k: v
             for k, v in source_registry.items()
-            if …
-            and …
-            and …
+            if type(indexer_config) is v.indexer_config
+            and type(downloader_config) is v.downloader_config
+            and type(source_connection_config) is v.connection_config
         }
         if len(source_entry) > 1:
             raise ValueError(
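This hunk makes registry resolution use exact-type matching. Unlike isinstance, `type(x) is cls` rejects subclasses, which matters in this release because the SharePoint configs now subclass the OneDrive ones (see the sharepoint.py hunks below); an isinstance-style check would match both registry entries. An illustrative snippet with stand-in classes, not taken from the package:

    class BaseConfig: ...
    class DerivedConfig(BaseConfig): ...

    cfg = DerivedConfig()
    assert isinstance(cfg, BaseConfig)   # subclasses satisfy isinstance
    assert type(cfg) is DerivedConfig    # the exact-type check selects one entry
    assert type(cfg) is not BaseConfig   # and rejects the parent's entry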
unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json

@@ -0,0 +1,23 @@
+{
+  "properties": [
+    {
+      "dataType": [
+        "text"
+      ],
+      "indexFilterable": true,
+      "indexSearchable": true,
+      "name": "record_id",
+      "tokenization": "word"
+    },
+    {
+      "dataType": [
+        "text"
+      ],
+      "indexFilterable": true,
+      "indexSearchable": true,
+      "name": "text",
+      "tokenization": "word"
+    }
+  ],
+  "vectorizer": "none"
+}
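The new asset defines a default Weaviate collection schema: filterable, searchable text properties record_id and text, with server-side vectorization disabled. A minimal sketch of reading the bundled file; the loading mechanism below is an assumption, since the connector code that consumes the asset is not part of this diff:

    import json
    from importlib import resources

    config_text = (
        resources.files("unstructured_ingest.v2.processes.connectors.assets")
        .joinpath("weaviate_collection_config.json")
        .read_text()
    )
    collection_config = json.loads(config_text)
    assert collection_config["vectorizer"] == "none"
    assert [p["name"] for p in collection_config["properties"]] == ["record_id", "text"]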
unstructured_ingest/v2/processes/connectors/onedrive.py

@@ -42,7 +42,7 @@ if TYPE_CHECKING:
     from office365.onedrive.drives.drive import Drive
 
 CONNECTOR_TYPE = "onedrive"
-MAX_MB_SIZE = 512_000_000
+MAX_BYTES_SIZE = 512_000_000
 
 
 class OnedriveAccessConfig(AccessConfig):
@@ -251,7 +251,7 @@ class OnedriveDownloader(Downloader):
         download_path = self.get_download_path(file_data=file_data)
         download_path.parent.mkdir(parents=True, exist_ok=True)
         logger.info(f"downloading {file_data.source_identifiers.fullpath} to {download_path}")
-        if fsize > MAX_MB_SIZE:
+        if fsize > MAX_BYTES_SIZE:
             logger.info(f"downloading file with size: {fsize} bytes in chunks")
             with download_path.open(mode="wb") as f:
                 file.download_session(f, chunk_size=1024 * 1024 * 100).execute_query()
@@ -313,7 +313,7 @@ class OnedriveUploader(Uploader):
         try:
             folder.get().execute_query()
         except ClientRequestException as e:
-            if e.…
+            if not e.response.status_code == 404:
                 raise e
             folder = root.create_folder(root_folder).execute_query()
             logger.info(f"successfully created folder: {folder.name}")
@@ -321,7 +321,11 @@ class OnedriveUploader(Uploader):
             logger.error(f"failed to validate connection: {e}", exc_info=True)
             raise SourceConnectionError(f"failed to validate connection: {e}")
 
+    @requires_dependencies(["office365"], extras="onedrive")
     def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
+        from office365.onedrive.driveitems.conflict_behavior import ConflictBehavior
+        from office365.runtime.client_request_exception import ClientRequestException
+
         drive = self.connection_config.get_drive()
 
         # Use the remote_url from upload_config as the base destination folder
@@ -331,11 +335,11 @@ class OnedriveUploader(Uploader):
         if file_data.source_identifiers and file_data.source_identifiers.rel_path:
             # Combine the base destination folder with the file's relative path
             destination_path = Path(base_destination_folder) / Path(
-                file_data.source_identifiers.rel_path
+                f"{file_data.source_identifiers.rel_path}.json"
             )
         else:
             # If no relative path is provided, upload directly to the base destination folder
-            destination_path = Path(base_destination_folder) / path.name
+            destination_path = Path(base_destination_folder) / f"{path.name}.json"
 
         destination_folder = destination_path.parent
         file_name = destination_path.name
@@ -348,27 +352,19 @@ class OnedriveUploader(Uploader):
             # Attempt to get the folder
             folder = drive.root.get_by_path(destination_folder_str)
             folder.get().execute_query()
-        except …
+        except ClientRequestException as e:
             # Folder doesn't exist, create it recursively
-            …
-            …
-            …
-            …
-            …
-                .execute_query()
-            )
-            if folders:
-                current_folder = folders[0]
-            else:
-                # Folder doesn't exist, create it
-                current_folder = current_folder.create_folder(part).execute_query()
-            folder = current_folder
+            root = drive.root
+            root_folder = self.upload_config.root_folder
+            if not e.response.status_code == 404:
+                raise e
+            folder = root.create_folder(root_folder).execute_query()
+            logger.info(f"successfully created folder: {folder.name}")
 
         # Check the size of the file
         file_size = path.stat().st_size
 
-        if file_size < MAX_MB_SIZE:
+        if file_size < MAX_BYTES_SIZE:
             # Use simple upload for small files
             with path.open("rb") as local_file:
                 content = local_file.read()
@@ -388,19 +384,26 @@ class OnedriveUploader(Uploader):
             ) from e
         else:
             # Use resumable upload for large files
-            …
-            …
+            destination_drive_item = drive.root.get_by_path(destination_folder_str)
+
+            logger.info(
+                f"Uploading {path.parent / file_name} to {destination_folder_str} using resumable upload"  # noqa: E501
+            )
 
-            logger.info(f"Uploading {path} to {destination_fullpath} using resumable upload")
             try:
                 uploaded_file = destination_drive_item.resumable_upload(
                     source_path=str(path)
                 ).execute_query()
+                # Rename the uploaded file to the original source name with a .json extension
+                # Overwrite the file if it already exists
+                renamed_file = uploaded_file.move(
+                    name=file_name, conflict_behavior=ConflictBehavior.Replace
+                ).execute_query()
                 # Validate the upload
-                if not …
+                if not renamed_file or renamed_file.name != file_name:
                     raise DestinationConnectionError(f"Upload failed for file '{file_name}'")
                 # Log details about the uploaded file
-                logger.info(f"Uploaded file {…
+                logger.info(f"Uploaded file {renamed_file.name} with ID {renamed_file.id}")
             except Exception as e:
                 logger.error(f"Failed to upload file '{file_name}' using resumable upload: {e}")
                 raise DestinationConnectionError(
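Net effect of the uploader changes above: because the staged output is serialized JSON, a ".json" suffix is now appended to the destination name, and large resumable uploads are renamed back to that name with ConflictBehavior.Replace. A sketch of the resulting path arithmetic, using placeholder values:

    from pathlib import Path

    base_destination_folder = "ingest-output"  # placeholder for the upload_config remote_url folder
    rel_path = "docs/report.pdf"               # placeholder source_identifiers.rel_path

    destination_path = Path(base_destination_folder) / f"{rel_path}.json"
    assert destination_path.name == "report.pdf.json"
    assert destination_path.parent == Path("ingest-output/docs")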
unstructured_ingest/v2/processes/connectors/sharepoint.py

@@ -1,85 +1,43 @@
-import json
-from dataclasses import dataclass, field
-from enum import Enum
-from pathlib import Path
-from time import time
-from typing import TYPE_CHECKING, Any, Generator, Optional
-from urllib.parse import quote
+from __future__ import annotations
 
-…
+import asyncio
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any, AsyncIterator
 
-from …
+from pydantic import Field
+
+from unstructured_ingest.error import (
+    SourceConnectionError,
+    SourceConnectionNetworkError,
+)
 from unstructured_ingest.utils.dep_check import requires_dependencies
 from unstructured_ingest.v2.interfaces import (
-    AccessConfig,
-    ConnectionConfig,
-    Downloader,
-    DownloaderConfig,
-    DownloadResponse,
     FileData,
-    FileDataSourceMetadata,
-    Indexer,
-    IndexerConfig,
-    SourceIdentifiers,
 )
 from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.processes.connector_registry import (
     SourceRegistryEntry,
 )
-
-
+from unstructured_ingest.v2.processes.connectors.onedrive import (
+    OnedriveAccessConfig,
+    OnedriveConnectionConfig,
+    OnedriveDownloader,
+    OnedriveDownloaderConfig,
+    OnedriveIndexer,
+    OnedriveIndexerConfig,
+)
 
 if TYPE_CHECKING:
-    from office365.graph_client import GraphClient
     from office365.onedrive.driveitems.driveItem import DriveItem
-    from office365.onedrive.drives.drive import Drive
-    from office365.onedrive.permissions.permission import Permission
-    from office365.onedrive.sites.site import Site
-    from office365.sharepoint.client_context import ClientContext
-    from office365.sharepoint.files.file import File
-    from office365.sharepoint.folders.folder import Folder
-    from office365.sharepoint.publishing.pages.page import SitePage
 
 CONNECTOR_TYPE = "sharepoint"
 
-MAX_MB_SIZE = 512_000_000
-
-# TODO handle other data types possible from Sharepoint
-# exampled: https://github.com/vgrem/Office365-REST-Python-Client/tree/master/examples/sharepoint
-
-
-class SharepointContentType(Enum):
-    DOCUMENT = "document"
-    SITEPAGE = "site_page"
-    LIST = "list"
-
 
-class SharepointAccessConfig(AccessConfig):
-    client_cred: str = Field(description="…")
+class SharepointAccessConfig(OnedriveAccessConfig):
+    client_cred: str = Field(description="Microsoft App client secret")
 
 
-class SharepointPermissionsConfig(…):
-    permissions_application_id: Optional[str] = Field(
-        default=None, description="Microsoft Graph API application id"
-    )
-    permissions_tenant: Optional[str] = Field(
-        default=None,
-        description="url to get permissions data within tenant.",
-        examples=["https://contoso.onmicrosoft.com"],
-    )
-    permissions_client_cred: Optional[SecretStr] = Field(
-        default=None, description="Microsoft Graph API application credentials"
-    )
-    authority_url: Optional[SecretStr] = Field(
-        repr=False,
-        default_factory=lambda: SecretStr(secret_value="https://login.microsoftonline.com"),
-        description="Permissions authority url",
-        examples=["https://login.microsoftonline.com"],
-    )
-
-
-class SharepointConnectionConfig(ConnectionConfig):
-    client_id: str = Field(description="Sharepoint app client ID")
+class SharepointConnectionConfig(OnedriveConnectionConfig):
     site: str = Field(
         description="Sharepoint site url. Process either base url e.g \
         https://[tenant].sharepoint.com or relative sites \
@@ -88,355 +46,75 @@ class SharepointConnectionConfig(ConnectionConfig):
         https://[tenant]-admin.sharepoint.com.\
         This requires the app to be registered at a tenant level"
     )
-    access_config: Secret[SharepointAccessConfig]
-    permissions_config: Optional[SharepointPermissionsConfig] = None
 
-    @requires_dependencies(["office365"], extras="sharepoint")
-    def get_client(self) -> "ClientContext":
-        from office365.runtime.auth.client_credential import ClientCredential
-        from office365.sharepoint.client_context import ClientContext
 
-        try:
-            credentials = ClientCredential(
-                self.client_id, self.access_config.get_secret_value().client_cred
-            )
-            site_client = ClientContext(self.site).with_credentials(credentials)
-        except Exception as e:
-            logger.error(f"Couldn't set Sharepoint client: {e}")
-            raise e
-        return site_client
-
-    @requires_dependencies(["msal"], extras="sharepoint")
-    def get_permissions_token(self):
-        from msal import ConfidentialClientApplication
-
-        try:
-            client_credential = self.permissions_config.permissions_client_cred.get_secret_value()
-            app = ConfidentialClientApplication(
-                authority=f"{self.permissions_config.authority_url.get_secret_value()}/"
-                f"{self.permissions_config.permissions_tenant}",
-                client_id=self.permissions_config.permissions_application_id,
-                client_credential=client_credential,
-            )
-            token = app.acquire_token_for_client(scopes=["https://graph.microsoft.com/.default"])
-        except ValueError as exc:
-            logger.error("Couldn't set up credentials for Sharepoint")
-            raise exc
-        if "error" in token:
-            raise SourceConnectionNetworkError(
-                "failed to fetch token, {}: {}".format(token["error"], token["error_description"])
-            )
-        return token
-
-    @requires_dependencies(["office365"], extras="sharepoint")
-    def get_permissions_client(self) -> Optional["GraphClient"]:
-        from office365.graph_client import GraphClient
-
-        if self.permissions_config is None:
-            return None
-
-        client = GraphClient(self.get_permissions_token)
-        return client
-
-
-class SharepointIndexerConfig(IndexerConfig):
-    path: Optional[str] = Field(
-        default=None,
-        description="Path from which to start parsing files. If the connector is to \
-        process all sites within the tenant this filter will be applied to \
-        all sites document libraries.",
-    )
-    recursive: bool = Field(
-        default=False,
-        description="Recursively download files in their respective folders "
-        "otherwise stop at the files in provided folder level.",
-    )
-    omit_files: bool = Field(default=False, description="Don't process files.")
-    omit_pages: bool = Field(default=False, description="Don't process site pages.")
-    omit_lists: bool = Field(default=False, description="Don't process lists.")
+class SharepointIndexerConfig(OnedriveIndexerConfig):
+    pass
 
 
 @dataclass
-class SharepointIndexer(Indexer):
+class SharepointIndexer(OnedriveIndexer):
     connection_config: SharepointConnectionConfig
-    index_config: SharepointIndexerConfig
+    index_config: SharepointIndexerConfig
 
-    def precheck(self) -> None:
-        try:
-            site_client = self.connection_config.get_client()
-            site_client.site_pages.pages.get().execute_query()
-        except Exception as e:
-            logger.error(f"failed to validate connection: {e}", exc_info=True)
-            raise SourceConnectionError(f"failed to validate connection: {e}")
-
-    def list_files(self, folder: "Folder", recursive: bool = False) -> list["File"]:
-        if not recursive:
-            folder.expand(["Files"]).get().execute_query()
-            return folder.files
-
-        folder.expand(["Files", "Folders"]).get().execute_query()
-        files: list["File"] = list(folder.files)
-        folders: list["Folder"] = list(folder.folders)
-        for f in folders:
-            if "/Forms" in f.serverRelativeUrl:
-                continue
-            files.extend(self.list_files(f, recursive))
-        return files
-
-    def get_properties(self, raw_properties: dict) -> dict:
-        raw_properties = {k: v for k, v in raw_properties.items() if v}
-        filtered_properties = {}
-        for k, v in raw_properties.items():
-            try:
-                json.dumps(v)
-                filtered_properties[k] = v
-            except TypeError:
-                pass
-        return filtered_properties
-
-    def list_pages(self, client: "ClientContext") -> list["SitePage"]:
-        pages = client.site_pages.pages.get().execute_query()
-        return pages
-
-    def page_to_file_data(self, site_page: "SitePage") -> FileData:
-        site_page.expand(site_page.properties.keys()).get().execute_query()
-        version = site_page.properties.get("Version", None)
-        unique_id = site_page.properties.get("UniqueId", None)
-        modified_date = site_page.properties.get("Modified", None)
-        url = site_page.properties.get("AbsoluteUrl", None)
-        date_modified_dt = parse_datetime(modified_date) if modified_date else None
-        date_created_at = (
-            parse_datetime(site_page.first_published)
-            if (site_page.first_published and site_page.first_published != "0001-01-01T08:00:00Z")
-            else None
-        )
-        file_path = site_page.get_property("Url", "")
-        server_path = file_path if file_path[0] != "/" else file_path[1:]
-        additional_metadata = self.get_properties(raw_properties=site_page.properties)
-        additional_metadata["sharepoint_content_type"] = SharepointContentType.SITEPAGE.value
-        return FileData(
-            identifier=unique_id,
-            connector_type=CONNECTOR_TYPE,
-            source_identifiers=SourceIdentifiers(
-                filename=site_page.file_name,
-                fullpath=file_path,
-                rel_path=file_path.replace(self.index_config.path, ""),
-            ),
-            metadata=FileDataSourceMetadata(
-                url=url,
-                version=version,
-                date_modified=str(date_modified_dt.timestamp()) if date_modified_dt else None,
-                date_created=str(date_created_at.timestamp()) if date_created_at else None,
-                date_processed=str(time()),
-                record_locator={
-                    "server_path": server_path,
-                },
-            ),
-            additional_metadata=additional_metadata,
-        )
-
-    def file_to_file_data(self, client: "ClientContext", file: "File") -> FileData:
-        file.expand(file.properties.keys()).get().execute_query()
-        absolute_url = f"{client.base_url}{quote(file.serverRelativeUrl)}"
-        date_modified_dt = (
-            parse_datetime(file.time_last_modified) if file.time_last_modified else None
-        )
-
-        date_created_at = parse_datetime(file.time_created) if file.time_created else None
-        additional_metadata = self.get_properties(raw_properties=file.properties)
-        additional_metadata["sharepoint_content_type"] = SharepointContentType.DOCUMENT.value
-        fullpath = str(file.serverRelativeUrl)
-        rel_path = fullpath.replace(self.index_config.path, "")
-        while rel_path[0] == "/":
-            rel_path = rel_path[1:]
-        return FileData(
-            identifier=file.unique_id,
-            connector_type=CONNECTOR_TYPE,
-            source_identifiers=SourceIdentifiers(
-                filename=file.name,
-                fullpath=fullpath,
-                rel_path=rel_path,
-            ),
-            metadata=FileDataSourceMetadata(
-                url=absolute_url,
-                version=f"{file.major_version}.{file.minor_version}",
-                date_modified=str(date_modified_dt.timestamp()) if date_modified_dt else None,
-                date_created=str(date_created_at.timestamp()) if date_created_at else None,
-                date_processed=str(time()),
-                record_locator={"server_path": file.serverRelativeUrl, "site_url": client.base_url},
-            ),
-            additional_metadata=additional_metadata,
-        )
-
-    def get_root(self, client: "ClientContext") -> "Folder":
-        if path := self.index_config.path:
-            return client.web.get_folder_by_server_relative_path(path)
-        default_document_library = client.web.default_document_library()
-        root_folder = default_document_library.root_folder
-        root_folder = root_folder.get().execute_query()
-        self.index_config.path = root_folder.name
-        return root_folder
-
-    def get_site_url(self, client: "ClientContext") -> str:
-        res = client.web.get().execute_query()
-        return res.url
-
-    def get_site(self, permissions_client: "GraphClient", site_url) -> "Site":
-        return permissions_client.sites.get_by_url(url=site_url).execute_query()
-
-    def get_permissions_items(self, site: "Site") -> list["DriveItem"]:
-        # TODO find a way to narrow this search down by name of drive
-        items: list["DriveItem"] = []
-        drives: list["Drive"] = site.drives.get_all().execute_query()
-        for drive in drives:
-            items.extend(drive.root.children.get_all().execute_query())
-        return items
+    @requires_dependencies(["office365"], extras="sharepoint")
+    async def run_async(self, **kwargs: Any) -> AsyncIterator[FileData]:
+        from office365.runtime.client_request_exception import ClientRequestException
 
-    def map_permission(self, permission: "Permission") -> dict:
-        return {
-            …
-            "has_password": permission.has_password,
-            "link": permission.link.to_json(),
-            "granted_to_identities": permission.granted_to_identities.to_json(),
-            "granted_to": permission.granted_to.to_json(),
-            "granted_to_v2": permission.granted_to_v2.to_json(),
-            "granted_to_identities_v2": permission.granted_to_identities_v2.to_json(),
-            "invitation": permission.invitation.to_json(),
-        }
+        token_resp = await asyncio.to_thread(self.connection_config.get_token)
+        if "error" in token_resp:
+            raise SourceConnectionError(
+                f"[{CONNECTOR_TYPE}]: {token_resp['error']} ({token_resp.get('error_description')})"
+            )
 
-    def enrich_permissions_on_files(self, all_file_data: list[FileData], site_url: str) -> None:
-        …
-        …
-        existing_items = self.get_permissions_items(site=site)
-        for file_data in all_file_data:
-            etag = file_data.additional_metadata.get("ETag")
-            if not etag:
-                continue
-            matching_items = list(filter(lambda x: x.etag == etag, existing_items))
-            if not matching_items:
-                continue
-            if len(matching_items) > 1:
-                logger.warning(
-                    "Found multiple drive items with etag matching {}, skipping: {}".format(
-                        etag, ", ".join([i.name for i in matching_items])
-                    )
-                )
-                continue
-            matching_item = matching_items[0]
-            permissions: list["Permission"] = matching_item.permissions.get_all().execute_query()
-            permissions_data = [
-                self.map_permission(permission=permission) for permission in permissions
-            ]
-            file_data.metadata.permissions_data = permissions_data
+        client = await asyncio.to_thread(self.connection_config.get_client)
+        try:
+            site = client.sites.get_by_url(self.connection_config.site).get().execute_query()
+            site_drive_item = site.drive.get().execute_query().root
+        except ClientRequestException:
+            logger.info("Site not found")
 
-    @property
-    def process_permissions(self) -> bool:
-        return (
-            self.connection_config.permissions_config is not None
-            and self.connection_config.permissions_config.permissions_tenant
-            and self.connection_config.permissions_config.permissions_client_cred.get_secret_value()
-            and self.connection_config.permissions_config.permissions_application_id
+        drive_items = await self.list_objects(
+            folder=site_drive_item, recursive=self.index_config.recursive
         )
-
-    def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
-        client = self.connection_config.get_client()
-        root_folder = self.get_root(client=client)
-        logger.debug(f"processing content from path: {self.index_config.path}")
-        if not self.index_config.omit_files:
-            files = self.list_files(root_folder, recursive=self.index_config.recursive)
-            file_data = [self.file_to_file_data(file=file, client=client) for file in files]
-            if self.process_permissions:
-                self.enrich_permissions_on_files(
-                    all_file_data=file_data, site_url=self.get_site_url(client=client)
-                )
-            for file in file_data:
-                yield file
-        if not self.index_config.omit_pages:
-            pages = self.list_pages(client=client)
-            for page in pages:
-                file_data = self.page_to_file_data(site_page=page)
-                file_data.metadata.record_locator["site_url"] = client.base_url
-                yield file_data
+        for drive_item in drive_items:
+            file_data = await self.drive_item_to_file_data(drive_item=drive_item)
+            yield file_data
 
 
-class SharepointDownloaderConfig(DownloaderConfig):
+class SharepointDownloaderConfig(OnedriveDownloaderConfig):
     pass
 
 
 @dataclass
-class SharepointDownloader(Downloader):
+class SharepointDownloader(OnedriveDownloader):
     connection_config: SharepointConnectionConfig
     download_config: SharepointDownloaderConfig
-    connector_type: str = CONNECTOR_TYPE
-
-    def get_download_path(self, file_data: FileData) -> Path:
-        download_path = super().get_download_path(file_data=file_data)
 
-        …
-        return download_path
-
-    def get_document(self, file_data: FileData) -> DownloadResponse:
-        client: "ClientContext" = self.connection_config.get_client()
-        file: "File" = client.web.get_file_by_id(unique_id=file_data.identifier)
-        download_path = self.get_download_path(file_data=file_data)
-        download_path.parent.mkdir(parents=True, exist_ok=True)
-        logger.debug(
-            f"writing document content {file_data.source_identifiers.fullpath} to {download_path}"
-        )
-        with download_path.open("wb") as f:
-            file.download(f).execute_query()
-        return self.generate_download_response(file_data=file_data, download_path=download_path)
+    @SourceConnectionNetworkError.wrap
+    @requires_dependencies(["office365"], extras="onedrive")
+    def _fetch_file(self, file_data: FileData) -> DriveItem:
+        from office365.runtime.client_request_exception import ClientRequestException
 
-    def get_site_page(self, file_data: FileData) -> DownloadResponse:
-        # TODO fetch comments for site page as well
-        from lxml import etree, html
-
-        canvas_content_raw = file_data.additional_metadata.get("CanvasContent1")
-        layout_web_parts_content_raw = file_data.additional_metadata.get("LayoutWebpartsContent")
-        html_content = []
-        if layout_web_parts_content_raw:
-            layout_web_parts_content = json.loads(layout_web_parts_content_raw)
-            for web_part in layout_web_parts_content:
-                properties = web_part.get("properties", {})
-                if title := properties.get("title"):
-                    html_content.append(f"<title>{title}</title>")
-        if canvas_content_raw:
-            canvas_content = json.loads(canvas_content_raw)
-            for content in canvas_content:
-                if inner_html := content.get("innerHTML"):
-                    html_content.append(inner_html)
-        htmls = "".join(html_content)
-        content = f"<div>{htmls}</div>"
-        document = html.fromstring(content)
-        download_path = self.get_download_path(file_data=file_data)
-        download_path.parent.mkdir(parents=True, exist_ok=True)
-        logger.debug(
-            f"writing site page content {file_data.source_identifiers.filename} to {download_path}"
-        )
-        with download_path.open("w") as f:
-            f.write(etree.tostring(document, encoding="unicode", pretty_print=True))
-        return self.generate_download_response(file_data=file_data, download_path=download_path)
-
-    def run(self, file_data: FileData, **kwargs: Any) -> DownloadResponse:
-        content_type = file_data.additional_metadata.get("sharepoint_content_type")
-        if not content_type:
+        if file_data.source_identifiers is None or not file_data.source_identifiers.fullpath:
             raise ValueError(
-                f"…"
+                f"file data doesn't have enough information to get "
+                f"file content: {file_data.model_dump()}"
             )
-        …
+
+        server_relative_path = file_data.source_identifiers.fullpath
+        client = self.connection_config.get_client()
+
+        try:
+            site = client.sites.get_by_url(self.connection_config.site).get().execute_query()
+            site_drive_item = site.drive.get().execute_query().root
+        except ClientRequestException:
+            logger.info("Site not found")
+        file = site_drive_item.get_by_path(server_relative_path).get().execute_query()
+
+        if not file:
+            raise FileNotFoundError(f"file not found: {server_relative_path}")
+        return file
 
 
 sharepoint_source_entry = SourceRegistryEntry(
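After this rewrite the SharePoint indexer inherits the OneDrive connector's Graph-based machinery and exposes an async entry point. A minimal sketch of consuming run_async, assuming only the signature shown above; construction of the indexer and error handling are omitted:

    import asyncio

    async def index_site(indexer: "SharepointIndexer") -> list:
        # run_async is an async generator that yields FileData objects
        return [file_data async for file_data in indexer.run_async()]

    # files = asyncio.run(index_site(indexer))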
unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py

@@ -51,9 +51,10 @@ class DatabricksDeltaTablesConnectionConfig(SQLConnectionConfig):
 
         host = f"https://{self.server_hostname}"
         access_configs = self.access_config.get_secret_value()
-        …
+        client_id = access_configs.client_id
+        client_secret = access_configs.client_secret
+
+        def _get_credentials_provider():
             return oauth_service_principal(
                 Config(
                     host=host,
@@ -61,6 +62,10 @@ class DatabricksDeltaTablesConnectionConfig(SQLConnectionConfig):
                     client_secret=client_secret,
                 )
             )
+
+        if client_id and client_secret:
+            return _get_credentials_provider
+
         return False
 
     def model_post_init(self, __context: Any) -> None:
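To summarize the behavior these hunks add (exercised by the new tests earlier in this diff): with a client id/secret pair, get_credentials_provider() returns a zero-argument function that builds OAuth service-principal credentials; with token-only access it returns False. A hedged sketch of how a caller might branch on the result; the connect keyword names follow the databricks-sql-connector convention but are assumptions here:

    provider = connection_config.get_credentials_provider()
    if provider:
        # OAuth M2M path: pass the provider through to databricks.sql.connect
        connect_kwargs = {"credentials_provider": provider}
    else:
        # Fall back to personal-access-token auth (placeholder token)
        connect_kwargs = {"access_token": "<token>"}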
{unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unstructured-ingest
-Version: 0.4.5
+Version: 0.4.7
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -22,28 +22,28 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0,<3.14
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
-Requires-Dist: pydantic>=2.7
 Requires-Dist: pandas
-Requires-Dist: tqdm
-Requires-Dist: python-dateutil
-Requires-Dist: click
+Requires-Dist: pydantic>=2.7
 Requires-Dist: dataclasses-json
+Requires-Dist: python-dateutil
 Requires-Dist: opentelemetry-sdk
+Requires-Dist: tqdm
+Requires-Dist: click
 Provides-Extra: airtable
 Requires-Dist: pyairtable; extra == "airtable"
 Provides-Extra: astradb
 Requires-Dist: astrapy; extra == "astradb"
 Provides-Extra: azure
-Requires-Dist: adlfs; extra == "azure"
 Requires-Dist: fsspec; extra == "azure"
+Requires-Dist: adlfs; extra == "azure"
 Provides-Extra: azure-ai-search
 Requires-Dist: azure-search-documents; extra == "azure-ai-search"
 Provides-Extra: bedrock
-Requires-Dist: aioboto3; extra == "bedrock"
 Requires-Dist: boto3; extra == "bedrock"
+Requires-Dist: aioboto3; extra == "bedrock"
 Provides-Extra: biomed
-Requires-Dist: bs4; extra == "biomed"
 Requires-Dist: requests; extra == "biomed"
+Requires-Dist: bs4; extra == "biomed"
 Provides-Extra: box
 Requires-Dist: fsspec; extra == "box"
 Requires-Dist: boxfs; extra == "box"
@@ -52,8 +52,8 @@ Requires-Dist: chromadb; extra == "chroma"
 Provides-Extra: clarifai
 Requires-Dist: clarifai; extra == "clarifai"
 Provides-Extra: confluence
-Requires-Dist: atlassian-python-api; extra == "confluence"
 Requires-Dist: requests; extra == "confluence"
+Requires-Dist: atlassian-python-api; extra == "confluence"
 Provides-Extra: couchbase
 Requires-Dist: couchbase; extra == "couchbase"
 Provides-Extra: csv
@@ -72,8 +72,8 @@ Requires-Dist: unstructured[docx]; extra == "doc"
 Provides-Extra: docx
 Requires-Dist: unstructured[docx]; extra == "docx"
 Provides-Extra: dropbox
-Requires-Dist: fsspec; extra == "dropbox"
 Requires-Dist: dropboxdrivefs; extra == "dropbox"
+Requires-Dist: fsspec; extra == "dropbox"
 Provides-Extra: duckdb
 Requires-Dist: duckdb; extra == "duckdb"
 Provides-Extra: elasticsearch
@@ -92,19 +92,19 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
 Provides-Extra: epub
 Requires-Dist: unstructured[epub]; extra == "epub"
 Provides-Extra: gcs
-Requires-Dist: bs4; extra == "gcs"
 Requires-Dist: fsspec; extra == "gcs"
+Requires-Dist: bs4; extra == "gcs"
 Requires-Dist: gcsfs; extra == "gcs"
 Provides-Extra: github
-Requires-Dist: pygithub>1.58.0; extra == "github"
 Requires-Dist: requests; extra == "github"
+Requires-Dist: pygithub>1.58.0; extra == "github"
 Provides-Extra: gitlab
 Requires-Dist: python-gitlab; extra == "gitlab"
 Provides-Extra: google-drive
 Requires-Dist: google-api-python-client; extra == "google-drive"
 Provides-Extra: hubspot
-Requires-Dist: hubspot-api-client; extra == "hubspot"
 Requires-Dist: urllib3; extra == "hubspot"
+Requires-Dist: hubspot-api-client; extra == "hubspot"
 Provides-Extra: jira
 Requires-Dist: atlassian-python-api; extra == "jira"
 Provides-Extra: kafka
@@ -122,20 +122,20 @@ Requires-Dist: pymongo; extra == "mongodb"
 Provides-Extra: msg
 Requires-Dist: unstructured[msg]; extra == "msg"
 Provides-Extra: neo4j
+Requires-Dist: networkx; extra == "neo4j"
 Requires-Dist: cymple; extra == "neo4j"
 Requires-Dist: neo4j; extra == "neo4j"
-Requires-Dist: networkx; extra == "neo4j"
 Provides-Extra: notion
-Requires-Dist: backoff; extra == "notion"
-Requires-Dist: notion-client; extra == "notion"
 Requires-Dist: httpx; extra == "notion"
 Requires-Dist: htmlBuilder; extra == "notion"
+Requires-Dist: notion-client; extra == "notion"
+Requires-Dist: backoff; extra == "notion"
 Provides-Extra: odt
 Requires-Dist: unstructured[odt]; extra == "odt"
 Provides-Extra: onedrive
-Requires-Dist: bs4; extra == "onedrive"
-Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
 Requires-Dist: msal; extra == "onedrive"
+Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
+Requires-Dist: bs4; extra == "onedrive"
 Provides-Extra: openai
 Requires-Dist: tiktoken; extra == "openai"
 Requires-Dist: openai; extra == "openai"
@@ -144,8 +144,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
 Provides-Extra: org
 Requires-Dist: unstructured[org]; extra == "org"
 Provides-Extra: outlook
-Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
 Requires-Dist: msal; extra == "outlook"
+Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
 Provides-Extra: pdf
 Requires-Dist: unstructured[pdf]; extra == "pdf"
 Provides-Extra: pinecone
@@ -169,16 +169,16 @@ Requires-Dist: unstructured[rst]; extra == "rst"
 Provides-Extra: rtf
 Requires-Dist: unstructured[rtf]; extra == "rtf"
 Provides-Extra: s3
-Requires-Dist: s3fs; extra == "s3"
 Requires-Dist: fsspec; extra == "s3"
+Requires-Dist: s3fs; extra == "s3"
 Provides-Extra: salesforce
 Requires-Dist: simple-salesforce; extra == "salesforce"
 Provides-Extra: sftp
-Requires-Dist: fsspec; extra == "sftp"
 Requires-Dist: paramiko; extra == "sftp"
+Requires-Dist: fsspec; extra == "sftp"
 Provides-Extra: sharepoint
-Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
 Requires-Dist: msal; extra == "sharepoint"
+Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
 Provides-Extra: singlestore
 Requires-Dist: singlestoredb; extra == "singlestore"
 Provides-Extra: slack
@@ -191,9 +191,9 @@ Requires-Dist: together; extra == "togetherai"
 Provides-Extra: tsv
 Requires-Dist: unstructured[tsv]; extra == "tsv"
 Provides-Extra: vastdb
-Requires-Dist: ibis; extra == "vastdb"
 Requires-Dist: vastdb; extra == "vastdb"
 Requires-Dist: pyarrow; extra == "vastdb"
+Requires-Dist: ibis; extra == "vastdb"
 Provides-Extra: vectara
 Requires-Dist: aiofiles; extra == "vectara"
 Requires-Dist: httpx; extra == "vectara"
{unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/RECORD

@@ -15,7 +15,7 @@ test/integration/connectors/test_milvus.py,sha256=7mI6zznN0PTxDL9DLogH1k3dxx6R8D
 test/integration/connectors/test_mongodb.py,sha256=0A6DvF-iTCSZzOefisd_i20j9li8uNWTF2wyLGwlhco,12446
 test/integration/connectors/test_neo4j.py,sha256=r4TRYtTXeeOdcRcfa_gvslhSKvoIWrwN1FRJ5XRoH4k,8456
 test/integration/connectors/test_notion.py,sha256=ueXyVqYWzP4LuZYe6PauptkXNG6qkoV3srltFOSSKTA,5403
-test/integration/connectors/test_onedrive.py,sha256=…
+test/integration/connectors/test_onedrive.py,sha256=rjgN2LhaW1htEMBJPxmlP_kcRB7p_oOeZcogFlHyJH4,3721
 test/integration/connectors/test_pinecone.py,sha256=acKEu1vnAk0Ht3FhCnGtOEKaj_YlgCzZB7wRU17ehQ0,12407
 test/integration/connectors/test_qdrant.py,sha256=Yme3ZZ5zIbaZ-yYLUqN2oy0hsrcAfvlleRLYWMSYeSE,8049
 test/integration/connectors/test_redis.py,sha256=1aKwOb-K4zCxZwHmgW_WzGJwqLntbWTbpGQ-rtUwN9o,4360
@@ -34,7 +34,7 @@ test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2
 test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=TsSEPsyaTUoEvFBadinrdM0b5C4FoUtEwCv24OUbpO8,12072
 test/integration/connectors/elasticsearch/test_opensearch.py,sha256=7b7z0GqoBsBqA3IK35N6axmwEMjzJ1l3Fg2WT2c7uqs,11450
 test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-test/integration/connectors/sql/test_databricks_delta_tables.py,sha256=…
+test/integration/connectors/sql/test_databricks_delta_tables.py,sha256=qHRHrvh5cCdtTFSLM1bDwRnNgtBQetbdnyRnU9qvK0Y,6144
 test/integration/connectors/sql/test_postgres.py,sha256=bGDyzLRpgrXO7nl0U8nF2zSNr6ykUG-w8T4daIqUCG4,6970
 test/integration/connectors/sql/test_singlestore.py,sha256=XeU2s4Kt_3tGyaDYYKTgYjdOyb8j2dnz4TgSMwFUjWs,6153
 test/integration/connectors/sql/test_snowflake.py,sha256=LEwsRDoC6-rRiwYsqeo5B9Eo6RYygLLGAUsrtrgI9pM,7494
@@ -102,7 +102,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
 test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
 unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
-unstructured_ingest/__version__.py,sha256=…
+unstructured_ingest/__version__.py,sha256=i2QrUEuUnVPQuTv5hg_JWbhbwm5k6KU4hPIFq0SIgdc,42
 unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
 unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
 unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -270,7 +270,7 @@ unstructured_ingest/connector/notion/types/database_properties/unique_id.py,sha2
 unstructured_ingest/connector/notion/types/database_properties/url.py,sha256=iXQ2tVUm9UlKVtDA0NQiFIRJ5PHYW9wOaWt2vFfSVCg,862
 unstructured_ingest/connector/notion/types/database_properties/verification.py,sha256=J_DLjY-v2T6xDGMQ7FkI0YMKMA6SG6Y3yYW7qUD1hKA,2334
 unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-unstructured_ingest/embed/azure_openai.py,sha256=…
+unstructured_ingest/embed/azure_openai.py,sha256=u9reyZzY6BtsT5U_TdIfS6vH_42lvohVBwKMPQAqvkI,1528
 unstructured_ingest/embed/bedrock.py,sha256=50G8PBEdW3ILwyWXAWl4w-gUA9I0AR7LuFq6NLz-sWI,7284
 unstructured_ingest/embed/huggingface.py,sha256=Avcc16st9Cp2xGScG6TeNEEd3T8YjjnESNN4OdIlnh0,2119
 unstructured_ingest/embed/interfaces.py,sha256=7jsQ3rLOXy1hq__muf-EPcLnv17XzNQaD05AyGbZeNo,3739
@@ -399,7 +399,7 @@ unstructured_ingest/v2/interfaces/uploader.py,sha256=rrZLTjmTcrDL-amQIKzIP6j2fW-
 unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
 unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
-unstructured_ingest/v2/pipeline/pipeline.py,sha256=…
+unstructured_ingest/v2/pipeline/pipeline.py,sha256=4IwCWMlBrMpZI6V82q5nzrbyQNDVM62AQsWt6MUBWa8,16508
 unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unstructured_ingest/v2/pipeline/steps/chunk.py,sha256=LK2ldM24TE4ukX_Z6Z81LpF53orMaRkddM3uhLtT5EQ,3221
 unstructured_ingest/v2/pipeline/steps/download.py,sha256=nZ4B0d9p-6TgWqrBoKUQPlr8m6dz1RGNr_3OjUhRpWg,8259
@@ -433,16 +433,17 @@ unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWo
 unstructured_ingest/v2/processes/connectors/milvus.py,sha256=wmcu9NVy3gYlQGT25inN5w_QrhFoL8-hRq0pJFSNw8g,8866
 unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=cL0QUQZF_s2brh3nNNeAywXVpaIiND4b5JTAFlYjLjw,14273
 unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=HU1IwchTM7Q1kkeIFVe-Lg6gInMItBpgkDkVwuTvkGY,14259
-unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=…
+unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=b616B_-9MfU6gxvpw7IBUa2szNFURA_VP8q5j2FXxnA,17632
 unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
 unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=bQDCch7OGiQgpWO3n3ncLuQ4XCWqDc7ZWEB-Qrqkss8,10730
 unstructured_ingest/v2/processes/connectors/redisdb.py,sha256=p0AY4ukBNpwAemV4bWzpScvVbLTVlI3DzsCNUKiBI5M,6757
 unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
-unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=…
+unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=SdcbOEUzgi1sUZJA6doZDm-a8d4F3Qtud-OVbDKW7Ng,4456
 unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
 unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
 unstructured_ingest/v2/processes/connectors/vectara.py,sha256=BlI_4nkpNR99aYxDd9eusm5LQsVB9EI0r-5Kc1D7pgQ,12255
 unstructured_ingest/v2/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+unstructured_ingest/v2/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
 unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=Oh8SwTWi66gO8BsNF6vRMoQVuegyBPPCpVozkOHEf3A,2136
 unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=Yj4fIxgGo9Qh1x_6-INdbrPGHCuZElu-QKNfSVtW7F4,7694
 unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=TA2e_1SIr4VaEI62873eyReCNfgmQ51_2Pko2I04pPM,2747
@@ -549,7 +550,7 @@ unstructured_ingest/v2/processes/connectors/qdrant/local.py,sha256=cGEyv3Oy6y4BQ
 unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=BHI7HYSdbS05j2vrjyDvLzVG1WfsM8osKeq-lttlybQ,5437
 unstructured_ingest/v2/processes/connectors/qdrant/server.py,sha256=odvCZWZp8DmRxLXMR7tHhW-c7UQbix1_zpFdfXfCvKI,1613
 unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=NSEZwJDHh_9kFc31LnG14iRtYF3meK2UfUlQfYnwYEQ,2059
-unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha256=…
+unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py,sha256=xbZ90rmehiCnBoqFXMz-3ZMXeYb0PzWB6iobCNSHTmQ,8955
 unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=BATfX1PQGT2kl8jAbdNKXTojYKJxh3pJV9-h3OBnHGo,5124
 unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=OPBDQ2c_5KjWHEFfqXxf3pQ2tWC-N4MtslMulMgP1Wc,5503
 unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=QE-WBqrPVjCgcxR5EdVD9iTHBjgDSSSQgWYvq5N61qU,7746
@@ -561,9 +562,9 @@ unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-
 unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
 unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
 unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=yJza_jBSEFnzZRq5L6vJ0Mm3uS1uxkOiKIimPpUyQds,12418
-unstructured_ingest-0.4.5.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
-unstructured_ingest-0.4.5.dist-info/METADATA,sha256=…
-unstructured_ingest-0.4.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-unstructured_ingest-0.4.5.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
-unstructured_ingest-0.4.5.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
-unstructured_ingest-0.4.5.dist-info/RECORD,,
+unstructured_ingest-0.4.7.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
+unstructured_ingest-0.4.7.dist-info/METADATA,sha256=yGcahQ8fZmoU_c1h02b76tRn5w0uj_931AAQKlFrqxs,8051
+unstructured_ingest-0.4.7.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+unstructured_ingest-0.4.7.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
+unstructured_ingest-0.4.7.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
+unstructured_ingest-0.4.7.dist-info/RECORD,,
{unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/LICENSE.md RENAMED (file without changes)
{unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/WHEEL RENAMED (file without changes)
{unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/entry_points.txt RENAMED (file without changes)
{unstructured_ingest-0.4.5.dist-info → unstructured_ingest-0.4.7.dist-info}/top_level.txt RENAMED (file without changes)