unstructured-ingest 0.5.21__py3-none-any.whl → 0.5.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/interfaces/__init__.py +8 -1
- unstructured_ingest/v2/interfaces/file_data.py +13 -116
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +7 -1
- unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py +7 -1
- unstructured_ingest/v2/types/__init__.py +0 -0
- unstructured_ingest/v2/types/file_data.py +116 -0
- {unstructured_ingest-0.5.21.dist-info → unstructured_ingest-0.5.23.dist-info}/METADATA +27 -27
- {unstructured_ingest-0.5.21.dist-info → unstructured_ingest-0.5.23.dist-info}/RECORD +13 -11
- {unstructured_ingest-0.5.21.dist-info → unstructured_ingest-0.5.23.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.21.dist-info → unstructured_ingest-0.5.23.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.21.dist-info → unstructured_ingest-0.5.23.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.21.dist-info → unstructured_ingest-0.5.23.dist-info}/top_level.txt +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.5.21" # pragma: no cover
|
|
1
|
+
__version__ = "0.5.23" # pragma: no cover
|
|
@@ -1,6 +1,13 @@
|
|
|
1
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
2
|
+
BatchFileData,
|
|
3
|
+
BatchItem,
|
|
4
|
+
FileData,
|
|
5
|
+
FileDataSourceMetadata,
|
|
6
|
+
SourceIdentifiers,
|
|
7
|
+
)
|
|
8
|
+
|
|
1
9
|
from .connector import AccessConfig, BaseConnector, ConnectionConfig
|
|
2
10
|
from .downloader import Downloader, DownloaderConfig, DownloadResponse, download_responses
|
|
3
|
-
from .file_data import BatchFileData, BatchItem, FileData, FileDataSourceMetadata, SourceIdentifiers
|
|
4
11
|
from .indexer import Indexer, IndexerConfig
|
|
5
12
|
from .process import BaseProcess
|
|
6
13
|
from .processor import ProcessorConfig
|
|
@@ -1,116 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
rel_path: Optional[str] = None
|
|
15
|
-
|
|
16
|
-
@property
|
|
17
|
-
def filename_stem(self) -> str:
|
|
18
|
-
return Path(self.filename).stem
|
|
19
|
-
|
|
20
|
-
@property
|
|
21
|
-
def relative_path(self) -> str:
|
|
22
|
-
return self.rel_path or self.fullpath
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class FileDataSourceMetadata(BaseModel):
|
|
26
|
-
url: Optional[str] = None
|
|
27
|
-
version: Optional[str] = None
|
|
28
|
-
record_locator: Optional[dict[str, Any]] = None
|
|
29
|
-
date_created: Optional[str] = None
|
|
30
|
-
date_modified: Optional[str] = None
|
|
31
|
-
date_processed: Optional[str] = None
|
|
32
|
-
permissions_data: Optional[list[dict[str, Any]]] = None
|
|
33
|
-
filesize_bytes: Optional[int] = None
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class FileData(BaseModel):
|
|
37
|
-
identifier: str
|
|
38
|
-
connector_type: str
|
|
39
|
-
source_identifiers: SourceIdentifiers
|
|
40
|
-
metadata: FileDataSourceMetadata = Field(default_factory=lambda: FileDataSourceMetadata())
|
|
41
|
-
additional_metadata: dict[str, Any] = Field(default_factory=dict)
|
|
42
|
-
reprocess: bool = False
|
|
43
|
-
local_download_path: Optional[str] = None
|
|
44
|
-
display_name: Optional[str] = None
|
|
45
|
-
|
|
46
|
-
@classmethod
|
|
47
|
-
def from_file(cls, path: str) -> "FileData":
|
|
48
|
-
path = Path(path).resolve()
|
|
49
|
-
if not path.exists() or not path.is_file():
|
|
50
|
-
raise ValueError(f"file path not valid: {path}")
|
|
51
|
-
with open(str(path.resolve()), "rb") as f:
|
|
52
|
-
file_data_dict = json.load(f)
|
|
53
|
-
file_data = cls.model_validate(file_data_dict)
|
|
54
|
-
return file_data
|
|
55
|
-
|
|
56
|
-
@classmethod
|
|
57
|
-
def cast(cls, file_data: "FileData", **kwargs) -> "FileData":
|
|
58
|
-
file_data_dict = file_data.model_dump()
|
|
59
|
-
return cls.model_validate(file_data_dict, **kwargs)
|
|
60
|
-
|
|
61
|
-
def to_file(self, path: str) -> None:
|
|
62
|
-
path = Path(path).resolve()
|
|
63
|
-
path.parent.mkdir(parents=True, exist_ok=True)
|
|
64
|
-
with open(str(path.resolve()), "w") as f:
|
|
65
|
-
json.dump(self.model_dump(), f, indent=2)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
class BatchItem(BaseModel):
|
|
69
|
-
identifier: str
|
|
70
|
-
version: Optional[str] = None
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
class BatchFileData(FileData):
|
|
74
|
-
identifier: str = Field(init=False)
|
|
75
|
-
batch_items: list[BatchItem]
|
|
76
|
-
source_identifiers: Optional[SourceIdentifiers] = None
|
|
77
|
-
|
|
78
|
-
@field_validator("batch_items")
|
|
79
|
-
@classmethod
|
|
80
|
-
def check_batch_items(cls, v: list[BatchItem]) -> list[BatchItem]:
|
|
81
|
-
if not v:
|
|
82
|
-
raise ValueError("batch items cannot be empty")
|
|
83
|
-
all_identifiers = [item.identifier for item in v]
|
|
84
|
-
if len(all_identifiers) != len(set(all_identifiers)):
|
|
85
|
-
raise ValueError(f"duplicate identifiers: {all_identifiers}")
|
|
86
|
-
sorted_batch_items = sorted(v, key=lambda item: item.identifier)
|
|
87
|
-
return sorted_batch_items
|
|
88
|
-
|
|
89
|
-
@model_validator(mode="before")
|
|
90
|
-
@classmethod
|
|
91
|
-
def populate_identifier(cls, data: Any) -> Any:
|
|
92
|
-
if isinstance(data, dict) and "identifier" not in data:
|
|
93
|
-
batch_items = data["batch_items"]
|
|
94
|
-
identifier_data = json.dumps(
|
|
95
|
-
{item.identifier: item.version for item in batch_items}, sort_keys=True
|
|
96
|
-
)
|
|
97
|
-
data["identifier"] = str(uuid5(NAMESPACE_DNS, str(identifier_data)))
|
|
98
|
-
return data
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def file_data_from_file(path: str) -> FileData:
|
|
102
|
-
try:
|
|
103
|
-
return BatchFileData.from_file(path=path)
|
|
104
|
-
except ValidationError:
|
|
105
|
-
logger.debug(f"{path} not detected as batch file data")
|
|
106
|
-
|
|
107
|
-
return FileData.from_file(path=path)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def file_data_from_dict(data: dict) -> FileData:
|
|
111
|
-
try:
|
|
112
|
-
return BatchFileData.model_validate(data)
|
|
113
|
-
except ValidationError:
|
|
114
|
-
logger.debug(f"{data} not valid for batch file data")
|
|
115
|
-
|
|
116
|
-
return FileData.model_validate(data)
|
|
1
|
+
"""
|
|
2
|
+
COMPATABILITY NOTICE:
|
|
3
|
+
This file has moved to the v2/types/ module.
|
|
4
|
+
The following line exists for backward compatibility.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.v2.types.file_data import * # noqa - star imports are bad, but this is for maximal backward compatability
|
|
8
|
+
|
|
9
|
+
# Eventually this file should go away. Let's start warning users now:
|
|
10
|
+
logger.warning( # noqa - using logger from the star import
|
|
11
|
+
"Importing file_data.py through interfaces is deprecated. "
|
|
12
|
+
"Please use unstructured_ingest.v2.types.file_data instead!"
|
|
13
|
+
)
|
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
from time import time
|
|
7
7
|
from typing import TYPE_CHECKING, Any, Generator, Optional, Union
|
|
8
8
|
|
|
9
|
-
from pydantic import BaseModel, Field, Secret, SecretStr
|
|
9
|
+
from pydantic import BaseModel, Field, Secret, SecretStr, field_validator
|
|
10
10
|
|
|
11
11
|
from unstructured_ingest.error import (
|
|
12
12
|
DestinationConnectionError,
|
|
@@ -98,6 +98,12 @@ class ElasticsearchConnectionConfig(ConnectionConfig):
|
|
|
98
98
|
ca_certs: Optional[Path] = None
|
|
99
99
|
access_config: Secret[ElasticsearchAccessConfig]
|
|
100
100
|
|
|
101
|
+
@field_validator("hosts", mode="before")
|
|
102
|
+
def to_list(cls, value):
|
|
103
|
+
if isinstance(value, str):
|
|
104
|
+
return [value]
|
|
105
|
+
return value
|
|
106
|
+
|
|
101
107
|
def get_client_kwargs(self) -> dict:
|
|
102
108
|
# Update auth related fields to conform to what the SDK expects based on the
|
|
103
109
|
# supported methods:
|
|
@@ -2,7 +2,7 @@ from dataclasses import dataclass, field
|
|
|
2
2
|
from pathlib import Path
|
|
3
3
|
from typing import TYPE_CHECKING, Optional
|
|
4
4
|
|
|
5
|
-
from pydantic import BaseModel, Field, Secret
|
|
5
|
+
from pydantic import BaseModel, Field, Secret, field_validator
|
|
6
6
|
|
|
7
7
|
from unstructured_ingest.error import (
|
|
8
8
|
DestinationConnectionError,
|
|
@@ -78,6 +78,12 @@ class OpenSearchConnectionConfig(ConnectionConfig):
|
|
|
78
78
|
|
|
79
79
|
access_config: Secret[OpenSearchAccessConfig]
|
|
80
80
|
|
|
81
|
+
@field_validator("hosts", mode="before")
|
|
82
|
+
def to_list(cls, value):
|
|
83
|
+
if isinstance(value, str):
|
|
84
|
+
return [value]
|
|
85
|
+
return value
|
|
86
|
+
|
|
81
87
|
def get_client_kwargs(self) -> dict:
|
|
82
88
|
# Update auth related fields to conform to what the SDK expects based on the
|
|
83
89
|
# supported methods:
|
|
File without changes
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any, Optional
|
|
4
|
+
from uuid import NAMESPACE_DNS, uuid5
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.v2.logger import logger
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SourceIdentifiers(BaseModel):
|
|
12
|
+
filename: str
|
|
13
|
+
fullpath: str
|
|
14
|
+
rel_path: Optional[str] = None
|
|
15
|
+
|
|
16
|
+
@property
|
|
17
|
+
def filename_stem(self) -> str:
|
|
18
|
+
return Path(self.filename).stem
|
|
19
|
+
|
|
20
|
+
@property
|
|
21
|
+
def relative_path(self) -> str:
|
|
22
|
+
return self.rel_path or self.fullpath
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FileDataSourceMetadata(BaseModel):
|
|
26
|
+
url: Optional[str] = None
|
|
27
|
+
version: Optional[str] = None
|
|
28
|
+
record_locator: Optional[dict[str, Any]] = None
|
|
29
|
+
date_created: Optional[str] = None
|
|
30
|
+
date_modified: Optional[str] = None
|
|
31
|
+
date_processed: Optional[str] = None
|
|
32
|
+
permissions_data: Optional[list[dict[str, Any]]] = None
|
|
33
|
+
filesize_bytes: Optional[int] = None
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class FileData(BaseModel):
|
|
37
|
+
identifier: str
|
|
38
|
+
connector_type: str
|
|
39
|
+
source_identifiers: SourceIdentifiers
|
|
40
|
+
metadata: FileDataSourceMetadata = Field(default_factory=lambda: FileDataSourceMetadata())
|
|
41
|
+
additional_metadata: dict[str, Any] = Field(default_factory=dict)
|
|
42
|
+
reprocess: bool = False
|
|
43
|
+
local_download_path: Optional[str] = None
|
|
44
|
+
display_name: Optional[str] = None
|
|
45
|
+
|
|
46
|
+
@classmethod
|
|
47
|
+
def from_file(cls, path: str) -> "FileData":
|
|
48
|
+
path = Path(path).resolve()
|
|
49
|
+
if not path.exists() or not path.is_file():
|
|
50
|
+
raise ValueError(f"file path not valid: {path}")
|
|
51
|
+
with open(str(path.resolve()), "rb") as f:
|
|
52
|
+
file_data_dict = json.load(f)
|
|
53
|
+
file_data = cls.model_validate(file_data_dict)
|
|
54
|
+
return file_data
|
|
55
|
+
|
|
56
|
+
@classmethod
|
|
57
|
+
def cast(cls, file_data: "FileData", **kwargs) -> "FileData":
|
|
58
|
+
file_data_dict = file_data.model_dump()
|
|
59
|
+
return cls.model_validate(file_data_dict, **kwargs)
|
|
60
|
+
|
|
61
|
+
def to_file(self, path: str) -> None:
|
|
62
|
+
path = Path(path).resolve()
|
|
63
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
64
|
+
with open(str(path.resolve()), "w") as f:
|
|
65
|
+
json.dump(self.model_dump(), f, indent=2)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class BatchItem(BaseModel):
|
|
69
|
+
identifier: str
|
|
70
|
+
version: Optional[str] = None
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class BatchFileData(FileData):
|
|
74
|
+
identifier: str = Field(init=False)
|
|
75
|
+
batch_items: list[BatchItem]
|
|
76
|
+
source_identifiers: Optional[SourceIdentifiers] = None
|
|
77
|
+
|
|
78
|
+
@field_validator("batch_items")
|
|
79
|
+
@classmethod
|
|
80
|
+
def check_batch_items(cls, v: list[BatchItem]) -> list[BatchItem]:
|
|
81
|
+
if not v:
|
|
82
|
+
raise ValueError("batch items cannot be empty")
|
|
83
|
+
all_identifiers = [item.identifier for item in v]
|
|
84
|
+
if len(all_identifiers) != len(set(all_identifiers)):
|
|
85
|
+
raise ValueError(f"duplicate identifiers: {all_identifiers}")
|
|
86
|
+
sorted_batch_items = sorted(v, key=lambda item: item.identifier)
|
|
87
|
+
return sorted_batch_items
|
|
88
|
+
|
|
89
|
+
@model_validator(mode="before")
|
|
90
|
+
@classmethod
|
|
91
|
+
def populate_identifier(cls, data: Any) -> Any:
|
|
92
|
+
if isinstance(data, dict) and "identifier" not in data:
|
|
93
|
+
batch_items = data["batch_items"]
|
|
94
|
+
identifier_data = json.dumps(
|
|
95
|
+
{item.identifier: item.version for item in batch_items}, sort_keys=True
|
|
96
|
+
)
|
|
97
|
+
data["identifier"] = str(uuid5(NAMESPACE_DNS, str(identifier_data)))
|
|
98
|
+
return data
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def file_data_from_file(path: str) -> FileData:
|
|
102
|
+
try:
|
|
103
|
+
return BatchFileData.from_file(path=path)
|
|
104
|
+
except ValidationError:
|
|
105
|
+
logger.debug(f"{path} not detected as batch file data")
|
|
106
|
+
|
|
107
|
+
return FileData.from_file(path=path)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def file_data_from_dict(data: dict) -> FileData:
|
|
111
|
+
try:
|
|
112
|
+
return BatchFileData.model_validate(data)
|
|
113
|
+
except ValidationError:
|
|
114
|
+
logger.debug(f"{data} not valid for batch file data")
|
|
115
|
+
|
|
116
|
+
return FileData.model_validate(data)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.5.21
|
|
3
|
+
Version: 0.5.23
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,12 +22,12 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.14
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
-
Requires-Dist: click
|
|
26
25
|
Requires-Dist: dataclasses_json
|
|
27
|
-
Requires-Dist: pydantic>=2.7
|
|
28
|
-
Requires-Dist: python-dateutil
|
|
29
26
|
Requires-Dist: opentelemetry-sdk
|
|
27
|
+
Requires-Dist: click
|
|
30
28
|
Requires-Dist: tqdm
|
|
29
|
+
Requires-Dist: python-dateutil
|
|
30
|
+
Requires-Dist: pydantic>=2.7
|
|
31
31
|
Requires-Dist: numpy
|
|
32
32
|
Requires-Dist: pandas
|
|
33
33
|
Provides-Extra: remote
|
|
@@ -103,8 +103,8 @@ Requires-Dist: astrapy; extra == "astradb"
|
|
|
103
103
|
Requires-Dist: numpy; extra == "astradb"
|
|
104
104
|
Requires-Dist: pandas; extra == "astradb"
|
|
105
105
|
Provides-Extra: azure
|
|
106
|
-
Requires-Dist: fsspec; extra == "azure"
|
|
107
106
|
Requires-Dist: adlfs; extra == "azure"
|
|
107
|
+
Requires-Dist: fsspec; extra == "azure"
|
|
108
108
|
Requires-Dist: numpy; extra == "azure"
|
|
109
109
|
Requires-Dist: pandas; extra == "azure"
|
|
110
110
|
Provides-Extra: azure-ai-search
|
|
@@ -117,8 +117,8 @@ Requires-Dist: requests; extra == "biomed"
|
|
|
117
117
|
Requires-Dist: numpy; extra == "biomed"
|
|
118
118
|
Requires-Dist: pandas; extra == "biomed"
|
|
119
119
|
Provides-Extra: box
|
|
120
|
-
Requires-Dist: fsspec; extra == "box"
|
|
121
120
|
Requires-Dist: boxfs; extra == "box"
|
|
121
|
+
Requires-Dist: fsspec; extra == "box"
|
|
122
122
|
Requires-Dist: numpy; extra == "box"
|
|
123
123
|
Requires-Dist: pandas; extra == "box"
|
|
124
124
|
Provides-Extra: chroma
|
|
@@ -148,8 +148,8 @@ Requires-Dist: discord.py; extra == "discord"
|
|
|
148
148
|
Requires-Dist: numpy; extra == "discord"
|
|
149
149
|
Requires-Dist: pandas; extra == "discord"
|
|
150
150
|
Provides-Extra: dropbox
|
|
151
|
-
Requires-Dist: fsspec; extra == "dropbox"
|
|
152
151
|
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
152
|
+
Requires-Dist: fsspec; extra == "dropbox"
|
|
153
153
|
Requires-Dist: numpy; extra == "dropbox"
|
|
154
154
|
Requires-Dist: pandas; extra == "dropbox"
|
|
155
155
|
Provides-Extra: duckdb
|
|
@@ -161,9 +161,9 @@ Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
|
|
|
161
161
|
Requires-Dist: numpy; extra == "elasticsearch"
|
|
162
162
|
Requires-Dist: pandas; extra == "elasticsearch"
|
|
163
163
|
Provides-Extra: gcs
|
|
164
|
-
Requires-Dist: bs4; extra == "gcs"
|
|
165
|
-
Requires-Dist: fsspec; extra == "gcs"
|
|
166
164
|
Requires-Dist: gcsfs; extra == "gcs"
|
|
165
|
+
Requires-Dist: fsspec; extra == "gcs"
|
|
166
|
+
Requires-Dist: bs4; extra == "gcs"
|
|
167
167
|
Requires-Dist: numpy; extra == "gcs"
|
|
168
168
|
Requires-Dist: pandas; extra == "gcs"
|
|
169
169
|
Provides-Extra: github
|
|
@@ -185,10 +185,10 @@ Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
|
185
185
|
Requires-Dist: numpy; extra == "hubspot"
|
|
186
186
|
Requires-Dist: pandas; extra == "hubspot"
|
|
187
187
|
Provides-Extra: ibm-watsonx-s3
|
|
188
|
-
Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
|
|
189
188
|
Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
|
|
190
|
-
Requires-Dist: httpx; extra == "ibm-watsonx-s3"
|
|
191
189
|
Requires-Dist: tenacity; extra == "ibm-watsonx-s3"
|
|
190
|
+
Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
|
|
191
|
+
Requires-Dist: httpx; extra == "ibm-watsonx-s3"
|
|
192
192
|
Requires-Dist: numpy; extra == "ibm-watsonx-s3"
|
|
193
193
|
Requires-Dist: pandas; extra == "ibm-watsonx-s3"
|
|
194
194
|
Provides-Extra: jira
|
|
@@ -216,22 +216,22 @@ Requires-Dist: pymongo; extra == "mongodb"
|
|
|
216
216
|
Requires-Dist: numpy; extra == "mongodb"
|
|
217
217
|
Requires-Dist: pandas; extra == "mongodb"
|
|
218
218
|
Provides-Extra: neo4j
|
|
219
|
-
Requires-Dist: networkx; extra == "neo4j"
|
|
220
219
|
Requires-Dist: neo4j-rust-ext; extra == "neo4j"
|
|
220
|
+
Requires-Dist: networkx; extra == "neo4j"
|
|
221
221
|
Requires-Dist: cymple; extra == "neo4j"
|
|
222
222
|
Requires-Dist: numpy; extra == "neo4j"
|
|
223
223
|
Requires-Dist: pandas; extra == "neo4j"
|
|
224
224
|
Provides-Extra: notion
|
|
225
|
-
Requires-Dist: httpx; extra == "notion"
|
|
225
|
+
Requires-Dist: backoff; extra == "notion"
|
|
226
226
|
Requires-Dist: htmlBuilder; extra == "notion"
|
|
227
227
|
Requires-Dist: notion-client; extra == "notion"
|
|
228
|
-
Requires-Dist: backoff; extra == "notion"
|
|
228
|
+
Requires-Dist: httpx; extra == "notion"
|
|
229
229
|
Requires-Dist: numpy; extra == "notion"
|
|
230
230
|
Requires-Dist: pandas; extra == "notion"
|
|
231
231
|
Provides-Extra: onedrive
|
|
232
|
-
Requires-Dist: bs4; extra == "onedrive"
|
|
233
|
-
Requires-Dist: msal; extra == "onedrive"
|
|
234
232
|
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
233
|
+
Requires-Dist: msal; extra == "onedrive"
|
|
234
|
+
Requires-Dist: bs4; extra == "onedrive"
|
|
235
235
|
Requires-Dist: numpy; extra == "onedrive"
|
|
236
236
|
Requires-Dist: pandas; extra == "onedrive"
|
|
237
237
|
Provides-Extra: opensearch
|
|
@@ -239,8 +239,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
|
|
|
239
239
|
Requires-Dist: numpy; extra == "opensearch"
|
|
240
240
|
Requires-Dist: pandas; extra == "opensearch"
|
|
241
241
|
Provides-Extra: outlook
|
|
242
|
-
Requires-Dist: msal; extra == "outlook"
|
|
243
242
|
Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
|
|
243
|
+
Requires-Dist: msal; extra == "outlook"
|
|
244
244
|
Requires-Dist: numpy; extra == "outlook"
|
|
245
245
|
Requires-Dist: pandas; extra == "outlook"
|
|
246
246
|
Provides-Extra: pinecone
|
|
@@ -264,13 +264,13 @@ Requires-Dist: redis; extra == "redis"
|
|
|
264
264
|
Requires-Dist: numpy; extra == "redis"
|
|
265
265
|
Requires-Dist: pandas; extra == "redis"
|
|
266
266
|
Provides-Extra: s3
|
|
267
|
-
Requires-Dist: fsspec; extra == "s3"
|
|
268
267
|
Requires-Dist: s3fs; extra == "s3"
|
|
268
|
+
Requires-Dist: fsspec; extra == "s3"
|
|
269
269
|
Requires-Dist: numpy; extra == "s3"
|
|
270
270
|
Requires-Dist: pandas; extra == "s3"
|
|
271
271
|
Provides-Extra: sharepoint
|
|
272
|
-
Requires-Dist: msal; extra == "sharepoint"
|
|
273
272
|
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
273
|
+
Requires-Dist: msal; extra == "sharepoint"
|
|
274
274
|
Requires-Dist: numpy; extra == "sharepoint"
|
|
275
275
|
Requires-Dist: pandas; extra == "sharepoint"
|
|
276
276
|
Provides-Extra: salesforce
|
|
@@ -287,8 +287,8 @@ Requires-Dist: slack_sdk[optional]; extra == "slack"
|
|
|
287
287
|
Requires-Dist: numpy; extra == "slack"
|
|
288
288
|
Requires-Dist: pandas; extra == "slack"
|
|
289
289
|
Provides-Extra: snowflake
|
|
290
|
-
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
291
290
|
Requires-Dist: snowflake-connector-python; extra == "snowflake"
|
|
291
|
+
Requires-Dist: psycopg2-binary; extra == "snowflake"
|
|
292
292
|
Requires-Dist: numpy; extra == "snowflake"
|
|
293
293
|
Requires-Dist: pandas; extra == "snowflake"
|
|
294
294
|
Provides-Extra: wikipedia
|
|
@@ -312,21 +312,21 @@ Requires-Dist: singlestoredb; extra == "singlestore"
|
|
|
312
312
|
Requires-Dist: numpy; extra == "singlestore"
|
|
313
313
|
Requires-Dist: pandas; extra == "singlestore"
|
|
314
314
|
Provides-Extra: vectara
|
|
315
|
-
Requires-Dist: httpx; extra == "vectara"
|
|
316
315
|
Requires-Dist: aiofiles; extra == "vectara"
|
|
317
316
|
Requires-Dist: requests; extra == "vectara"
|
|
317
|
+
Requires-Dist: httpx; extra == "vectara"
|
|
318
318
|
Requires-Dist: numpy; extra == "vectara"
|
|
319
319
|
Requires-Dist: pandas; extra == "vectara"
|
|
320
320
|
Provides-Extra: vastdb
|
|
321
|
-
Requires-Dist: ibis; extra == "vastdb"
|
|
322
|
-
Requires-Dist: pyarrow; extra == "vastdb"
|
|
323
321
|
Requires-Dist: vastdb; extra == "vastdb"
|
|
322
|
+
Requires-Dist: pyarrow; extra == "vastdb"
|
|
323
|
+
Requires-Dist: ibis; extra == "vastdb"
|
|
324
324
|
Requires-Dist: numpy; extra == "vastdb"
|
|
325
325
|
Requires-Dist: pandas; extra == "vastdb"
|
|
326
326
|
Provides-Extra: zendesk
|
|
327
327
|
Requires-Dist: bs4; extra == "zendesk"
|
|
328
|
-
Requires-Dist: httpx; extra == "zendesk"
|
|
329
328
|
Requires-Dist: aiofiles; extra == "zendesk"
|
|
329
|
+
Requires-Dist: httpx; extra == "zendesk"
|
|
330
330
|
Requires-Dist: numpy; extra == "zendesk"
|
|
331
331
|
Requires-Dist: pandas; extra == "zendesk"
|
|
332
332
|
Provides-Extra: embed-huggingface
|
|
@@ -334,8 +334,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
|
|
|
334
334
|
Requires-Dist: numpy; extra == "embed-huggingface"
|
|
335
335
|
Requires-Dist: pandas; extra == "embed-huggingface"
|
|
336
336
|
Provides-Extra: embed-octoai
|
|
337
|
-
Requires-Dist: tiktoken; extra == "embed-octoai"
|
|
338
337
|
Requires-Dist: openai; extra == "embed-octoai"
|
|
338
|
+
Requires-Dist: tiktoken; extra == "embed-octoai"
|
|
339
339
|
Requires-Dist: numpy; extra == "embed-octoai"
|
|
340
340
|
Requires-Dist: pandas; extra == "embed-octoai"
|
|
341
341
|
Provides-Extra: embed-vertexai
|
|
@@ -351,13 +351,13 @@ Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
|
|
|
351
351
|
Requires-Dist: numpy; extra == "embed-mixedbreadai"
|
|
352
352
|
Requires-Dist: pandas; extra == "embed-mixedbreadai"
|
|
353
353
|
Provides-Extra: openai
|
|
354
|
-
Requires-Dist: tiktoken; extra == "openai"
|
|
355
354
|
Requires-Dist: openai; extra == "openai"
|
|
355
|
+
Requires-Dist: tiktoken; extra == "openai"
|
|
356
356
|
Requires-Dist: numpy; extra == "openai"
|
|
357
357
|
Requires-Dist: pandas; extra == "openai"
|
|
358
358
|
Provides-Extra: bedrock
|
|
359
|
-
Requires-Dist: aioboto3; extra == "bedrock"
|
|
360
359
|
Requires-Dist: boto3; extra == "bedrock"
|
|
360
|
+
Requires-Dist: aioboto3; extra == "bedrock"
|
|
361
361
|
Requires-Dist: numpy; extra == "bedrock"
|
|
362
362
|
Requires-Dist: pandas; extra == "bedrock"
|
|
363
363
|
Provides-Extra: togetherai
|
|
@@ -113,7 +113,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
|
|
|
113
113
|
test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
114
|
test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
|
|
115
115
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
116
|
-
unstructured_ingest/__version__.py,sha256=
|
|
116
|
+
unstructured_ingest/__version__.py,sha256=zwHqD3LgpFA-cY-rrS_2n5Kv-NY8b8mUJsGrAiSe2kA,43
|
|
117
117
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
118
118
|
unstructured_ingest/interfaces.py,sha256=7DOnDpGvUNlCoFR7UPRGmOarqH5sFtuUOO5vf8X3oTM,31489
|
|
119
119
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -398,10 +398,10 @@ unstructured_ingest/v2/cli/base/src.py,sha256=cpQ43qQju4e5s_YSaPxUtA70BaisRkTBdj
|
|
|
398
398
|
unstructured_ingest/v2/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
399
399
|
unstructured_ingest/v2/cli/utils/click.py,sha256=1_eJgrwS2DFBl1jZPLsj1vgVgR7agFBIEBe4A_n7mH4,7827
|
|
400
400
|
unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1KRxlwITNW1xl1YxMAG8BcTk0,7604
|
|
401
|
-
unstructured_ingest/v2/interfaces/__init__.py,sha256=
|
|
401
|
+
unstructured_ingest/v2/interfaces/__init__.py,sha256=Jn5qtWOnmBZzsb2PoQYN3Xj5xHa9thSVc0BEoIN0Pw0,1059
|
|
402
402
|
unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
|
|
403
403
|
unstructured_ingest/v2/interfaces/downloader.py,sha256=Qi_wISgUACZKEPu5p1kUaG3uiCXcr3zWg9z9uRDwoOk,2927
|
|
404
|
-
unstructured_ingest/v2/interfaces/file_data.py,sha256=
|
|
404
|
+
unstructured_ingest/v2/interfaces/file_data.py,sha256=DQYzXr8yjlm6VkGuwQLGJ1sia4Gr0d__POAFLrow1PE,525
|
|
405
405
|
unstructured_ingest/v2/interfaces/indexer.py,sha256=i0oftyifXefxfKa4a3sCfSwkzWGSPE6EvC9sg6fwZgk,833
|
|
406
406
|
unstructured_ingest/v2/interfaces/process.py,sha256=S3A_9gkwwGC-iQxvnpj3Er6IJAjAT5npzpSgxuFAzUM,449
|
|
407
407
|
unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
|
|
@@ -468,8 +468,8 @@ unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=o3J81DnSwt3lmA
|
|
|
468
468
|
unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=NIo2CCiPiuTFotNC891Mbelzg01knItryYGUtOM96xg,4393
|
|
469
469
|
unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=RW-Cw94Hs3ZsN8Kb4ciSh_N-Qkp0cqkw_xkJbt8CDNU,4656
|
|
470
470
|
unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
|
|
471
|
-
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=
|
|
472
|
-
unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=
|
|
471
|
+
unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=KmlQCA7LXppxhL9e27LBBqNT999nUcc39qe2IkZsUJ8,18988
|
|
472
|
+
unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=tzOV0eNMyVHMXE5nedp6u0yyWC0Gn_blklg2ZdoOa4c,6956
|
|
473
473
|
unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
|
|
474
474
|
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=kw0UfGI2fx3oQ8jVpzF45pH8Qg_QP_que5C_VXgnktc,7156
|
|
475
475
|
unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=aJCtCHRBAauLwdWEQe704Cm4UHv-ukTXV2bT3SBENVk,5881
|
|
@@ -581,9 +581,11 @@ unstructured_ingest/v2/processes/connectors/zendesk/client.py,sha256=DDAYQB7catK
|
|
|
581
581
|
unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py,sha256=R8SXYkRhVUoWEHdGCt2CzcTxxuFundw_0GlGZ34YmbM,8987
|
|
582
582
|
unstructured_ingest/v2/processes/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
583
583
|
unstructured_ingest/v2/processes/utils/blob_storage.py,sha256=EWvK4HRYubr9i1UyMhv5cU9u0UzVkCDC_BIm4Uxab7Y,964
|
|
584
|
-
unstructured_ingest
|
|
585
|
-
unstructured_ingest
|
|
586
|
-
unstructured_ingest-0.5.
|
|
587
|
-
unstructured_ingest-0.5.
|
|
588
|
-
unstructured_ingest-0.5.
|
|
589
|
-
unstructured_ingest-0.5.
|
|
584
|
+
unstructured_ingest/v2/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
585
|
+
unstructured_ingest/v2/types/file_data.py,sha256=kowOhvYy0q_-khX3IuR111AfjkdQezEfxjzK6QDH7oA,3836
|
|
586
|
+
unstructured_ingest-0.5.23.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
587
|
+
unstructured_ingest-0.5.23.dist-info/METADATA,sha256=yEHUhxSR1EF-2IoXViunb9iiNlEy9p0LgMTngzwtjLM,14999
|
|
588
|
+
unstructured_ingest-0.5.23.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
589
|
+
unstructured_ingest-0.5.23.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
590
|
+
unstructured_ingest-0.5.23.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
591
|
+
unstructured_ingest-0.5.23.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
{unstructured_ingest-0.5.21.dist-info → unstructured_ingest-0.5.23.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|