unstructured-ingest 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. See the package's registry page for more details.
- test/integration/connectors/sql/test_singlestore.py +156 -0
- test/integration/connectors/test_s3.py +1 -1
- test/integration/connectors/utils/docker_compose.py +23 -8
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/interfaces/file_data.py +1 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +3 -6
- unstructured_ingest/v2/processes/connectors/astradb.py +278 -55
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +3 -1
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +1 -0
- unstructured_ingest/v2/processes/connectors/sql/__init__.py +5 -0
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +1 -20
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +168 -0
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +2 -4
- unstructured_ingest/v2/processes/connectors/sql/sql.py +13 -2
- unstructured_ingest/v2/unstructured_api.py +1 -1
- {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/METADATA +17 -17
- {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/RECORD +21 -20
- unstructured_ingest/v2/processes/connectors/singlestore.py +0 -156
- {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/top_level.txt +0 -0
|
@@ -7,6 +7,8 @@ from unstructured_ingest.v2.processes.connector_registry import (
|
|
|
7
7
|
|
|
8
8
|
from .postgres import CONNECTOR_TYPE as POSTGRES_CONNECTOR_TYPE
|
|
9
9
|
from .postgres import postgres_destination_entry, postgres_source_entry
|
|
10
|
+
from .singlestore import CONNECTOR_TYPE as SINGLESTORE_CONNECTOR_TYPE
|
|
11
|
+
from .singlestore import singlestore_destination_entry
|
|
10
12
|
from .snowflake import CONNECTOR_TYPE as SNOWFLAKE_CONNECTOR_TYPE
|
|
11
13
|
from .snowflake import snowflake_destination_entry, snowflake_source_entry
|
|
12
14
|
from .sqlite import CONNECTOR_TYPE as SQLITE_CONNECTOR_TYPE
|
|
@@ -19,3 +21,6 @@ add_source_entry(source_type=SNOWFLAKE_CONNECTOR_TYPE, entry=snowflake_source_en
|
|
|
19
21
|
add_destination_entry(destination_type=SQLITE_CONNECTOR_TYPE, entry=sqlite_destination_entry)
|
|
20
22
|
add_destination_entry(destination_type=POSTGRES_CONNECTOR_TYPE, entry=postgres_destination_entry)
|
|
21
23
|
add_destination_entry(destination_type=SNOWFLAKE_CONNECTOR_TYPE, entry=snowflake_destination_entry)
|
|
24
|
+
add_destination_entry(
|
|
25
|
+
destination_type=SINGLESTORE_CONNECTOR_TYPE, entry=singlestore_destination_entry
|
|
26
|
+
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from contextlib import contextmanager
|
|
2
2
|
from dataclasses import dataclass, field
|
|
3
|
-
from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
3
|
+
from typing import TYPE_CHECKING, Generator, Optional
|
|
4
4
|
|
|
5
5
|
from pydantic import Field, Secret
|
|
6
6
|
|
|
@@ -12,7 +12,6 @@ from unstructured_ingest.v2.processes.connector_registry import (
|
|
|
12
12
|
SourceRegistryEntry,
|
|
13
13
|
)
|
|
14
14
|
from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
15
|
-
_DATE_COLUMNS,
|
|
16
15
|
SQLAccessConfig,
|
|
17
16
|
SQLConnectionConfig,
|
|
18
17
|
SQLDownloader,
|
|
@@ -23,7 +22,6 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
23
22
|
SQLUploaderConfig,
|
|
24
23
|
SQLUploadStager,
|
|
25
24
|
SQLUploadStagerConfig,
|
|
26
|
-
parse_date_string,
|
|
27
25
|
)
|
|
28
26
|
|
|
29
27
|
if TYPE_CHECKING:
|
|
@@ -138,23 +136,6 @@ class PostgresUploader(SQLUploader):
|
|
|
138
136
|
connector_type: str = CONNECTOR_TYPE
|
|
139
137
|
values_delimiter: str = "%s"
|
|
140
138
|
|
|
141
|
-
def prepare_data(
|
|
142
|
-
self, columns: list[str], data: tuple[tuple[Any, ...], ...]
|
|
143
|
-
) -> list[tuple[Any, ...]]:
|
|
144
|
-
output = []
|
|
145
|
-
for row in data:
|
|
146
|
-
parsed = []
|
|
147
|
-
for column_name, value in zip(columns, row):
|
|
148
|
-
if column_name in _DATE_COLUMNS:
|
|
149
|
-
if value is None:
|
|
150
|
-
parsed.append(None)
|
|
151
|
-
else:
|
|
152
|
-
parsed.append(parse_date_string(value))
|
|
153
|
-
else:
|
|
154
|
-
parsed.append(value)
|
|
155
|
-
output.append(tuple(parsed))
|
|
156
|
-
return output
|
|
157
|
-
|
|
158
139
|
|
|
159
140
|
postgres_source_entry = SourceRegistryEntry(
|
|
160
141
|
connection_config=PostgresConnectionConfig,
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from contextlib import contextmanager
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
5
|
+
|
|
6
|
+
from pydantic import Field, Secret
|
|
7
|
+
|
|
8
|
+
from unstructured_ingest.v2.interfaces import FileData
|
|
9
|
+
from unstructured_ingest.v2.logger import logger
|
|
10
|
+
from unstructured_ingest.v2.processes.connector_registry import (
|
|
11
|
+
DestinationRegistryEntry,
|
|
12
|
+
SourceRegistryEntry,
|
|
13
|
+
)
|
|
14
|
+
from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
15
|
+
_DATE_COLUMNS,
|
|
16
|
+
SQLAccessConfig,
|
|
17
|
+
SQLConnectionConfig,
|
|
18
|
+
SQLDownloader,
|
|
19
|
+
SQLDownloaderConfig,
|
|
20
|
+
SQLIndexer,
|
|
21
|
+
SQLIndexerConfig,
|
|
22
|
+
SQLUploader,
|
|
23
|
+
SQLUploaderConfig,
|
|
24
|
+
SQLUploadStager,
|
|
25
|
+
SQLUploadStagerConfig,
|
|
26
|
+
parse_date_string,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from singlestoredb.connection import Connection as SingleStoreConnection
|
|
31
|
+
from singlestoredb.connection import Cursor as SingleStoreCursor
|
|
32
|
+
|
|
33
|
+
CONNECTOR_TYPE = "singlestore"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class SingleStoreAccessConfig(SQLAccessConfig):
|
|
37
|
+
password: Optional[str] = Field(default=None, description="SingleStore password")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class SingleStoreConnectionConfig(SQLConnectionConfig):
|
|
41
|
+
access_config: Secret[SingleStoreAccessConfig]
|
|
42
|
+
host: Optional[str] = Field(default=None, description="SingleStore host")
|
|
43
|
+
port: Optional[int] = Field(default=None, description="SingleStore port")
|
|
44
|
+
user: Optional[str] = Field(default=None, description="SingleStore user")
|
|
45
|
+
database: Optional[str] = Field(default=None, description="SingleStore database")
|
|
46
|
+
|
|
47
|
+
@contextmanager
|
|
48
|
+
def get_connection(self) -> Generator["SingleStoreConnection", None, None]:
|
|
49
|
+
import singlestoredb as s2
|
|
50
|
+
|
|
51
|
+
connection = s2.connect(
|
|
52
|
+
host=self.host,
|
|
53
|
+
port=self.port,
|
|
54
|
+
database=self.database,
|
|
55
|
+
user=self.user,
|
|
56
|
+
password=self.access_config.get_secret_value().password,
|
|
57
|
+
)
|
|
58
|
+
try:
|
|
59
|
+
yield connection
|
|
60
|
+
finally:
|
|
61
|
+
connection.commit()
|
|
62
|
+
connection.close()
|
|
63
|
+
|
|
64
|
+
@contextmanager
|
|
65
|
+
def get_cursor(self) -> Generator["SingleStoreCursor", None, None]:
|
|
66
|
+
with self.get_connection() as connection:
|
|
67
|
+
with connection.cursor() as cursor:
|
|
68
|
+
try:
|
|
69
|
+
yield cursor
|
|
70
|
+
finally:
|
|
71
|
+
cursor.close()
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class SingleStoreIndexerConfig(SQLIndexerConfig):
|
|
75
|
+
pass
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
|
|
79
|
+
class SingleStoreIndexer(SQLIndexer):
|
|
80
|
+
connection_config: SingleStoreConnectionConfig
|
|
81
|
+
index_config: SingleStoreIndexerConfig
|
|
82
|
+
connector_type: str = CONNECTOR_TYPE
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class SingleStoreDownloaderConfig(SQLDownloaderConfig):
|
|
86
|
+
pass
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
|
|
90
|
+
class SingleStoreDownloader(SQLDownloader):
|
|
91
|
+
connection_config: SingleStoreConnectionConfig
|
|
92
|
+
download_config: SingleStoreDownloaderConfig
|
|
93
|
+
connector_type: str = CONNECTOR_TYPE
|
|
94
|
+
|
|
95
|
+
def query_db(self, file_data: FileData) -> tuple[list[tuple], list[str]]:
|
|
96
|
+
table_name = file_data.additional_metadata["table_name"]
|
|
97
|
+
id_column = file_data.additional_metadata["id_column"]
|
|
98
|
+
ids = file_data.additional_metadata["ids"]
|
|
99
|
+
with self.connection_config.get_connection() as sqlite_connection:
|
|
100
|
+
cursor = sqlite_connection.cursor()
|
|
101
|
+
fields = ",".join(self.download_config.fields) if self.download_config.fields else "*"
|
|
102
|
+
query = "SELECT {fields} FROM {table_name} WHERE {id_column} in ({ids})".format(
|
|
103
|
+
fields=fields,
|
|
104
|
+
table_name=table_name,
|
|
105
|
+
id_column=id_column,
|
|
106
|
+
ids=",".join([str(i) for i in ids]),
|
|
107
|
+
)
|
|
108
|
+
logger.debug(f"running query: {query}")
|
|
109
|
+
cursor.execute(query)
|
|
110
|
+
rows = cursor.fetchall()
|
|
111
|
+
columns = [col[0] for col in cursor.description]
|
|
112
|
+
return rows, columns
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class SingleStoreUploadStagerConfig(SQLUploadStagerConfig):
|
|
116
|
+
pass
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class SingleStoreUploadStager(SQLUploadStager):
|
|
120
|
+
upload_stager_config: SingleStoreUploadStagerConfig
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class SingleStoreUploaderConfig(SQLUploaderConfig):
|
|
124
|
+
pass
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
@dataclass
|
|
128
|
+
class SingleStoreUploader(SQLUploader):
|
|
129
|
+
upload_config: SingleStoreUploaderConfig = field(default_factory=SingleStoreUploaderConfig)
|
|
130
|
+
connection_config: SingleStoreConnectionConfig
|
|
131
|
+
values_delimiter: str = "%s"
|
|
132
|
+
connector_type: str = CONNECTOR_TYPE
|
|
133
|
+
|
|
134
|
+
def prepare_data(
|
|
135
|
+
self, columns: list[str], data: tuple[tuple[Any, ...], ...]
|
|
136
|
+
) -> list[tuple[Any, ...]]:
|
|
137
|
+
output = []
|
|
138
|
+
for row in data:
|
|
139
|
+
parsed = []
|
|
140
|
+
for column_name, value in zip(columns, row):
|
|
141
|
+
if isinstance(value, (list, dict)):
|
|
142
|
+
value = json.dumps(value)
|
|
143
|
+
if column_name in _DATE_COLUMNS:
|
|
144
|
+
if value is None:
|
|
145
|
+
parsed.append(None)
|
|
146
|
+
else:
|
|
147
|
+
parsed.append(parse_date_string(value))
|
|
148
|
+
else:
|
|
149
|
+
parsed.append(value)
|
|
150
|
+
output.append(tuple(parsed))
|
|
151
|
+
return output
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
singlestore_source_entry = SourceRegistryEntry(
|
|
155
|
+
connection_config=SingleStoreConnectionConfig,
|
|
156
|
+
indexer_config=SingleStoreIndexerConfig,
|
|
157
|
+
indexer=SQLIndexer,
|
|
158
|
+
downloader_config=SingleStoreDownloaderConfig,
|
|
159
|
+
downloader=SingleStoreDownloader,
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
singlestore_destination_entry = DestinationRegistryEntry(
|
|
163
|
+
connection_config=SingleStoreConnectionConfig,
|
|
164
|
+
uploader=SingleStoreUploader,
|
|
165
|
+
uploader_config=SingleStoreUploaderConfig,
|
|
166
|
+
upload_stager=SingleStoreUploadStager,
|
|
167
|
+
upload_stager_config=SingleStoreUploadStagerConfig,
|
|
168
|
+
)
|
|
@@ -51,10 +51,7 @@ class SnowflakeConnectionConfig(SQLConnectionConfig):
|
|
|
51
51
|
default=None,
|
|
52
52
|
description="Database name.",
|
|
53
53
|
)
|
|
54
|
-
schema: str = Field(
|
55
|
-
default=None,
|
|
56
|
-
description="Database schema.",
|
|
57
|
-
)
|
|
54
|
+
db_schema: str = Field(default=None, description="Database schema.", alias="schema")
|
|
58
55
|
role: str = Field(
|
|
59
56
|
default=None,
|
|
60
57
|
description="Database role.",
|
|
@@ -68,6 +65,7 @@ class SnowflakeConnectionConfig(SQLConnectionConfig):
|
|
|
68
65
|
from snowflake.connector import connect
|
|
69
66
|
|
|
70
67
|
connect_kwargs = self.model_dump()
|
|
68
|
+
connect_kwargs["schema"] = connect_kwargs.pop("db_schema")
|
|
71
69
|
connect_kwargs.pop("access_configs", None)
|
|
72
70
|
connect_kwargs["password"] = self.access_config.get_secret_value().password
|
|
73
71
|
# https://peps.python.org/pep-0249/#paramstyle
|
|
@@ -308,11 +308,22 @@ class SQLUploader(Uploader):
|
|
|
308
308
|
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
309
309
|
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
310
310
|
|
|
311
|
-
@abstractmethod
|
|
312
311
|
def prepare_data(
|
|
313
312
|
self, columns: list[str], data: tuple[tuple[Any, ...], ...]
|
|
314
313
|
) -> list[tuple[Any, ...]]:
|
|
315
|
-
|
|
314
|
+
output = []
|
|
315
|
+
for row in data:
|
|
316
|
+
parsed = []
|
|
317
|
+
for column_name, value in zip(columns, row):
|
|
318
|
+
if column_name in _DATE_COLUMNS:
|
|
319
|
+
if value is None:
|
|
320
|
+
parsed.append(None)
|
|
321
|
+
else:
|
|
322
|
+
parsed.append(parse_date_string(value))
|
|
323
|
+
else:
|
|
324
|
+
parsed.append(value)
|
|
325
|
+
output.append(tuple(parsed))
|
|
326
|
+
return output
|
|
316
327
|
|
|
317
328
|
def upload_contents(self, path: Path) -> None:
|
|
318
329
|
df = pd.read_json(path, orient="records", lines=True)
|
|
@@ -26,7 +26,7 @@ def create_partition_request(filename: Path, parameters_dict: dict) -> "Partitio
|
|
|
26
26
|
# NOTE(austin): PartitionParameters is a Pydantic model in v0.26.0
|
|
27
27
|
# Prior to this it was a dataclass which doesn't have .__fields
|
|
28
28
|
try:
|
|
29
|
-
possible_fields = PartitionParameters.__fields__
|
|
29
|
+
possible_fields = PartitionParameters.model_fields
|
|
30
30
|
except AttributeError:
|
|
31
31
|
possible_fields = [f.name for f in fields(PartitionParameters)]
|
|
32
32
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unstructured-ingest
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: A library that prepares raw documents for downstream ML tasks.
|
|
5
5
|
Home-page: https://github.com/Unstructured-IO/unstructured-ingest
|
|
6
6
|
Author: Unstructured Technologies
|
|
@@ -22,30 +22,30 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
|
22
22
|
Requires-Python: >=3.9.0,<3.13
|
|
23
23
|
Description-Content-Type: text/markdown
|
|
24
24
|
License-File: LICENSE.md
|
|
25
|
-
Requires-Dist: pydantic>=2.7
|
|
26
|
-
Requires-Dist: opentelemetry-sdk
|
|
27
|
-
Requires-Dist: python-dateutil
|
|
28
25
|
Requires-Dist: tqdm
|
|
26
|
+
Requires-Dist: python-dateutil
|
|
29
27
|
Requires-Dist: pandas
|
|
30
28
|
Requires-Dist: click
|
|
29
|
+
Requires-Dist: pydantic>=2.7
|
|
31
30
|
Requires-Dist: dataclasses-json
|
|
31
|
+
Requires-Dist: opentelemetry-sdk
|
|
32
32
|
Provides-Extra: airtable
|
|
33
33
|
Requires-Dist: pyairtable; extra == "airtable"
|
|
34
34
|
Provides-Extra: astradb
|
|
35
35
|
Requires-Dist: astrapy; extra == "astradb"
|
|
36
36
|
Provides-Extra: azure
|
|
37
|
-
Requires-Dist: adlfs; extra == "azure"
|
|
38
37
|
Requires-Dist: fsspec; extra == "azure"
|
|
38
|
+
Requires-Dist: adlfs; extra == "azure"
|
|
39
39
|
Provides-Extra: azure-cognitive-search
|
|
40
40
|
Requires-Dist: azure-search-documents; extra == "azure-cognitive-search"
|
|
41
41
|
Provides-Extra: bedrock
|
|
42
42
|
Requires-Dist: boto3; extra == "bedrock"
|
|
43
43
|
Provides-Extra: biomed
|
|
44
|
-
Requires-Dist: bs4; extra == "biomed"
|
|
45
44
|
Requires-Dist: requests; extra == "biomed"
|
|
45
|
+
Requires-Dist: bs4; extra == "biomed"
|
|
46
46
|
Provides-Extra: box
|
|
47
|
-
Requires-Dist: boxfs; extra == "box"
|
|
48
47
|
Requires-Dist: fsspec; extra == "box"
|
|
48
|
+
Requires-Dist: boxfs; extra == "box"
|
|
49
49
|
Provides-Extra: chroma
|
|
50
50
|
Requires-Dist: chromadb; extra == "chroma"
|
|
51
51
|
Provides-Extra: clarifai
|
|
@@ -60,8 +60,8 @@ Requires-Dist: unstructured[tsv]; extra == "csv"
|
|
|
60
60
|
Provides-Extra: databricks-volumes
|
|
61
61
|
Requires-Dist: databricks-sdk; extra == "databricks-volumes"
|
|
62
62
|
Provides-Extra: delta-table
|
|
63
|
-
Requires-Dist: deltalake; extra == "delta-table"
|
|
64
63
|
Requires-Dist: fsspec; extra == "delta-table"
|
|
64
|
+
Requires-Dist: deltalake; extra == "delta-table"
|
|
65
65
|
Provides-Extra: discord
|
|
66
66
|
Requires-Dist: discord-py; extra == "discord"
|
|
67
67
|
Provides-Extra: doc
|
|
@@ -69,8 +69,8 @@ Requires-Dist: unstructured[docx]; extra == "doc"
|
|
|
69
69
|
Provides-Extra: docx
|
|
70
70
|
Requires-Dist: unstructured[docx]; extra == "docx"
|
|
71
71
|
Provides-Extra: dropbox
|
|
72
|
-
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
73
72
|
Requires-Dist: fsspec; extra == "dropbox"
|
|
73
|
+
Requires-Dist: dropboxdrivefs; extra == "dropbox"
|
|
74
74
|
Provides-Extra: elasticsearch
|
|
75
75
|
Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
|
|
76
76
|
Provides-Extra: embed-huggingface
|
|
@@ -87,9 +87,9 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
|
|
|
87
87
|
Provides-Extra: epub
|
|
88
88
|
Requires-Dist: unstructured[epub]; extra == "epub"
|
|
89
89
|
Provides-Extra: gcs
|
|
90
|
-
Requires-Dist: bs4; extra == "gcs"
|
|
91
90
|
Requires-Dist: gcsfs; extra == "gcs"
|
|
92
91
|
Requires-Dist: fsspec; extra == "gcs"
|
|
92
|
+
Requires-Dist: bs4; extra == "gcs"
|
|
93
93
|
Provides-Extra: github
|
|
94
94
|
Requires-Dist: pygithub>1.58.0; extra == "github"
|
|
95
95
|
Requires-Dist: requests; extra == "github"
|
|
@@ -98,8 +98,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
|
|
|
98
98
|
Provides-Extra: google-drive
|
|
99
99
|
Requires-Dist: google-api-python-client; extra == "google-drive"
|
|
100
100
|
Provides-Extra: hubspot
|
|
101
|
-
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
102
101
|
Requires-Dist: urllib3; extra == "hubspot"
|
|
102
|
+
Requires-Dist: hubspot-api-client; extra == "hubspot"
|
|
103
103
|
Provides-Extra: jira
|
|
104
104
|
Requires-Dist: atlassian-python-api; extra == "jira"
|
|
105
105
|
Provides-Extra: kafka
|
|
@@ -115,16 +115,16 @@ Requires-Dist: pymongo; extra == "mongodb"
|
|
|
115
115
|
Provides-Extra: msg
|
|
116
116
|
Requires-Dist: unstructured[msg]; extra == "msg"
|
|
117
117
|
Provides-Extra: notion
|
|
118
|
-
Requires-Dist: backoff; extra == "notion"
|
|
119
118
|
Requires-Dist: httpx; extra == "notion"
|
|
120
|
-
Requires-Dist: notion-client; extra == "notion"
|
|
119
|
+
Requires-Dist: backoff; extra == "notion"
|
|
121
120
|
Requires-Dist: htmlBuilder; extra == "notion"
|
|
121
|
+
Requires-Dist: notion-client; extra == "notion"
|
|
122
122
|
Provides-Extra: odt
|
|
123
123
|
Requires-Dist: unstructured[odt]; extra == "odt"
|
|
124
124
|
Provides-Extra: onedrive
|
|
125
|
+
Requires-Dist: msal; extra == "onedrive"
|
|
125
126
|
Requires-Dist: bs4; extra == "onedrive"
|
|
126
127
|
Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
|
|
127
|
-
Requires-Dist: msal; extra == "onedrive"
|
|
128
128
|
Provides-Extra: openai
|
|
129
129
|
Requires-Dist: openai; extra == "openai"
|
|
130
130
|
Requires-Dist: tiktoken; extra == "openai"
|
|
@@ -133,8 +133,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
|
|
|
133
133
|
Provides-Extra: org
|
|
134
134
|
Requires-Dist: unstructured[org]; extra == "org"
|
|
135
135
|
Provides-Extra: outlook
|
|
136
|
-
Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
|
|
137
136
|
Requires-Dist: msal; extra == "outlook"
|
|
137
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
|
|
138
138
|
Provides-Extra: pdf
|
|
139
139
|
Requires-Dist: unstructured[pdf]; extra == "pdf"
|
|
140
140
|
Provides-Extra: pinecone
|
|
@@ -161,11 +161,11 @@ Requires-Dist: s3fs; extra == "s3"
|
|
|
161
161
|
Provides-Extra: salesforce
|
|
162
162
|
Requires-Dist: simple-salesforce; extra == "salesforce"
|
|
163
163
|
Provides-Extra: sftp
|
|
164
|
-
Requires-Dist: paramiko; extra == "sftp"
|
|
165
164
|
Requires-Dist: fsspec; extra == "sftp"
|
|
165
|
+
Requires-Dist: paramiko; extra == "sftp"
|
|
166
166
|
Provides-Extra: sharepoint
|
|
167
|
-
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
168
167
|
Requires-Dist: msal; extra == "sharepoint"
|
|
168
|
+
Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
|
|
169
169
|
Provides-Extra: singlestore
|
|
170
170
|
Requires-Dist: singlestoredb; extra == "singlestore"
|
|
171
171
|
Provides-Extra: slack
|
|
@@ -6,17 +6,18 @@ test/integration/chunkers/test_chunkers.py,sha256=pqn1Rqh36jZTJL4qpU0iuOMFAEQ-Lr
|
|
|
6
6
|
test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
7
|
test/integration/connectors/conftest.py,sha256=6dVNMBrL6WIO4KXA-0nf2tNrPYk_tsor8uomi6fbi3Q,727
|
|
8
8
|
test/integration/connectors/test_delta_table.py,sha256=4_KPyQJpd6DmyIjjtXWPMw6NNf7xULRkxmqfbvmZ80g,5018
|
|
9
|
-
test/integration/connectors/test_s3.py,sha256=
|
|
9
|
+
test/integration/connectors/test_s3.py,sha256=1ErPRpNmbg-88ig80SfIyxujF7xnAWtI42WSue4sgKU,5850
|
|
10
10
|
test/integration/connectors/databricks_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
11
|
test/integration/connectors/databricks_tests/test_volumes_native.py,sha256=k4lALbwNtlyuI3wd3OHoBULI21E3Ck2Fo8EJXaVfwgw,5812
|
|
12
12
|
test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
test/integration/connectors/sql/test_postgres.py,sha256=gDBuNyvWmpVPmDrSSYC99z3t17B_a196P1MwIAOp5Dk,6584
|
|
14
|
+
test/integration/connectors/sql/test_singlestore.py,sha256=wGI3-lc6qh0qN4-WD9VtiXBB9MlekeqK402_9EXQyX0,5876
|
|
14
15
|
test/integration/connectors/sql/test_snowflake.py,sha256=XXU2-2z_k8jHWP684v2IuaGOlV3cmPpg3RxkwMp08v8,6998
|
|
15
16
|
test/integration/connectors/sql/test_sqlite.py,sha256=51QrFufAq-XxNjHAkmPWxdJUkGdIRRIGKeRT09A5pkA,5704
|
|
16
17
|
test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
18
|
test/integration/connectors/utils/constants.py,sha256=0zSPnsZVqJuNhXduXvdXFQLZTRIQa5Fo_1qjBYVCfb8,209
|
|
18
19
|
test/integration/connectors/utils/docker.py,sha256=-wknXRVlzr3BVPdEhCyJgsdNjO9aSb2xjb-mQ306j7Q,2256
|
|
19
|
-
test/integration/connectors/utils/docker_compose.py,sha256=
|
|
20
|
+
test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQQx_cBfz13ZTs1HpbaYipNU,2223
|
|
20
21
|
test/integration/connectors/utils/validation.py,sha256=gnflehoYbFkSBJdXQV-7HwcrlL_Cuqni2ri1YmArjT0,12019
|
|
21
22
|
test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
23
|
test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
|
|
@@ -46,7 +47,7 @@ test/unit/embed/test_openai.py,sha256=0O1yshDcE0BMKv1yJqrNuiNLSdPhLpKqJ-D_wmnids
|
|
|
46
47
|
test/unit/embed/test_vertexai.py,sha256=Pl7COc9E3tf_yGidkTEmTizNGyZF1F5zuL2TgPTMnfI,1048
|
|
47
48
|
test/unit/embed/test_voyageai.py,sha256=DviCOJFhe5H4e26-kNyX3JNe8h3qB5Yl0KOe8rQEMrc,981
|
|
48
49
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
49
|
-
unstructured_ingest/__version__.py,sha256=
|
|
50
|
+
unstructured_ingest/__version__.py,sha256=Hmm5OuicK0ynl_R5DSnpRYWJpEXwe7guJdsAMHH7K60,42
|
|
50
51
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
51
52
|
unstructured_ingest/interfaces.py,sha256=m03BgenxSA34HbW157L7V9TGxK_dTG7N2AnAhF31W-U,31364
|
|
52
53
|
unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
|
|
@@ -313,7 +314,7 @@ unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LG
|
|
|
313
314
|
unstructured_ingest/v2/logger.py,sha256=wcln4s5Nyp2fjjJux9iM3d6t9aQFNJ2H1IAZXmIknjI,4323
|
|
314
315
|
unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
|
|
315
316
|
unstructured_ingest/v2/otel.py,sha256=2fGj1c7cVcC3J8NwL6MNYhyPEAXiB33DsilvRDkrdLo,4130
|
|
316
|
-
unstructured_ingest/v2/unstructured_api.py,sha256=
|
|
317
|
+
unstructured_ingest/v2/unstructured_api.py,sha256=HqOaQ80YTdAnFj_2Ce108g7Pp3-F9Qg329Uw2OXtRmA,3375
|
|
317
318
|
unstructured_ingest/v2/utils.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
|
|
318
319
|
unstructured_ingest/v2/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
319
320
|
unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfiMULE,648
|
|
@@ -329,7 +330,7 @@ unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1K
|
|
|
329
330
|
unstructured_ingest/v2/interfaces/__init__.py,sha256=Rfa8crx6De7WNOK-EjsWWwFVpsUfCc6gY8B8tQ3ae9I,899
|
|
330
331
|
unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
|
|
331
332
|
unstructured_ingest/v2/interfaces/downloader.py,sha256=Lj3nTY1hPA71GfNeedFVCdHdZsHLle8qrx5RtXAy9GY,2940
|
|
332
|
-
unstructured_ingest/v2/interfaces/file_data.py,sha256=
|
|
333
|
+
unstructured_ingest/v2/interfaces/file_data.py,sha256=D71bXImJ7Pyjtl3I3pa2O2B2iBqIaY-mC-hdoEF3RmI,1983
|
|
333
334
|
unstructured_ingest/v2/interfaces/indexer.py,sha256=gsa1MLhFa82BzD2h4Yb7ons0VxRwKINZOrzvHAahwVU,846
|
|
334
335
|
unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
|
|
335
336
|
unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
|
|
@@ -356,9 +357,9 @@ unstructured_ingest/v2/processes/embedder.py,sha256=PQn0IO8xbGRQHpcT2VVl-J8gTJ5H
|
|
|
356
357
|
unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
|
|
357
358
|
unstructured_ingest/v2/processes/partitioner.py,sha256=2Lhztd730soVC2TOqrn_ba7CGZna8AHHpqJY2ZUYVxE,7776
|
|
358
359
|
unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
|
|
359
|
-
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=
|
|
360
|
+
unstructured_ingest/v2/processes/connectors/__init__.py,sha256=zMO50wOGWOJrCTdh19Najj-i5tfMUyf977TKz4yN04A,5249
|
|
360
361
|
unstructured_ingest/v2/processes/connectors/airtable.py,sha256=Yi7PEv_FejZ9_y3BPY3gu5YGVfeLh-9YX-qLyQHjJsY,8921
|
|
361
|
-
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=
|
|
362
|
+
unstructured_ingest/v2/processes/connectors/astradb.py,sha256=k6zaxm05-ESpRV6w1jgrtfE10-I2Z50kafURxxJVzdk,14043
|
|
362
363
|
unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=S55v7TXu30rEdgythMBB_2VcuomyMPmcPtLYykbhw_E,8466
|
|
363
364
|
unstructured_ingest/v2/processes/connectors/chroma.py,sha256=skrxRPHZ8y3JxNa0dt5SVitHiDQ5WVxLvY_kh2-QUrQ,8029
|
|
364
365
|
unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=SONLywyEfoAlLc-HPabXeGzoiwKnekMHIbRMXd4CGXs,12146
|
|
@@ -375,12 +376,11 @@ unstructured_ingest/v2/processes/connectors/outlook.py,sha256=NK67Pd8Nk5oUIXTK-s
|
|
|
375
376
|
unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=k_GH55S_OQ6-wCLC6gkhRrNpXIFECYZ_2Gjz_XRtY6Y,7561
|
|
376
377
|
unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
|
|
377
378
|
unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=hOaV5gBcHFc6N5Rbu3MgM-5Aol1ht-QkNIN4PqjvfxE,19665
|
|
378
|
-
unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=4rVvWKK2iQr03Ff6cB5zjfE1MpN0JyIGpCxxFCDI6hc,5563
|
|
379
379
|
unstructured_ingest/v2/processes/connectors/slack.py,sha256=b9IanzUApUexiJzuNg7PR3tujOoeG8dhM0L0v4MDuPw,9256
|
|
380
380
|
unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
|
|
381
381
|
unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=Ss0YyD5T6k-00eJ6dr5lSo2H0LcOjVTMmozehyTvnAo,8866
|
|
382
382
|
unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=jO71UTC7bLA_N12CrLWJzh_yZML5gfT7VohxzCpUGWg,1848
|
|
383
|
-
unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=
|
|
383
|
+
unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=8FasrRcoqa9zrhmnbfYN_rBBTH6xBXM50TzGsUMEm98,6581
|
|
384
384
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=I1MJwe5LOxoPLjwo00H0XbXO6u_SJHWYgsj4s6ePoyI,2754
|
|
385
385
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=P4rfcE3td7WyuuguRgUnGQytCMDpfeYrrpshBZuVynY,3539
|
|
386
386
|
unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=UUotY_-HpgSEJkvdQfZTlbxY7CRLZ4ctL8TlryeFvxk,2790
|
|
@@ -389,19 +389,20 @@ unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Yp
|
|
|
389
389
|
unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=Y01BuVRql0Kvzc_cdaZE9dDGYjJzrwJu-etfUrEGcUU,7061
|
|
390
390
|
unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=Cjk0LUxqOCDbme0GmnD_5_b1hfStjI23cKw6BquKNrg,5488
|
|
391
391
|
unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=NNAxIRdOQxUncfwhu7J7SnQRM6BSStNOyQZi-4E51iY,5816
|
|
392
|
-
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=
|
|
392
|
+
unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=5uZ_nGBXNQgwvfjNcor6mwzbYOHeja4-EV3nNCXvxaQ,11512
|
|
393
393
|
unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=-_pYHbsBG9FyRyNIaf_xyFbPiiR7pnWEEg_8mp0rIZ8,7053
|
|
394
394
|
unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=je1BDqFWlyMfPa4oAMMNFQLLQtCY9quuqx3xjTwF8OQ,6251
|
|
395
395
|
unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=dwpyqDq0qceCBWX3zM1hiUlgXB4hzX6ObOr-sh-5CJs,6926
|
|
396
396
|
unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
|
|
397
|
-
unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=
|
|
398
|
-
unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=
|
|
399
|
-
unstructured_ingest/v2/processes/connectors/sql/
|
|
400
|
-
unstructured_ingest/v2/processes/connectors/sql/
|
|
397
|
+
unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=D43wrV2ADvQsToIYwbEWnZ7mhzlsYcZMFCqf6jIC7dQ,1333
|
|
398
|
+
unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=__Wf5lkCQGhbtEH_2DxfNmQyWP-UKC9o_KEawG81jY0,4905
|
|
399
|
+
unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=YrmhAL1RQ1c5-2fnR3UAyj_4KfvjYTQ2cWzpvsdJOnU,5535
|
|
400
|
+
unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=0s0oBfMttPg5JL6jn8SsoCeTSRoXXdVy2bJAZv_hiSk,5576
|
|
401
|
+
unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=rWDkefUnYkzJT0mhIcHxieECdaIWLTvbDcOcZgLA4FQ,11636
|
|
401
402
|
unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=9605K36nQ5-gBxzt1daYKYotON1SE85RETusqCJrbdk,5230
|
|
402
|
-
unstructured_ingest-0.2.0.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
403
|
-
unstructured_ingest-0.2.
|
|
404
|
-
unstructured_ingest-0.2.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
405
|
-
unstructured_ingest-0.2.0.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
406
|
-
unstructured_ingest-0.2.0.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
407
|
-
unstructured_ingest-0.2.0.dist-info/RECORD,,
|
|
403
|
+
unstructured_ingest-0.2.1.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
404
|
+
unstructured_ingest-0.2.1.dist-info/METADATA,sha256=NBV3OAonxt8Y0Tra7LWqQBoLSROwA106sf8vDCsXu2k,7271
|
|
405
|
+
unstructured_ingest-0.2.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
406
|
+
unstructured_ingest-0.2.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
407
|
+
unstructured_ingest-0.2.1.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
|
|
408
|
+
unstructured_ingest-0.2.1.dist-info/RECORD,,
|