unstructured-ingest 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (22)
  1. test/integration/connectors/sql/test_singlestore.py +156 -0
  2. test/integration/connectors/test_s3.py +1 -1
  3. test/integration/connectors/utils/docker_compose.py +23 -8
  4. unstructured_ingest/__version__.py +1 -1
  5. unstructured_ingest/v2/interfaces/file_data.py +1 -0
  6. unstructured_ingest/v2/processes/connectors/__init__.py +3 -6
  7. unstructured_ingest/v2/processes/connectors/astradb.py +278 -55
  8. unstructured_ingest/v2/processes/connectors/databricks/volumes.py +3 -1
  9. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +1 -0
  10. unstructured_ingest/v2/processes/connectors/sql/__init__.py +5 -0
  11. unstructured_ingest/v2/processes/connectors/sql/postgres.py +1 -20
  12. unstructured_ingest/v2/processes/connectors/sql/singlestore.py +168 -0
  13. unstructured_ingest/v2/processes/connectors/sql/snowflake.py +2 -4
  14. unstructured_ingest/v2/processes/connectors/sql/sql.py +13 -2
  15. unstructured_ingest/v2/unstructured_api.py +1 -1
  16. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/METADATA +17 -17
  17. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/RECORD +21 -20
  18. unstructured_ingest/v2/processes/connectors/singlestore.py +0 -156
  19. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/LICENSE.md +0 -0
  20. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/WHEEL +0 -0
  21. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/entry_points.txt +0 -0
  22. {unstructured_ingest-0.2.0.dist-info → unstructured_ingest-0.2.1.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,8 @@ from unstructured_ingest.v2.processes.connector_registry import (
7
7
 
8
8
  from .postgres import CONNECTOR_TYPE as POSTGRES_CONNECTOR_TYPE
9
9
  from .postgres import postgres_destination_entry, postgres_source_entry
10
+ from .singlestore import CONNECTOR_TYPE as SINGLESTORE_CONNECTOR_TYPE
11
+ from .singlestore import singlestore_destination_entry
10
12
  from .snowflake import CONNECTOR_TYPE as SNOWFLAKE_CONNECTOR_TYPE
11
13
  from .snowflake import snowflake_destination_entry, snowflake_source_entry
12
14
  from .sqlite import CONNECTOR_TYPE as SQLITE_CONNECTOR_TYPE
@@ -19,3 +21,6 @@ add_source_entry(source_type=SNOWFLAKE_CONNECTOR_TYPE, entry=snowflake_source_en
19
21
  add_destination_entry(destination_type=SQLITE_CONNECTOR_TYPE, entry=sqlite_destination_entry)
20
22
  add_destination_entry(destination_type=POSTGRES_CONNECTOR_TYPE, entry=postgres_destination_entry)
21
23
  add_destination_entry(destination_type=SNOWFLAKE_CONNECTOR_TYPE, entry=snowflake_destination_entry)
24
+ add_destination_entry(
25
+ destination_type=SINGLESTORE_CONNECTOR_TYPE, entry=singlestore_destination_entry
26
+ )
@@ -1,6 +1,6 @@
1
1
  from contextlib import contextmanager
2
2
  from dataclasses import dataclass, field
3
- from typing import TYPE_CHECKING, Any, Generator, Optional
3
+ from typing import TYPE_CHECKING, Generator, Optional
4
4
 
5
5
  from pydantic import Field, Secret
6
6
 
@@ -12,7 +12,6 @@ from unstructured_ingest.v2.processes.connector_registry import (
12
12
  SourceRegistryEntry,
13
13
  )
14
14
  from unstructured_ingest.v2.processes.connectors.sql.sql import (
15
- _DATE_COLUMNS,
16
15
  SQLAccessConfig,
17
16
  SQLConnectionConfig,
18
17
  SQLDownloader,
@@ -23,7 +22,6 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
23
22
  SQLUploaderConfig,
24
23
  SQLUploadStager,
25
24
  SQLUploadStagerConfig,
26
- parse_date_string,
27
25
  )
28
26
 
29
27
  if TYPE_CHECKING:
@@ -138,23 +136,6 @@ class PostgresUploader(SQLUploader):
138
136
  connector_type: str = CONNECTOR_TYPE
139
137
  values_delimiter: str = "%s"
140
138
 
141
- def prepare_data(
142
- self, columns: list[str], data: tuple[tuple[Any, ...], ...]
143
- ) -> list[tuple[Any, ...]]:
144
- output = []
145
- for row in data:
146
- parsed = []
147
- for column_name, value in zip(columns, row):
148
- if column_name in _DATE_COLUMNS:
149
- if value is None:
150
- parsed.append(None)
151
- else:
152
- parsed.append(parse_date_string(value))
153
- else:
154
- parsed.append(value)
155
- output.append(tuple(parsed))
156
- return output
157
-
158
139
 
159
140
  postgres_source_entry = SourceRegistryEntry(
160
141
  connection_config=PostgresConnectionConfig,
@@ -0,0 +1,168 @@
1
+ import json
2
+ from contextlib import contextmanager
3
+ from dataclasses import dataclass, field
4
+ from typing import TYPE_CHECKING, Any, Generator, Optional
5
+
6
+ from pydantic import Field, Secret
7
+
8
+ from unstructured_ingest.v2.interfaces import FileData
9
+ from unstructured_ingest.v2.logger import logger
10
+ from unstructured_ingest.v2.processes.connector_registry import (
11
+ DestinationRegistryEntry,
12
+ SourceRegistryEntry,
13
+ )
14
+ from unstructured_ingest.v2.processes.connectors.sql.sql import (
15
+ _DATE_COLUMNS,
16
+ SQLAccessConfig,
17
+ SQLConnectionConfig,
18
+ SQLDownloader,
19
+ SQLDownloaderConfig,
20
+ SQLIndexer,
21
+ SQLIndexerConfig,
22
+ SQLUploader,
23
+ SQLUploaderConfig,
24
+ SQLUploadStager,
25
+ SQLUploadStagerConfig,
26
+ parse_date_string,
27
+ )
28
+
29
+ if TYPE_CHECKING:
30
+ from singlestoredb.connection import Connection as SingleStoreConnection
31
+ from singlestoredb.connection import Cursor as SingleStoreCursor
32
+
33
+ CONNECTOR_TYPE = "singlestore"
34
+
35
+
36
+ class SingleStoreAccessConfig(SQLAccessConfig):
37
+ password: Optional[str] = Field(default=None, description="SingleStore password")
38
+
39
+
40
+ class SingleStoreConnectionConfig(SQLConnectionConfig):
41
+ access_config: Secret[SingleStoreAccessConfig]
42
+ host: Optional[str] = Field(default=None, description="SingleStore host")
43
+ port: Optional[int] = Field(default=None, description="SingleStore port")
44
+ user: Optional[str] = Field(default=None, description="SingleStore user")
45
+ database: Optional[str] = Field(default=None, description="SingleStore database")
46
+
47
+ @contextmanager
48
+ def get_connection(self) -> Generator["SingleStoreConnection", None, None]:
49
+ import singlestoredb as s2
50
+
51
+ connection = s2.connect(
52
+ host=self.host,
53
+ port=self.port,
54
+ database=self.database,
55
+ user=self.user,
56
+ password=self.access_config.get_secret_value().password,
57
+ )
58
+ try:
59
+ yield connection
60
+ finally:
61
+ connection.commit()
62
+ connection.close()
63
+
64
+ @contextmanager
65
+ def get_cursor(self) -> Generator["SingleStoreCursor", None, None]:
66
+ with self.get_connection() as connection:
67
+ with connection.cursor() as cursor:
68
+ try:
69
+ yield cursor
70
+ finally:
71
+ cursor.close()
72
+
73
+
74
+ class SingleStoreIndexerConfig(SQLIndexerConfig):
75
+ pass
76
+
77
+
78
+ @dataclass
79
+ class SingleStoreIndexer(SQLIndexer):
80
+ connection_config: SingleStoreConnectionConfig
81
+ index_config: SingleStoreIndexerConfig
82
+ connector_type: str = CONNECTOR_TYPE
83
+
84
+
85
+ class SingleStoreDownloaderConfig(SQLDownloaderConfig):
86
+ pass
87
+
88
+
89
+ @dataclass
90
+ class SingleStoreDownloader(SQLDownloader):
91
+ connection_config: SingleStoreConnectionConfig
92
+ download_config: SingleStoreDownloaderConfig
93
+ connector_type: str = CONNECTOR_TYPE
94
+
95
+ def query_db(self, file_data: FileData) -> tuple[list[tuple], list[str]]:
96
+ table_name = file_data.additional_metadata["table_name"]
97
+ id_column = file_data.additional_metadata["id_column"]
98
+ ids = file_data.additional_metadata["ids"]
99
+ with self.connection_config.get_connection() as sqlite_connection:
100
+ cursor = sqlite_connection.cursor()
101
+ fields = ",".join(self.download_config.fields) if self.download_config.fields else "*"
102
+ query = "SELECT {fields} FROM {table_name} WHERE {id_column} in ({ids})".format(
103
+ fields=fields,
104
+ table_name=table_name,
105
+ id_column=id_column,
106
+ ids=",".join([str(i) for i in ids]),
107
+ )
108
+ logger.debug(f"running query: {query}")
109
+ cursor.execute(query)
110
+ rows = cursor.fetchall()
111
+ columns = [col[0] for col in cursor.description]
112
+ return rows, columns
113
+
114
+
115
+ class SingleStoreUploadStagerConfig(SQLUploadStagerConfig):
116
+ pass
117
+
118
+
119
+ class SingleStoreUploadStager(SQLUploadStager):
120
+ upload_stager_config: SingleStoreUploadStagerConfig
121
+
122
+
123
+ class SingleStoreUploaderConfig(SQLUploaderConfig):
124
+ pass
125
+
126
+
127
+ @dataclass
128
+ class SingleStoreUploader(SQLUploader):
129
+ upload_config: SingleStoreUploaderConfig = field(default_factory=SingleStoreUploaderConfig)
130
+ connection_config: SingleStoreConnectionConfig
131
+ values_delimiter: str = "%s"
132
+ connector_type: str = CONNECTOR_TYPE
133
+
134
+ def prepare_data(
135
+ self, columns: list[str], data: tuple[tuple[Any, ...], ...]
136
+ ) -> list[tuple[Any, ...]]:
137
+ output = []
138
+ for row in data:
139
+ parsed = []
140
+ for column_name, value in zip(columns, row):
141
+ if isinstance(value, (list, dict)):
142
+ value = json.dumps(value)
143
+ if column_name in _DATE_COLUMNS:
144
+ if value is None:
145
+ parsed.append(None)
146
+ else:
147
+ parsed.append(parse_date_string(value))
148
+ else:
149
+ parsed.append(value)
150
+ output.append(tuple(parsed))
151
+ return output
152
+
153
+
154
+ singlestore_source_entry = SourceRegistryEntry(
155
+ connection_config=SingleStoreConnectionConfig,
156
+ indexer_config=SingleStoreIndexerConfig,
157
+ indexer=SQLIndexer,
158
+ downloader_config=SingleStoreDownloaderConfig,
159
+ downloader=SingleStoreDownloader,
160
+ )
161
+
162
+ singlestore_destination_entry = DestinationRegistryEntry(
163
+ connection_config=SingleStoreConnectionConfig,
164
+ uploader=SingleStoreUploader,
165
+ uploader_config=SingleStoreUploaderConfig,
166
+ upload_stager=SingleStoreUploadStager,
167
+ upload_stager_config=SingleStoreUploadStagerConfig,
168
+ )
@@ -51,10 +51,7 @@ class SnowflakeConnectionConfig(SQLConnectionConfig):
51
51
  default=None,
52
52
  description="Database name.",
53
53
  )
54
- schema: str = Field(
55
- default=None,
56
- description="Database schema.",
57
- )
54
+ db_schema: str = Field(default=None, description="Database schema.", alias="schema")
58
55
  role: str = Field(
59
56
  default=None,
60
57
  description="Database role.",
@@ -68,6 +65,7 @@ class SnowflakeConnectionConfig(SQLConnectionConfig):
68
65
  from snowflake.connector import connect
69
66
 
70
67
  connect_kwargs = self.model_dump()
68
+ connect_kwargs["schema"] = connect_kwargs.pop("db_schema")
71
69
  connect_kwargs.pop("access_configs", None)
72
70
  connect_kwargs["password"] = self.access_config.get_secret_value().password
73
71
  # https://peps.python.org/pep-0249/#paramstyle
@@ -308,11 +308,22 @@ class SQLUploader(Uploader):
308
308
  logger.error(f"failed to validate connection: {e}", exc_info=True)
309
309
  raise DestinationConnectionError(f"failed to validate connection: {e}")
310
310
 
311
- @abstractmethod
312
311
  def prepare_data(
313
312
  self, columns: list[str], data: tuple[tuple[Any, ...], ...]
314
313
  ) -> list[tuple[Any, ...]]:
315
- pass
314
+ output = []
315
+ for row in data:
316
+ parsed = []
317
+ for column_name, value in zip(columns, row):
318
+ if column_name in _DATE_COLUMNS:
319
+ if value is None:
320
+ parsed.append(None)
321
+ else:
322
+ parsed.append(parse_date_string(value))
323
+ else:
324
+ parsed.append(value)
325
+ output.append(tuple(parsed))
326
+ return output
316
327
 
317
328
  def upload_contents(self, path: Path) -> None:
318
329
  df = pd.read_json(path, orient="records", lines=True)
@@ -26,7 +26,7 @@ def create_partition_request(filename: Path, parameters_dict: dict) -> "Partitio
26
26
  # NOTE(austin): PartitionParameters is a Pydantic model in v0.26.0
27
27
  # Prior to this it was a dataclass which doesn't have .__fields
28
28
  try:
29
- possible_fields = PartitionParameters.__fields__
29
+ possible_fields = PartitionParameters.model_fields
30
30
  except AttributeError:
31
31
  possible_fields = [f.name for f in fields(PartitionParameters)]
32
32
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,30 +22,30 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.13
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
- Requires-Dist: pydantic>=2.7
26
- Requires-Dist: opentelemetry-sdk
27
- Requires-Dist: python-dateutil
28
25
  Requires-Dist: tqdm
26
+ Requires-Dist: python-dateutil
29
27
  Requires-Dist: pandas
30
28
  Requires-Dist: click
29
+ Requires-Dist: pydantic>=2.7
31
30
  Requires-Dist: dataclasses-json
31
+ Requires-Dist: opentelemetry-sdk
32
32
  Provides-Extra: airtable
33
33
  Requires-Dist: pyairtable; extra == "airtable"
34
34
  Provides-Extra: astradb
35
35
  Requires-Dist: astrapy; extra == "astradb"
36
36
  Provides-Extra: azure
37
- Requires-Dist: adlfs; extra == "azure"
38
37
  Requires-Dist: fsspec; extra == "azure"
38
+ Requires-Dist: adlfs; extra == "azure"
39
39
  Provides-Extra: azure-cognitive-search
40
40
  Requires-Dist: azure-search-documents; extra == "azure-cognitive-search"
41
41
  Provides-Extra: bedrock
42
42
  Requires-Dist: boto3; extra == "bedrock"
43
43
  Provides-Extra: biomed
44
- Requires-Dist: bs4; extra == "biomed"
45
44
  Requires-Dist: requests; extra == "biomed"
45
+ Requires-Dist: bs4; extra == "biomed"
46
46
  Provides-Extra: box
47
- Requires-Dist: boxfs; extra == "box"
48
47
  Requires-Dist: fsspec; extra == "box"
48
+ Requires-Dist: boxfs; extra == "box"
49
49
  Provides-Extra: chroma
50
50
  Requires-Dist: chromadb; extra == "chroma"
51
51
  Provides-Extra: clarifai
@@ -60,8 +60,8 @@ Requires-Dist: unstructured[tsv]; extra == "csv"
60
60
  Provides-Extra: databricks-volumes
61
61
  Requires-Dist: databricks-sdk; extra == "databricks-volumes"
62
62
  Provides-Extra: delta-table
63
- Requires-Dist: deltalake; extra == "delta-table"
64
63
  Requires-Dist: fsspec; extra == "delta-table"
64
+ Requires-Dist: deltalake; extra == "delta-table"
65
65
  Provides-Extra: discord
66
66
  Requires-Dist: discord-py; extra == "discord"
67
67
  Provides-Extra: doc
@@ -69,8 +69,8 @@ Requires-Dist: unstructured[docx]; extra == "doc"
69
69
  Provides-Extra: docx
70
70
  Requires-Dist: unstructured[docx]; extra == "docx"
71
71
  Provides-Extra: dropbox
72
- Requires-Dist: dropboxdrivefs; extra == "dropbox"
73
72
  Requires-Dist: fsspec; extra == "dropbox"
73
+ Requires-Dist: dropboxdrivefs; extra == "dropbox"
74
74
  Provides-Extra: elasticsearch
75
75
  Requires-Dist: elasticsearch[async]; extra == "elasticsearch"
76
76
  Provides-Extra: embed-huggingface
@@ -87,9 +87,9 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
87
87
  Provides-Extra: epub
88
88
  Requires-Dist: unstructured[epub]; extra == "epub"
89
89
  Provides-Extra: gcs
90
- Requires-Dist: bs4; extra == "gcs"
91
90
  Requires-Dist: gcsfs; extra == "gcs"
92
91
  Requires-Dist: fsspec; extra == "gcs"
92
+ Requires-Dist: bs4; extra == "gcs"
93
93
  Provides-Extra: github
94
94
  Requires-Dist: pygithub>1.58.0; extra == "github"
95
95
  Requires-Dist: requests; extra == "github"
@@ -98,8 +98,8 @@ Requires-Dist: python-gitlab; extra == "gitlab"
98
98
  Provides-Extra: google-drive
99
99
  Requires-Dist: google-api-python-client; extra == "google-drive"
100
100
  Provides-Extra: hubspot
101
- Requires-Dist: hubspot-api-client; extra == "hubspot"
102
101
  Requires-Dist: urllib3; extra == "hubspot"
102
+ Requires-Dist: hubspot-api-client; extra == "hubspot"
103
103
  Provides-Extra: jira
104
104
  Requires-Dist: atlassian-python-api; extra == "jira"
105
105
  Provides-Extra: kafka
@@ -115,16 +115,16 @@ Requires-Dist: pymongo; extra == "mongodb"
115
115
  Provides-Extra: msg
116
116
  Requires-Dist: unstructured[msg]; extra == "msg"
117
117
  Provides-Extra: notion
118
- Requires-Dist: backoff; extra == "notion"
119
118
  Requires-Dist: httpx; extra == "notion"
120
- Requires-Dist: notion-client; extra == "notion"
119
+ Requires-Dist: backoff; extra == "notion"
121
120
  Requires-Dist: htmlBuilder; extra == "notion"
121
+ Requires-Dist: notion-client; extra == "notion"
122
122
  Provides-Extra: odt
123
123
  Requires-Dist: unstructured[odt]; extra == "odt"
124
124
  Provides-Extra: onedrive
125
+ Requires-Dist: msal; extra == "onedrive"
125
126
  Requires-Dist: bs4; extra == "onedrive"
126
127
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
127
- Requires-Dist: msal; extra == "onedrive"
128
128
  Provides-Extra: openai
129
129
  Requires-Dist: openai; extra == "openai"
130
130
  Requires-Dist: tiktoken; extra == "openai"
@@ -133,8 +133,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
133
133
  Provides-Extra: org
134
134
  Requires-Dist: unstructured[org]; extra == "org"
135
135
  Provides-Extra: outlook
136
- Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
137
136
  Requires-Dist: msal; extra == "outlook"
137
+ Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
138
138
  Provides-Extra: pdf
139
139
  Requires-Dist: unstructured[pdf]; extra == "pdf"
140
140
  Provides-Extra: pinecone
@@ -161,11 +161,11 @@ Requires-Dist: s3fs; extra == "s3"
161
161
  Provides-Extra: salesforce
162
162
  Requires-Dist: simple-salesforce; extra == "salesforce"
163
163
  Provides-Extra: sftp
164
- Requires-Dist: paramiko; extra == "sftp"
165
164
  Requires-Dist: fsspec; extra == "sftp"
165
+ Requires-Dist: paramiko; extra == "sftp"
166
166
  Provides-Extra: sharepoint
167
- Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
168
167
  Requires-Dist: msal; extra == "sharepoint"
168
+ Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
169
169
  Provides-Extra: singlestore
170
170
  Requires-Dist: singlestoredb; extra == "singlestore"
171
171
  Provides-Extra: slack
@@ -6,17 +6,18 @@ test/integration/chunkers/test_chunkers.py,sha256=pqn1Rqh36jZTJL4qpU0iuOMFAEQ-Lr
6
6
  test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  test/integration/connectors/conftest.py,sha256=6dVNMBrL6WIO4KXA-0nf2tNrPYk_tsor8uomi6fbi3Q,727
8
8
  test/integration/connectors/test_delta_table.py,sha256=4_KPyQJpd6DmyIjjtXWPMw6NNf7xULRkxmqfbvmZ80g,5018
9
- test/integration/connectors/test_s3.py,sha256=fK0soCTkNxp-4hm4O2LPrhlZXvYmaeTmeEgeNh1b0k8,5839
9
+ test/integration/connectors/test_s3.py,sha256=1ErPRpNmbg-88ig80SfIyxujF7xnAWtI42WSue4sgKU,5850
10
10
  test/integration/connectors/databricks_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  test/integration/connectors/databricks_tests/test_volumes_native.py,sha256=k4lALbwNtlyuI3wd3OHoBULI21E3Ck2Fo8EJXaVfwgw,5812
12
12
  test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  test/integration/connectors/sql/test_postgres.py,sha256=gDBuNyvWmpVPmDrSSYC99z3t17B_a196P1MwIAOp5Dk,6584
14
+ test/integration/connectors/sql/test_singlestore.py,sha256=wGI3-lc6qh0qN4-WD9VtiXBB9MlekeqK402_9EXQyX0,5876
14
15
  test/integration/connectors/sql/test_snowflake.py,sha256=XXU2-2z_k8jHWP684v2IuaGOlV3cmPpg3RxkwMp08v8,6998
15
16
  test/integration/connectors/sql/test_sqlite.py,sha256=51QrFufAq-XxNjHAkmPWxdJUkGdIRRIGKeRT09A5pkA,5704
16
17
  test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
18
  test/integration/connectors/utils/constants.py,sha256=0zSPnsZVqJuNhXduXvdXFQLZTRIQa5Fo_1qjBYVCfb8,209
18
19
  test/integration/connectors/utils/docker.py,sha256=-wknXRVlzr3BVPdEhCyJgsdNjO9aSb2xjb-mQ306j7Q,2256
19
- test/integration/connectors/utils/docker_compose.py,sha256=6XeYOKQFZCBRLEmcgH2mmBAaVs6R6jCWAhJLjq6p-aM,1771
20
+ test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQQx_cBfz13ZTs1HpbaYipNU,2223
20
21
  test/integration/connectors/utils/validation.py,sha256=gnflehoYbFkSBJdXQV-7HwcrlL_Cuqni2ri1YmArjT0,12019
21
22
  test/integration/embedders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
23
  test/integration/embedders/conftest.py,sha256=B2W771RbijR7G_GybsCzRyIvOzXqzbKZdRIlNDd5AGY,334
@@ -46,7 +47,7 @@ test/unit/embed/test_openai.py,sha256=0O1yshDcE0BMKv1yJqrNuiNLSdPhLpKqJ-D_wmnids
46
47
  test/unit/embed/test_vertexai.py,sha256=Pl7COc9E3tf_yGidkTEmTizNGyZF1F5zuL2TgPTMnfI,1048
47
48
  test/unit/embed/test_voyageai.py,sha256=DviCOJFhe5H4e26-kNyX3JNe8h3qB5Yl0KOe8rQEMrc,981
48
49
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
49
- unstructured_ingest/__version__.py,sha256=BPrBFKCFfY7EcVqYVDVJGmj1rrsGlJa3283pycTFA3o,42
50
+ unstructured_ingest/__version__.py,sha256=Hmm5OuicK0ynl_R5DSnpRYWJpEXwe7guJdsAMHH7K60,42
50
51
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
51
52
  unstructured_ingest/interfaces.py,sha256=m03BgenxSA34HbW157L7V9TGxK_dTG7N2AnAhF31W-U,31364
52
53
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -313,7 +314,7 @@ unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LG
313
314
  unstructured_ingest/v2/logger.py,sha256=wcln4s5Nyp2fjjJux9iM3d6t9aQFNJ2H1IAZXmIknjI,4323
314
315
  unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
315
316
  unstructured_ingest/v2/otel.py,sha256=2fGj1c7cVcC3J8NwL6MNYhyPEAXiB33DsilvRDkrdLo,4130
316
- unstructured_ingest/v2/unstructured_api.py,sha256=1EQVuL-TojmFxWoWFzXr1oCFPEC3IkCQqhjXM8spdTY,3373
317
+ unstructured_ingest/v2/unstructured_api.py,sha256=HqOaQ80YTdAnFj_2Ce108g7Pp3-F9Qg329Uw2OXtRmA,3375
317
318
  unstructured_ingest/v2/utils.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
318
319
  unstructured_ingest/v2/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
319
320
  unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfiMULE,648
@@ -329,7 +330,7 @@ unstructured_ingest/v2/cli/utils/model_conversion.py,sha256=7eEIkk1KU51-ZNiIfI1K
329
330
  unstructured_ingest/v2/interfaces/__init__.py,sha256=Rfa8crx6De7WNOK-EjsWWwFVpsUfCc6gY8B8tQ3ae9I,899
330
331
  unstructured_ingest/v2/interfaces/connector.py,sha256=qUFFJ3qgDMenTCZMtVRjq1DIwsVak6pxNjQOH2eVkMw,1623
331
332
  unstructured_ingest/v2/interfaces/downloader.py,sha256=Lj3nTY1hPA71GfNeedFVCdHdZsHLle8qrx5RtXAy9GY,2940
332
- unstructured_ingest/v2/interfaces/file_data.py,sha256=ieJK-hqHCEOmoYNGoFbCHziSaZyMtRS9VpSoYbwoKCE,1944
333
+ unstructured_ingest/v2/interfaces/file_data.py,sha256=D71bXImJ7Pyjtl3I3pa2O2B2iBqIaY-mC-hdoEF3RmI,1983
333
334
  unstructured_ingest/v2/interfaces/indexer.py,sha256=gsa1MLhFa82BzD2h4Yb7ons0VxRwKINZOrzvHAahwVU,846
334
335
  unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
335
336
  unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
@@ -356,9 +357,9 @@ unstructured_ingest/v2/processes/embedder.py,sha256=PQn0IO8xbGRQHpcT2VVl-J8gTJ5H
356
357
  unstructured_ingest/v2/processes/filter.py,sha256=kjUmMw2SDq2bme0JCAOxs6cJriIG6Ty09KOznS-xz08,2145
357
358
  unstructured_ingest/v2/processes/partitioner.py,sha256=2Lhztd730soVC2TOqrn_ba7CGZna8AHHpqJY2ZUYVxE,7776
358
359
  unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
359
- unstructured_ingest/v2/processes/connectors/__init__.py,sha256=a7L4N7A2-SzthS6-42FKWymQRW1ydr0cGvDdI2QE--I,5377
360
+ unstructured_ingest/v2/processes/connectors/__init__.py,sha256=zMO50wOGWOJrCTdh19Najj-i5tfMUyf977TKz4yN04A,5249
360
361
  unstructured_ingest/v2/processes/connectors/airtable.py,sha256=Yi7PEv_FejZ9_y3BPY3gu5YGVfeLh-9YX-qLyQHjJsY,8921
361
- unstructured_ingest/v2/processes/connectors/astradb.py,sha256=ZctZRfXcOAMBGPkKgHvhTmV_-2F0YN5vqwfY9UCHIlU,5791
362
+ unstructured_ingest/v2/processes/connectors/astradb.py,sha256=k6zaxm05-ESpRV6w1jgrtfE10-I2Z50kafURxxJVzdk,14043
362
363
  unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=S55v7TXu30rEdgythMBB_2VcuomyMPmcPtLYykbhw_E,8466
363
364
  unstructured_ingest/v2/processes/connectors/chroma.py,sha256=skrxRPHZ8y3JxNa0dt5SVitHiDQ5WVxLvY_kh2-QUrQ,8029
364
365
  unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=SONLywyEfoAlLc-HPabXeGzoiwKnekMHIbRMXd4CGXs,12146
@@ -375,12 +376,11 @@ unstructured_ingest/v2/processes/connectors/outlook.py,sha256=NK67Pd8Nk5oUIXTK-s
375
376
  unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=k_GH55S_OQ6-wCLC6gkhRrNpXIFECYZ_2Gjz_XRtY6Y,7561
376
377
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
377
378
  unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=hOaV5gBcHFc6N5Rbu3MgM-5Aol1ht-QkNIN4PqjvfxE,19665
378
- unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=4rVvWKK2iQr03Ff6cB5zjfE1MpN0JyIGpCxxFCDI6hc,5563
379
379
  unstructured_ingest/v2/processes/connectors/slack.py,sha256=b9IanzUApUexiJzuNg7PR3tujOoeG8dhM0L0v4MDuPw,9256
380
380
  unstructured_ingest/v2/processes/connectors/utils.py,sha256=8kd0g7lo9NqnpaIkjeO-Ut6erhwUNH_gS9koevpe3WE,878
381
381
  unstructured_ingest/v2/processes/connectors/weaviate.py,sha256=Ss0YyD5T6k-00eJ6dr5lSo2H0LcOjVTMmozehyTvnAo,8866
382
382
  unstructured_ingest/v2/processes/connectors/databricks/__init__.py,sha256=jO71UTC7bLA_N12CrLWJzh_yZML5gfT7VohxzCpUGWg,1848
383
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=IBCGt6BQ7vULkPI3jTJZ52emwYg7QeyLZXjOFz9SO3E,6549
383
+ unstructured_ingest/v2/processes/connectors/databricks/volumes.py,sha256=8FasrRcoqa9zrhmnbfYN_rBBTH6xBXM50TzGsUMEm98,6581
384
384
  unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py,sha256=I1MJwe5LOxoPLjwo00H0XbXO6u_SJHWYgsj4s6ePoyI,2754
385
385
  unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=P4rfcE3td7WyuuguRgUnGQytCMDpfeYrrpshBZuVynY,3539
386
386
  unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=UUotY_-HpgSEJkvdQfZTlbxY7CRLZ4ctL8TlryeFvxk,2790
@@ -389,19 +389,20 @@ unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Yp
389
389
  unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=Y01BuVRql0Kvzc_cdaZE9dDGYjJzrwJu-etfUrEGcUU,7061
390
390
  unstructured_ingest/v2/processes/connectors/fsspec/box.py,sha256=Cjk0LUxqOCDbme0GmnD_5_b1hfStjI23cKw6BquKNrg,5488
391
391
  unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py,sha256=NNAxIRdOQxUncfwhu7J7SnQRM6BSStNOyQZi-4E51iY,5816
392
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=eFcrpSAB8wbLHuCiDb-2QpEUtgEEUA_iSqcT81H2-3Q,11472
392
+ unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py,sha256=5uZ_nGBXNQgwvfjNcor6mwzbYOHeja4-EV3nNCXvxaQ,11512
393
393
  unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=-_pYHbsBG9FyRyNIaf_xyFbPiiR7pnWEEg_8mp0rIZ8,7053
394
394
  unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=je1BDqFWlyMfPa4oAMMNFQLLQtCY9quuqx3xjTwF8OQ,6251
395
395
  unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=dwpyqDq0qceCBWX3zM1hiUlgXB4hzX6ObOr-sh-5CJs,6926
396
396
  unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
397
- unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=hdGD-V4U3RgnVoJV5S3exKVUfzCLLY7wTwKWvVaihJs,1098
398
- unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=WUqyjzjmuVvLKCMKnhFhYNRAAQs_cFh0DkSXAJEERyU,5548
399
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=AcW2TxEalYj6c8fhrOWB78JlaB-1hApmdDzCUhQlzW4,5513
400
- unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=XdMJRgQvcR4Lo2Udl1y8-ZkJw6nVrcXTL-gTsaAHAJw,11196
397
+ unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=D43wrV2ADvQsToIYwbEWnZ7mhzlsYcZMFCqf6jIC7dQ,1333
398
+ unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=__Wf5lkCQGhbtEH_2DxfNmQyWP-UKC9o_KEawG81jY0,4905
399
+ unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=YrmhAL1RQ1c5-2fnR3UAyj_4KfvjYTQ2cWzpvsdJOnU,5535
400
+ unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=0s0oBfMttPg5JL6jn8SsoCeTSRoXXdVy2bJAZv_hiSk,5576
401
+ unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=rWDkefUnYkzJT0mhIcHxieECdaIWLTvbDcOcZgLA4FQ,11636
401
402
  unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=9605K36nQ5-gBxzt1daYKYotON1SE85RETusqCJrbdk,5230
402
- unstructured_ingest-0.2.0.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
403
- unstructured_ingest-0.2.0.dist-info/METADATA,sha256=F8s5t23zy5zdxICEj6BseR0teRWCQc7IjB_xtlZUkaM,7271
404
- unstructured_ingest-0.2.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
405
- unstructured_ingest-0.2.0.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
406
- unstructured_ingest-0.2.0.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
407
- unstructured_ingest-0.2.0.dist-info/RECORD,,
403
+ unstructured_ingest-0.2.1.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
404
+ unstructured_ingest-0.2.1.dist-info/METADATA,sha256=NBV3OAonxt8Y0Tra7LWqQBoLSROwA106sf8vDCsXu2k,7271
405
+ unstructured_ingest-0.2.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
406
+ unstructured_ingest-0.2.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
407
+ unstructured_ingest-0.2.1.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
408
+ unstructured_ingest-0.2.1.dist-info/RECORD,,