unstructured-ingest 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (64)
  1. test/integration/chunkers/test_chunkers.py +0 -11
  2. test/integration/connectors/conftest.py +11 -1
  3. test/integration/connectors/databricks_tests/test_volumes_native.py +4 -3
  4. test/integration/connectors/duckdb/conftest.py +14 -0
  5. test/integration/connectors/duckdb/test_duckdb.py +51 -44
  6. test/integration/connectors/duckdb/test_motherduck.py +37 -48
  7. test/integration/connectors/elasticsearch/test_elasticsearch.py +26 -4
  8. test/integration/connectors/elasticsearch/test_opensearch.py +26 -3
  9. test/integration/connectors/sql/test_postgres.py +102 -91
  10. test/integration/connectors/sql/test_singlestore.py +111 -99
  11. test/integration/connectors/sql/test_snowflake.py +142 -117
  12. test/integration/connectors/sql/test_sqlite.py +86 -75
  13. test/integration/connectors/test_astradb.py +22 -1
  14. test/integration/connectors/test_azure_ai_search.py +25 -3
  15. test/integration/connectors/test_chroma.py +120 -0
  16. test/integration/connectors/test_confluence.py +4 -4
  17. test/integration/connectors/test_delta_table.py +1 -0
  18. test/integration/connectors/test_kafka.py +4 -4
  19. test/integration/connectors/test_milvus.py +21 -0
  20. test/integration/connectors/test_mongodb.py +3 -3
  21. test/integration/connectors/test_neo4j.py +236 -0
  22. test/integration/connectors/test_pinecone.py +25 -1
  23. test/integration/connectors/test_qdrant.py +25 -2
  24. test/integration/connectors/test_s3.py +9 -6
  25. test/integration/connectors/utils/docker.py +6 -0
  26. test/integration/connectors/utils/validation/__init__.py +0 -0
  27. test/integration/connectors/utils/validation/destination.py +88 -0
  28. test/integration/connectors/utils/validation/equality.py +75 -0
  29. test/integration/connectors/utils/{validation.py → validation/source.py} +15 -91
  30. test/integration/connectors/utils/validation/utils.py +36 -0
  31. unstructured_ingest/__version__.py +1 -1
  32. unstructured_ingest/utils/chunking.py +11 -0
  33. unstructured_ingest/utils/data_prep.py +36 -0
  34. unstructured_ingest/v2/interfaces/upload_stager.py +70 -6
  35. unstructured_ingest/v2/interfaces/uploader.py +11 -2
  36. unstructured_ingest/v2/pipeline/steps/stage.py +3 -1
  37. unstructured_ingest/v2/processes/connectors/astradb.py +8 -30
  38. unstructured_ingest/v2/processes/connectors/azure_ai_search.py +16 -40
  39. unstructured_ingest/v2/processes/connectors/chroma.py +36 -59
  40. unstructured_ingest/v2/processes/connectors/couchbase.py +42 -52
  41. unstructured_ingest/v2/processes/connectors/delta_table.py +11 -33
  42. unstructured_ingest/v2/processes/connectors/duckdb/base.py +26 -26
  43. unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +29 -20
  44. unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +37 -44
  45. unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +5 -30
  46. unstructured_ingest/v2/processes/connectors/gitlab.py +32 -31
  47. unstructured_ingest/v2/processes/connectors/google_drive.py +32 -29
  48. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +2 -4
  49. unstructured_ingest/v2/processes/connectors/kdbai.py +44 -70
  50. unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +8 -10
  51. unstructured_ingest/v2/processes/connectors/local.py +13 -2
  52. unstructured_ingest/v2/processes/connectors/milvus.py +16 -57
  53. unstructured_ingest/v2/processes/connectors/mongodb.py +4 -8
  54. unstructured_ingest/v2/processes/connectors/neo4j.py +381 -0
  55. unstructured_ingest/v2/processes/connectors/pinecone.py +3 -33
  56. unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +32 -41
  57. unstructured_ingest/v2/processes/connectors/sql/sql.py +41 -40
  58. unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +9 -31
  59. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/METADATA +18 -14
  60. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/RECORD +64 -56
  61. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/LICENSE.md +0 -0
  62. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/WHEEL +0 -0
  63. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/entry_points.txt +0 -0
  64. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/top_level.txt +0 -0
@@ -15,7 +15,7 @@ from dateutil import parser
15
15
  from pydantic import Field, Secret
16
16
 
17
17
  from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
18
- from unstructured_ingest.utils.data_prep import split_dataframe
18
+ from unstructured_ingest.utils.data_prep import get_data_df, split_dataframe
19
19
  from unstructured_ingest.v2.constants import RECORD_ID_LABEL
20
20
  from unstructured_ingest.v2.interfaces import (
21
21
  AccessConfig,
@@ -238,27 +238,24 @@ class SQLUploadStagerConfig(UploadStagerConfig):
238
238
  class SQLUploadStager(UploadStager):
239
239
  upload_stager_config: SQLUploadStagerConfig = field(default_factory=SQLUploadStagerConfig)
240
240
 
241
- @staticmethod
242
- def conform_dict(data: dict, file_data: FileData) -> pd.DataFrame:
243
- working_data = data.copy()
244
- output = []
245
- for element in working_data:
246
- metadata: dict[str, Any] = element.pop("metadata", {})
247
- data_source = metadata.pop("data_source", {})
248
- coordinates = metadata.pop("coordinates", {})
241
+ def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
242
+ data = element_dict.copy()
243
+ metadata: dict[str, Any] = data.pop("metadata", {})
244
+ data_source = metadata.pop("data_source", {})
245
+ coordinates = metadata.pop("coordinates", {})
249
246
 
250
- element.update(metadata)
251
- element.update(data_source)
252
- element.update(coordinates)
247
+ data.update(metadata)
248
+ data.update(data_source)
249
+ data.update(coordinates)
253
250
 
254
- element["id"] = get_enhanced_element_id(element_dict=element, file_data=file_data)
251
+ data["id"] = get_enhanced_element_id(element_dict=data, file_data=file_data)
255
252
 
256
- # remove extraneous, not supported columns
257
- element = {k: v for k, v in element.items() if k in _COLUMNS}
258
- element[RECORD_ID_LABEL] = file_data.identifier
259
- output.append(element)
253
+ # remove extraneous, not supported columns
254
+ element = {k: v for k, v in data.items() if k in _COLUMNS}
255
+ element[RECORD_ID_LABEL] = file_data.identifier
256
+ return element
260
257
 
261
- df = pd.DataFrame.from_dict(output)
258
+ def conform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
262
259
  for column in filter(lambda x: x in df.columns, _DATE_COLUMNS):
263
260
  df[column] = df[column].apply(parse_date_string)
264
261
  for column in filter(
@@ -283,19 +280,19 @@ class SQLUploadStager(UploadStager):
283
280
  output_filename: str,
284
281
  **kwargs: Any,
285
282
  ) -> Path:
286
- with open(elements_filepath) as elements_file:
287
- elements_contents: list[dict] = json.load(elements_file)
283
+ elements_contents = self.get_data(elements_filepath=elements_filepath)
288
284
 
289
- df = self.conform_dict(data=elements_contents, file_data=file_data)
290
- if Path(output_filename).suffix != ".json":
291
- output_filename = f"{output_filename}.json"
292
- else:
293
- output_filename = f"{Path(output_filename).stem}.json"
294
- output_path = Path(output_dir) / Path(f"{output_filename}")
295
- output_path.parent.mkdir(parents=True, exist_ok=True)
285
+ df = pd.DataFrame(
286
+ data=[
287
+ self.conform_dict(element_dict=element_dict, file_data=file_data)
288
+ for element_dict in elements_contents
289
+ ]
290
+ )
291
+ df = self.conform_dataframe(df=df)
296
292
 
297
- with output_path.open("w") as output_file:
298
- df.to_json(output_file, orient="records", lines=True)
293
+ output_path = self.get_output_path(output_filename=output_filename, output_dir=output_dir)
294
+
295
+ self.write_output(output_path=output_path, data=df.to_dict(orient="records"))
299
296
  return output_path
300
297
 
301
298
 
@@ -361,8 +358,15 @@ class SQLUploader(Uploader):
361
358
  for column in missing_columns:
362
359
  df[column] = pd.Series()
363
360
 
364
- def upload_contents(self, path: Path) -> None:
365
- df = pd.read_json(path, orient="records", lines=True)
361
+ def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
362
+ if self.can_delete():
363
+ self.delete_by_record_id(file_data=file_data)
364
+ else:
365
+ logger.warning(
366
+ f"table doesn't contain expected "
367
+ f"record id column "
368
+ f"{self.upload_config.record_id_key}, skipping delete"
369
+ )
366
370
  df.replace({np.nan: None}, inplace=True)
367
371
  self._fit_to_schema(df=df, columns=self.get_table_columns())
368
372
 
@@ -411,13 +415,10 @@ class SQLUploader(Uploader):
411
415
  rowcount = cursor.rowcount
412
416
  logger.info(f"deleted {rowcount} rows from table {self.upload_config.table_name}")
413
417
 
418
+ def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
419
+ df = pd.DataFrame(data)
420
+ self.upload_dataframe(df=df, file_data=file_data)
421
+
414
422
  def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
415
- if self.can_delete():
416
- self.delete_by_record_id(file_data=file_data)
417
- else:
418
- logger.warning(
419
- f"table doesn't contain expected "
420
- f"record id column "
421
- f"{self.upload_config.record_id_key}, skipping delete"
422
- )
423
- self.upload_contents(path=path)
423
+ df = get_data_df(path=path)
424
+ self.upload_dataframe(df=df, file_data=file_data)
@@ -3,7 +3,6 @@ from abc import ABC, abstractmethod
3
3
  from contextlib import contextmanager
4
4
  from dataclasses import dataclass, field
5
5
  from datetime import date, datetime
6
- from pathlib import Path
7
6
  from typing import TYPE_CHECKING, Any, Generator, Optional
8
7
 
9
8
  from dateutil import parser
@@ -74,11 +73,11 @@ class WeaviateUploadStager(UploadStager):
74
73
  logger.debug(f"date {date_string} string not a timestamp: {e}")
75
74
  return parser.parse(date_string)
76
75
 
77
- @classmethod
78
- def conform_dict(cls, data: dict, file_data: FileData) -> dict:
76
+ def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
79
77
  """
80
78
  Updates the element dictionary to conform to the Weaviate schema
81
79
  """
80
+ data = element_dict.copy()
82
81
  working_data = data.copy()
83
82
  # Dict as string formatting
84
83
  if (
@@ -111,7 +110,7 @@ class WeaviateUploadStager(UploadStager):
111
110
  .get("data_source", {})
112
111
  .get("date_created")
113
112
  ):
114
- working_data["metadata"]["data_source"]["date_created"] = cls.parse_date_string(
113
+ working_data["metadata"]["data_source"]["date_created"] = self.parse_date_string(
115
114
  date_created
116
115
  ).strftime(
117
116
  "%Y-%m-%dT%H:%M:%S.%fZ",
@@ -122,7 +121,7 @@ class WeaviateUploadStager(UploadStager):
122
121
  .get("data_source", {})
123
122
  .get("date_modified")
124
123
  ):
125
- working_data["metadata"]["data_source"]["date_modified"] = cls.parse_date_string(
124
+ working_data["metadata"]["data_source"]["date_modified"] = self.parse_date_string(
126
125
  date_modified
127
126
  ).strftime(
128
127
  "%Y-%m-%dT%H:%M:%S.%fZ",
@@ -133,14 +132,14 @@ class WeaviateUploadStager(UploadStager):
133
132
  .get("data_source", {})
134
133
  .get("date_processed")
135
134
  ):
136
- working_data["metadata"]["data_source"]["date_processed"] = cls.parse_date_string(
135
+ working_data["metadata"]["data_source"]["date_processed"] = self.parse_date_string(
137
136
  date_processed
138
137
  ).strftime(
139
138
  "%Y-%m-%dT%H:%M:%S.%fZ",
140
139
  )
141
140
 
142
141
  if last_modified := working_data.get("metadata", {}).get("last_modified"):
143
- working_data["metadata"]["last_modified"] = cls.parse_date_string(
142
+ working_data["metadata"]["last_modified"] = self.parse_date_string(
144
143
  last_modified
145
144
  ).strftime(
146
145
  "%Y-%m-%dT%H:%M:%S.%fZ",
@@ -159,25 +158,6 @@ class WeaviateUploadStager(UploadStager):
159
158
  working_data[RECORD_ID_LABEL] = file_data.identifier
160
159
  return working_data
161
160
 
162
- def run(
163
- self,
164
- elements_filepath: Path,
165
- file_data: FileData,
166
- output_dir: Path,
167
- output_filename: str,
168
- **kwargs: Any,
169
- ) -> Path:
170
- with open(elements_filepath) as elements_file:
171
- elements_contents = json.load(elements_file)
172
- updated_elements = [
173
- self.conform_dict(data=element, file_data=file_data) for element in elements_contents
174
- ]
175
- output_path = Path(output_dir) / Path(f"{output_filename}.json")
176
- output_path.parent.mkdir(parents=True, exist_ok=True)
177
- with open(output_path, "w") as output_file:
178
- json.dump(updated_elements, output_file, indent=2)
179
- return output_path
180
-
181
161
 
182
162
  class WeaviateUploaderConfig(UploaderConfig):
183
163
  collection: str = Field(description="The name of the collection this object belongs to")
@@ -268,18 +248,16 @@ class WeaviateUploader(Uploader, ABC):
268
248
  if not resp.failed and not resp.successful:
269
249
  break
270
250
 
271
- def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
272
- with path.open("r") as file:
273
- elements_dict = json.load(file)
251
+ def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
274
252
  logger.info(
275
- f"writing {len(elements_dict)} objects to destination "
253
+ f"writing {len(data)} objects to destination "
276
254
  f"class {self.connection_config.access_config} "
277
255
  )
278
256
 
279
257
  with self.connection_config.get_client() as weaviate_client:
280
258
  self.delete_by_record_id(client=weaviate_client, file_data=file_data)
281
259
  with self.upload_config.get_batch_client(client=weaviate_client) as batch_client:
282
- for e in elements_dict:
260
+ for e in data:
283
261
  vector = e.pop("embeddings", None)
284
262
  batch_client.add_object(
285
263
  collection=self.upload_config.collection,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.3.8
3
+ Version: 0.3.9
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -23,12 +23,13 @@ Requires-Python: >=3.9.0,<3.13
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
25
  Requires-Dist: click
26
- Requires-Dist: pandas
27
26
  Requires-Dist: pydantic>=2.7
28
- Requires-Dist: opentelemetry-sdk
29
- Requires-Dist: tqdm
27
+ Requires-Dist: pandas
30
28
  Requires-Dist: dataclasses-json
31
29
  Requires-Dist: python-dateutil
30
+ Requires-Dist: tqdm
31
+ Requires-Dist: ndjson
32
+ Requires-Dist: opentelemetry-sdk
32
33
  Provides-Extra: airtable
33
34
  Requires-Dist: pyairtable; extra == "airtable"
34
35
  Provides-Extra: astradb
@@ -41,8 +42,8 @@ Requires-Dist: azure-search-documents; extra == "azure-ai-search"
41
42
  Provides-Extra: bedrock
42
43
  Requires-Dist: boto3; extra == "bedrock"
43
44
  Provides-Extra: biomed
44
- Requires-Dist: requests; extra == "biomed"
45
45
  Requires-Dist: bs4; extra == "biomed"
46
+ Requires-Dist: requests; extra == "biomed"
46
47
  Provides-Extra: box
47
48
  Requires-Dist: fsspec; extra == "box"
48
49
  Requires-Dist: boxfs; extra == "box"
@@ -51,8 +52,8 @@ Requires-Dist: chromadb; extra == "chroma"
51
52
  Provides-Extra: clarifai
52
53
  Requires-Dist: clarifai; extra == "clarifai"
53
54
  Provides-Extra: confluence
54
- Requires-Dist: requests; extra == "confluence"
55
55
  Requires-Dist: atlassian-python-api; extra == "confluence"
56
+ Requires-Dist: requests; extra == "confluence"
56
57
  Provides-Extra: couchbase
57
58
  Requires-Dist: couchbase; extra == "couchbase"
58
59
  Provides-Extra: csv
@@ -60,8 +61,8 @@ Requires-Dist: unstructured[tsv]; extra == "csv"
60
61
  Provides-Extra: databricks-volumes
61
62
  Requires-Dist: databricks-sdk; extra == "databricks-volumes"
62
63
  Provides-Extra: delta-table
63
- Requires-Dist: deltalake; extra == "delta-table"
64
64
  Requires-Dist: boto3; extra == "delta-table"
65
+ Requires-Dist: deltalake; extra == "delta-table"
65
66
  Provides-Extra: discord
66
67
  Requires-Dist: discord-py; extra == "discord"
67
68
  Provides-Extra: doc
@@ -80,8 +81,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
80
81
  Provides-Extra: embed-mixedbreadai
81
82
  Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
82
83
  Provides-Extra: embed-octoai
83
- Requires-Dist: tiktoken; extra == "embed-octoai"
84
84
  Requires-Dist: openai; extra == "embed-octoai"
85
+ Requires-Dist: tiktoken; extra == "embed-octoai"
85
86
  Provides-Extra: embed-vertexai
86
87
  Requires-Dist: vertexai; extra == "embed-vertexai"
87
88
  Provides-Extra: embed-voyageai
@@ -93,15 +94,15 @@ Requires-Dist: fsspec; extra == "gcs"
93
94
  Requires-Dist: bs4; extra == "gcs"
94
95
  Requires-Dist: gcsfs; extra == "gcs"
95
96
  Provides-Extra: github
96
- Requires-Dist: requests; extra == "github"
97
97
  Requires-Dist: pygithub>1.58.0; extra == "github"
98
+ Requires-Dist: requests; extra == "github"
98
99
  Provides-Extra: gitlab
99
100
  Requires-Dist: python-gitlab; extra == "gitlab"
100
101
  Provides-Extra: google-drive
101
102
  Requires-Dist: google-api-python-client; extra == "google-drive"
102
103
  Provides-Extra: hubspot
103
- Requires-Dist: hubspot-api-client; extra == "hubspot"
104
104
  Requires-Dist: urllib3; extra == "hubspot"
105
+ Requires-Dist: hubspot-api-client; extra == "hubspot"
105
106
  Provides-Extra: jira
106
107
  Requires-Dist: atlassian-python-api; extra == "jira"
107
108
  Provides-Extra: kafka
@@ -118,20 +119,23 @@ Provides-Extra: mongodb
118
119
  Requires-Dist: pymongo; extra == "mongodb"
119
120
  Provides-Extra: msg
120
121
  Requires-Dist: unstructured[msg]; extra == "msg"
122
+ Provides-Extra: neo4j
123
+ Requires-Dist: neo4j; extra == "neo4j"
124
+ Requires-Dist: cymple; extra == "neo4j"
121
125
  Provides-Extra: notion
126
+ Requires-Dist: htmlBuilder; extra == "notion"
122
127
  Requires-Dist: httpx; extra == "notion"
123
128
  Requires-Dist: backoff; extra == "notion"
124
- Requires-Dist: htmlBuilder; extra == "notion"
125
129
  Requires-Dist: notion-client; extra == "notion"
126
130
  Provides-Extra: odt
127
131
  Requires-Dist: unstructured[odt]; extra == "odt"
128
132
  Provides-Extra: onedrive
129
133
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
130
- Requires-Dist: bs4; extra == "onedrive"
131
134
  Requires-Dist: msal; extra == "onedrive"
135
+ Requires-Dist: bs4; extra == "onedrive"
132
136
  Provides-Extra: openai
133
- Requires-Dist: tiktoken; extra == "openai"
134
137
  Requires-Dist: openai; extra == "openai"
138
+ Requires-Dist: tiktoken; extra == "openai"
135
139
  Provides-Extra: opensearch
136
140
  Requires-Dist: opensearch-py; extra == "opensearch"
137
141
  Provides-Extra: org
@@ -165,8 +169,8 @@ Requires-Dist: s3fs; extra == "s3"
165
169
  Provides-Extra: salesforce
166
170
  Requires-Dist: simple-salesforce; extra == "salesforce"
167
171
  Provides-Extra: sftp
168
- Requires-Dist: paramiko; extra == "sftp"
169
172
  Requires-Dist: fsspec; extra == "sftp"
173
+ Requires-Dist: paramiko; extra == "sftp"
170
174
  Provides-Extra: sharepoint
171
175
  Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
172
176
  Requires-Dist: msal; extra == "sharepoint"
@@ -2,40 +2,47 @@ test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  test/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  test/integration/utils.py,sha256=CWqzEGw6TA_ZoP9hRUkW64TWYssooBbufcTRmbJvod8,401
4
4
  test/integration/chunkers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- test/integration/chunkers/test_chunkers.py,sha256=pqn1Rqh36jZTJL4qpU0iuOMFAEQ-LrKAPOgWtQMAt_I,1482
5
+ test/integration/chunkers/test_chunkers.py,sha256=USkltQN_mVVCxI0FkJsrS1gnLXlVr-fvsc0tPaK2sWI,1062
6
6
  test/integration/connectors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- test/integration/connectors/conftest.py,sha256=6dVNMBrL6WIO4KXA-0nf2tNrPYk_tsor8uomi6fbi3Q,727
8
- test/integration/connectors/test_astradb.py,sha256=QPFrODXmOHagpuKaiooxXb3OEW93w2g4fmq8BkaBCnY,5303
9
- test/integration/connectors/test_azure_ai_search.py,sha256=dae4GifRiKue5YpsxworDiaMQoMsxcPDBithb6OFkx4,8876
10
- test/integration/connectors/test_confluence.py,sha256=xcPmZ_vi_pkCt-tUPn10P49FH9i_9YUbrAPO6fYk5rU,3521
11
- test/integration/connectors/test_delta_table.py,sha256=GSzWIkbEUzOrRPt2F1uO0dabcp7kTFDj75BhhI2y-WU,6856
12
- test/integration/connectors/test_kafka.py,sha256=P8XN8cGUqqNuymXVTgtv15xxtRvLPAu6hOOZMvwoaJk,10721
7
+ test/integration/connectors/conftest.py,sha256=vYs4WDlCuieAwwErkJxCk4a1lGvr3qpeiAm-YaDznSo,1018
8
+ test/integration/connectors/test_astradb.py,sha256=yNRbIfSOz7izIljv46-0yFiafQssVRdYGrAdTl1w45c,5941
9
+ test/integration/connectors/test_azure_ai_search.py,sha256=EGV-G_Lq3h6pHhhmmQGWjIU1Mgsqg3ICZSgF0z3lZZs,9608
10
+ test/integration/connectors/test_chroma.py,sha256=KQCzBJsOHAOtg0Ehp0tNtuYchFtiSmhHDKyOju33kJg,3686
11
+ test/integration/connectors/test_confluence.py,sha256=adJxIggjuO-jgMimBZdv_AqWeBFlQoodELucIYwWC98,3546
12
+ test/integration/connectors/test_delta_table.py,sha256=xsnJmwlWVQrccYeAtpt2lm0DYm2jGxiKXeERQXqCDCM,6884
13
+ test/integration/connectors/test_kafka.py,sha256=0Tur_iUupxG0UffpLzZ7LOjcNpRkt0-0v9ZB-pWjtJA,10746
13
14
  test/integration/connectors/test_lancedb.py,sha256=U2HfIrf6iJ7lYMn-vz0j-LesVyDY-jc9QrQhlJVhG9Q,9183
14
- test/integration/connectors/test_milvus.py,sha256=p4UujDr_tsRaQDmhDmDZp38t8oSFm7hrTqiq6NNuhGo,5933
15
- test/integration/connectors/test_mongodb.py,sha256=YeS_DUnVYN02F76j87W8RhXGHnJMzQYb3n-L1-oWGXI,12254
15
+ test/integration/connectors/test_milvus.py,sha256=abYQOjF8grEFj3FB1_wQgFSbWPFWfZ2pEsgKarfKJE4,6574
16
+ test/integration/connectors/test_mongodb.py,sha256=rZeIY4-JGD8aVHNHMubo3J-xD1NDoqGsGK0U0tYly5k,12273
17
+ test/integration/connectors/test_neo4j.py,sha256=Esiq_Z9k1JLrWNXPmLBsX3LLwyEozwKoxX7iwMEJjRM,8252
16
18
  test/integration/connectors/test_onedrive.py,sha256=KIkBwKh1hnv203VCL2UABnDkS_bP4NxOFm1AL8EPGLA,3554
17
- test/integration/connectors/test_pinecone.py,sha256=i-v5WkAI9M6SUZI7ch9qdILlRHopAdptpkSY12-BaTk,9483
18
- test/integration/connectors/test_qdrant.py,sha256=eTsZPVWEZd6-1Hnipkr4DK_bdTFhVfMmgRrUUyC2HZM,7141
19
- test/integration/connectors/test_s3.py,sha256=YHEYMqWTKTfR7wlL4VoxtgMs1YiYKyhLIBdG-anaQGo,6896
19
+ test/integration/connectors/test_pinecone.py,sha256=suPFi40d6rHXurQQLIpCzW5XRTdgzlP-f-KLPhGCUHo,10208
20
+ test/integration/connectors/test_qdrant.py,sha256=hyuqSJDaylkQVxWh7byD8jo8bwPuBxSa8MWRD3sBu-Y,7906
21
+ test/integration/connectors/test_s3.py,sha256=PJaAwFRF2lXMQlkbv9JHpngPc6706ML7zowOlXT3TcY,7033
20
22
  test/integration/connectors/databricks_tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- test/integration/connectors/databricks_tests/test_volumes_native.py,sha256=k4lALbwNtlyuI3wd3OHoBULI21E3Ck2Fo8EJXaVfwgw,5812
23
+ test/integration/connectors/databricks_tests/test_volumes_native.py,sha256=9Ndo0Q8uzBGGOYYjmTV6EdZtaTRy97lHAXvJczTOJe4,5859
22
24
  test/integration/connectors/duckdb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- test/integration/connectors/duckdb/test_duckdb.py,sha256=SSxg6jRJqo1a7bGFKDYxuO4c5yq5Zv_kcigSYBn_Jtc,2910
24
- test/integration/connectors/duckdb/test_motherduck.py,sha256=Nn8JaqnjWGNweYdarwt4XGiYWiz6upbnJMEhp3FF2SU,3833
25
+ test/integration/connectors/duckdb/conftest.py,sha256=rlBHMJTiJ2a5xbvIxTOyhhcuTBc9DO-yTzD6Kf8X3hY,301
26
+ test/integration/connectors/duckdb/test_duckdb.py,sha256=tZfHJYNILVqwT20XD-aJUFZ67TnJvHLpfAxNvNiE51o,2891
27
+ test/integration/connectors/duckdb/test_motherduck.py,sha256=45anpo0bhbHJh84jOIoKULKrneCMSoq7GuFaBy-HS7g,3206
25
28
  test/integration/connectors/elasticsearch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
29
  test/integration/connectors/elasticsearch/conftest.py,sha256=-i4_7MkIxSQENz7nuD2uHuhGU9mZ33vpeTPhHtRpQfs,989
27
- test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=nqdHwBpvgk_74orzDaQIKALK5cb0YloxSdt7QDJX0r0,11169
28
- test/integration/connectors/elasticsearch/test_opensearch.py,sha256=Rk4tQ_Qv5icycDWMUpnzTbg-QzwGyb6nKqB0gDef9D0,10555
30
+ test/integration/connectors/elasticsearch/test_elasticsearch.py,sha256=Lm8topVssTsqcI8H2Tzohuxb9j-CFHv9orM6WfAqCZw,11933
31
+ test/integration/connectors/elasticsearch/test_opensearch.py,sha256=fWpZrhzRiVpm9AOlZvgZRCjyXSYvWG7-8j06x-HR3PY,11311
29
32
  test/integration/connectors/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
- test/integration/connectors/sql/test_postgres.py,sha256=lrymDI7bVX_4qij5gsUc_bTvHPeelu6hpJemQ6WWmlY,6783
31
- test/integration/connectors/sql/test_singlestore.py,sha256=iCp9q6tzhNIUCUubCPiRKj6VmJnwot4JGo9fkkTHg_U,5960
32
- test/integration/connectors/sql/test_snowflake.py,sha256=DqQIV9H5Uv7HaHtDyrAPdqefd316oVt5lKtdJ2Zdk6Q,7082
33
- test/integration/connectors/sql/test_sqlite.py,sha256=gSfp2hXAb5BGknzZXVa7K5bBwEb5Li4k5493mQCFjBQ,5719
33
+ test/integration/connectors/sql/test_postgres.py,sha256=DYrWosG4JC4IgMkG1fVqFc7D2m0wDS5NbAm87bvuHws,6734
34
+ test/integration/connectors/sql/test_singlestore.py,sha256=-B4pn16jKeZo5xTyn_DHvMzSCvxQHWDD-4pv7mFfQ-Q,5917
35
+ test/integration/connectors/sql/test_snowflake.py,sha256=-fzpaB1LlFOW2bZmJHQMbH6T0zU30dMypAHLRAbT4d0,7258
36
+ test/integration/connectors/sql/test_sqlite.py,sha256=bs6-yPQOSeIqauIvOQj-zK7RWxK0bBIh9RYB9MPHRiw,5720
34
37
  test/integration/connectors/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
38
  test/integration/connectors/utils/constants.py,sha256=0zSPnsZVqJuNhXduXvdXFQLZTRIQa5Fo_1qjBYVCfb8,209
36
- test/integration/connectors/utils/docker.py,sha256=lnSjRgYoQa5c5nBdg2eLkB8KJVOjk4eyqq_C6PtTkME,4806
39
+ test/integration/connectors/utils/docker.py,sha256=8uOTJ3AVG1dxK4OiLvOLfRxL_TsYQX2KKCID9TZ7-Ac,4995
37
40
  test/integration/connectors/utils/docker_compose.py,sha256=GVTB6Cel05c0VQ2n4AwkQQx_cBfz13ZTs1HpbaYipNU,2223
38
- test/integration/connectors/utils/validation.py,sha256=SwvPVuHjJxTo8xEUwnuL9FZNpu3sZZ8iouOz5xh_kB8,14272
41
+ test/integration/connectors/utils/validation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
+ test/integration/connectors/utils/validation/destination.py,sha256=BG3161_T8aUMX2mYaDlcTneJPEWaqOM1bG3hCLiLiR4,3008
43
+ test/integration/connectors/utils/validation/equality.py,sha256=8riUASLHccqnRZLoEv4rz5_ba54PFEHcxfxuJaJy6X0,2570
44
+ test/integration/connectors/utils/validation/source.py,sha256=fGp8smpu5GiK4nyArHi55vAZSkbqziduKT9w9nrZJUk,11482
45
+ test/integration/connectors/utils/validation/utils.py,sha256=xYYvAbqP6_lZyH09_JjB4w2Sf8aQPvDVT5vZTs05ILs,1428
39
46
  test/integration/connectors/weaviate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
47
  test/integration/connectors/weaviate/conftest.py,sha256=6Q6QdrLJmGHowRFSmoVSzup2EX6qASfS2Z5tqlpTm9M,387
41
48
  test/integration/connectors/weaviate/test_cloud.py,sha256=07VxNRxWWcgTstFfpoZ1FlVnEhcBnQlo5nosWKjKz_4,979
@@ -87,7 +94,7 @@ test/unit/v2/partitioners/test_partitioner.py,sha256=iIYg7IpftV3LusoO4H8tr1IHY1U
87
94
  test/unit/v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
88
95
  test/unit/v2/utils/data_generator.py,sha256=UoYVNjG4S4wlaA9gceQ82HIpF9_6I1UTHD1_GrQBHp0,973
89
96
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
90
- unstructured_ingest/__version__.py,sha256=91iYzXMGhyMPaHI-Y8piVjcrk2AXRekqcknDzOkyQzk,42
97
+ unstructured_ingest/__version__.py,sha256=W33_nIlFxbGYaw_yy4UyMO0bL5bOhODY9fh1eWzuoo8,42
91
98
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
92
99
  unstructured_ingest/interfaces.py,sha256=OYVUP0bzBJpT-Lz92BDyz_hLBvyfxkuSwWHhUdnUayA,31493
93
100
  unstructured_ingest/logger.py,sha256=S5nSqGcABoQyeicgRnBQFjDScCaTvFVivOCvbo-laL0,4479
@@ -344,9 +351,9 @@ unstructured_ingest/runner/writers/fsspec/dropbox.py,sha256=y0kmx5Xjc9Ypfg6t6N_x
344
351
  unstructured_ingest/runner/writers/fsspec/gcs.py,sha256=ia-gconOz1kWI1jmYeB9NY6cwjWfofoZAydKfZsaFs0,606
345
352
  unstructured_ingest/runner/writers/fsspec/s3.py,sha256=kHJq2O3864QBd_tL2SKb0mdywczOCr2VI5e_bVms-Vw,622
346
353
  unstructured_ingest/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
347
- unstructured_ingest/utils/chunking.py,sha256=efWEfMcCukG5zASZrXhkNgAX8AzHa6t3rClMzm2TwFE,1521
354
+ unstructured_ingest/utils/chunking.py,sha256=9b3sXMA6L8RW5xAkKQbwdtVudGLAcj_sgT6Grh5tyYM,1870
348
355
  unstructured_ingest/utils/compression.py,sha256=NNiY-2S2Gf3at7zC1PYxMijaEza9vVSzRn5mdFf6mHo,4434
349
- unstructured_ingest/utils/data_prep.py,sha256=IDAedOSBdgZpD9IY4tLJT-rmKGV7GHtU6KRj6VM-_tE,4666
356
+ unstructured_ingest/utils/data_prep.py,sha256=K4Rfx2V6EiDVFHDAkt0bvcjDqc_rn6YFM5-O50YSx-U,5808
350
357
  unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
351
358
  unstructured_ingest/utils/google_filetype.py,sha256=YVspEkiiBrRUSGVeVbsavvLvTmizdy2e6TsjigXTSRU,468
352
359
  unstructured_ingest/utils/string_and_date_utils.py,sha256=kijtPlGAbH376vVjFSo5H_ZhW-FEcMC2sCNsSNwDOjo,1729
@@ -376,8 +383,8 @@ unstructured_ingest/v2/interfaces/file_data.py,sha256=D71bXImJ7Pyjtl3I3pa2O2B2iB
376
383
  unstructured_ingest/v2/interfaces/indexer.py,sha256=gsa1MLhFa82BzD2h4Yb7ons0VxRwKINZOrzvHAahwVU,846
377
384
  unstructured_ingest/v2/interfaces/process.py,sha256=BgglTu5K93FnDDopZKKr_rkK2LTZOguR6kcQjKHjF40,392
378
385
  unstructured_ingest/v2/interfaces/processor.py,sha256=VX7JqXlbG1plxMK8THWhWINPbTICaaUEk4XUXhnOixY,3303
379
- unstructured_ingest/v2/interfaces/upload_stager.py,sha256=ZFkDxcwKn-6EPrTbdBEgOkz1kGAq4gUtze98KP48KG4,1146
380
- unstructured_ingest/v2/interfaces/uploader.py,sha256=JmZDl1blJa5rS61YHCae3Hfet84ixSSJ_NYRjflYsbY,1168
386
+ unstructured_ingest/v2/interfaces/upload_stager.py,sha256=dYZWDABbQ26_8X_e0QsAIgsXcXWPNAXh_hyNAEND1kI,3993
387
+ unstructured_ingest/v2/interfaces/uploader.py,sha256=T2oHbN-d4Px1w1oATKKYZA10aUssqytEpiaqBM92r0Q,1600
381
388
  unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
382
389
  unstructured_ingest/v2/pipeline/interfaces.py,sha256=-Y6gPnl-SbNxIx5-dQCmiYSPKUMjivrRlBLIKIUWVeM,8658
383
390
  unstructured_ingest/v2/pipeline/otel.py,sha256=K3pQvWVgWzyOWMKCBUofsH7wTZPJ0Ysw5sLjMBLW41I,1088
@@ -389,7 +396,7 @@ unstructured_ingest/v2/pipeline/steps/embed.py,sha256=-YFvmchdsonWiSXxaD7PJfuUUt
389
396
  unstructured_ingest/v2/pipeline/steps/filter.py,sha256=q7bNieaFMprqoF8Mx7w-ZN6jyA5peiGeTGyPtvcV-uw,1199
390
397
  unstructured_ingest/v2/pipeline/steps/index.py,sha256=YUUf1sYZRZSrRgapca3Sfzk1sNPJ05yyTQ5wKlyDjEo,3543
391
398
  unstructured_ingest/v2/pipeline/steps/partition.py,sha256=9MQViptxK3ALKco8uE4gK9PpEoGq5JjzyU14C_18blU,3193
392
- unstructured_ingest/v2/pipeline/steps/stage.py,sha256=cphKgHScLz2rNLZRI5Olsb6dAH-MKGu3p6MYS1BEzkA,2246
399
+ unstructured_ingest/v2/pipeline/steps/stage.py,sha256=SabIBBYE0dXeZKjJwoIdPu8UEN_0MV0tLjEhdPEOxBE,2335
393
400
  unstructured_ingest/v2/pipeline/steps/uncompress.py,sha256=CFSy4tGp6BAvF0oIwWFN8v4zFzh5pRDeESjEn5iP9hE,1756
394
401
  unstructured_ingest/v2/pipeline/steps/upload.py,sha256=zlgXgwReX9TBOdfTpS9hETah4SeSmzPB2g8dAGfLIvM,1987
395
402
  unstructured_ingest/v2/processes/__init__.py,sha256=FaHWSCGyc7GWVnAsNEUUj7L8hT8gCVY3_hUE2VzWtUg,462
@@ -401,21 +408,22 @@ unstructured_ingest/v2/processes/partitioner.py,sha256=agpHwB9FR8OZVQqE7zFEb0IcD
401
408
  unstructured_ingest/v2/processes/uncompress.py,sha256=Z_XfsITGdyaRwhtNUc7bMj5Y2jLuBge8KoK4nxhqKag,2425
402
409
  unstructured_ingest/v2/processes/connectors/__init__.py,sha256=yNIV9Kf_A_uEb-9LM_vW4w5gNmQ9LsTyY_3WvtERCBQ,5239
403
410
  unstructured_ingest/v2/processes/connectors/airtable.py,sha256=eeZJe-bBNxt5Sa-XEFCdcGeJCguJU5WN2Mv9kLp5dVQ,8917
404
- unstructured_ingest/v2/processes/connectors/astradb.py,sha256=QTUQ-cv_iZi9eaXRRHQNKhtgFn-Pi20AXdSVaDFg9DM,15498
405
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=G4zk8L_ckVYshQ3_d_uKdwRO-Oq38OptaHv2K17pQZg,12279
406
- unstructured_ingest/v2/processes/connectors/chroma.py,sha256=oB4628T3tJgIF8mAhMhl6K0DdS59P__RCaxZc2iGetM,8136
411
+ unstructured_ingest/v2/processes/connectors/astradb.py,sha256=gUybGpA89Td3b8DQ-yE7LTN6z3g5vxfew6kr0HlI1M4,14636
412
+ unstructured_ingest/v2/processes/connectors/azure_ai_search.py,sha256=ngPDpU0oZ6m5sxIlB6u5ebQpqCS_SJ-_amCC1KQ03EQ,11529
413
+ unstructured_ingest/v2/processes/connectors/chroma.py,sha256=G1DQHhhFQCS2RLF0cVvoUH9QO8KkVjIyNZ9nKh__aHw,7220
407
414
  unstructured_ingest/v2/processes/connectors/confluence.py,sha256=-Y1OU_ZXhZQNj5NH3EN01CP8QKKZJaJ9xkXoAlSgnIk,7604
408
- unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=LbUJLt6fqaNYSmy9vUiovG-UOALMcvh8OD-gZAaf-f4,12333
409
- unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=1yS7ivEyiucwd_kv6LL5HQdGabT43yeG6XCdwiz89hc,8019
410
- unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=yBgCeLy9iCVI8bBDcHHuHB0H3BO05e9E1OccbHwvKAo,9724
411
- unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=EEwXK1Anlu-eXl5qxmdDIqPYW7eMSez6WGlTPG2vSn8,13121
412
- unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=s_ijx4gRT8Brr3ULIuRgVV7U9M--MkBxYX7b_Auidj4,6009
413
- unstructured_ingest/v2/processes/connectors/local.py,sha256=a3stgnIkhBbXPIQD0O-RaRM-Eb-szHj9Yy4Fz881-9c,6723
414
- unstructured_ingest/v2/processes/connectors/milvus.py,sha256=3sV0Yv2vYMLyxszKCqAqlMcHHJSBR-GGbaZf1nvobLE,10089
415
- unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=XLuprTCY0D9tAh_qn81MjJrDN9YaNqMlKe7BJl3eTZc,14998
415
+ unstructured_ingest/v2/processes/connectors/couchbase.py,sha256=6iVmT3Qo0dCOlSHUf-7cezYARqsqadAyu3fg78x_h50,11954
416
+ unstructured_ingest/v2/processes/connectors/delta_table.py,sha256=SotSXZQ85_6TO906YvFi3yTml8jE9A_zV6nBJ4oTx8A,7075
417
+ unstructured_ingest/v2/processes/connectors/gitlab.py,sha256=ufE65Z8q_tC4oppGg5BsGXwSaL7RbEXcaagJQYsylNo,9984
418
+ unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=5k7pdAzJGXSdyPCzW9vu2OaAjGVTo2JevDyGaXM1Hvk,13370
419
+ unstructured_ingest/v2/processes/connectors/kdbai.py,sha256=VRDAiou_7oWOIAgQTdOGQWxudzQEDopXM8XkfkQ2j6g,5004
420
+ unstructured_ingest/v2/processes/connectors/local.py,sha256=ZvWTj6ZYkwnvQMNFsZWoaQyp9zp0WVqAywMaHJ2kcAc,7153
421
+ unstructured_ingest/v2/processes/connectors/milvus.py,sha256=I57hyH5nz_p7utmUOkvt_6vCPxNIVQMoukplUgIyYi8,8503
422
+ unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=M7qeuOp_jJjQldtFhYSvn9xXle7Y6ZD3lZuAR8BDaPs,14864
423
+ unstructured_ingest/v2/processes/connectors/neo4j.py,sha256=LKHgeYOjcSxMzjzpU1OB-e1XLxwTAgCHJpW81M2OPZM,14111
416
424
  unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=heZMtOIrCySi552ldIk8iH0pSRXZ0W2LeD-CcNOwCFQ,15979
417
425
  unstructured_ingest/v2/processes/connectors/outlook.py,sha256=KgNGM8hImRhy6_SpswRP2VwRD4VOrqqJoySgxf2oduI,9290
418
- unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=BLi9wQzoAnt61m2vOa0xGvmR04kBH_tw9EV9xIw2O_Y,11629
426
+ unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=cohF7gBj0opSGKXlENSdGfTtyIKMHd1pwu4ydeb7JAY,10605
419
427
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=2CiO2ZZiZ1Y1-nB7wcDlDVcpW2B7ut9wCj66rkkqho0,11616
420
428
  unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=Ndn2Wm7RupfjAtlLxxQwJueeE0V8aGMbNVPuFq9nqdQ,19730
421
429
  unstructured_ingest/v2/processes/connectors/slack.py,sha256=Z73VmQ3oUY09KoLEi5OBdQeDt4ONEY_02SglWQc6HXE,9252
@@ -427,11 +435,11 @@ unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py,sha256=P
427
435
  unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py,sha256=UUotY_-HpgSEJkvdQfZTlbxY7CRLZ4ctL8TlryeFvxk,2790
428
436
  unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py,sha256=Wk7s2_u5G0BOV5slvGc8IlUf7ivznY9PrgPqe6nlJKM,2897
429
437
  unstructured_ingest/v2/processes/connectors/duckdb/__init__.py,sha256=5sVvJCWhU-YkjHIwk4W6BZCanFYK5W4xTpWtQ8xzeB4,561
430
- unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=lfkakl9bQKC6i08WgaZ6yEzDODv8PAp8gr5IIFMCASw,2459
431
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=-8JIekK-ErcKrsp6VhPniKjx7v68_x5JFNC3CzN59UQ,3947
432
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=GR4hhzBrpgDyO4dVMERzgMcX8v6zjhezDDv3pmhMHpQ,4736
438
+ unstructured_ingest/v2/processes/connectors/duckdb/base.py,sha256=FVblIddorGCh9D9GZ8zLVUm8n39PJA5JLoJeWd-tSy8,2610
439
+ unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py,sha256=oUHHaLpO2pWW2Lu4Mc-XFjrA0ze97205WQ_xP95ua4M,4296
440
+ unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py,sha256=mU5x6SnbFgRsVicNGh4y4gtR6ek7eQFinI0dQQmzMds,4481
433
441
  unstructured_ingest/v2/processes/connectors/elasticsearch/__init__.py,sha256=Zzc0JNPP-eFqpwWw1Gp-XC8H-s__IgkYKzoagECycZY,829
434
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=skoeUpoIQVQBzHCMLB6smUeDIc5ooZmn5n6XJqw5lMo,19808
442
+ unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py,sha256=OOU6vXx7EdO_OtqvrCAWPz9V6z3UiYIWxPyxie-fDlc,18862
435
443
  unstructured_ingest/v2/processes/connectors/elasticsearch/opensearch.py,sha256=qRz8Fyr2RSZIPZGkhPeme6AZxM0aX-c_xOa1ZtSr2Kg,6781
436
444
  unstructured_ingest/v2/processes/connectors/fsspec/__init__.py,sha256=TtdeImM7Ypl_n6sl7I1JqX6bGSG0t_FqvCqE3Cy24og,1846
437
445
  unstructured_ingest/v2/processes/connectors/fsspec/azure.py,sha256=JLIzoXGVW_3pVs-4kshey0pek6wkpWb3I_HJg8g6qzw,7119
@@ -444,34 +452,34 @@ unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=rlG4uAnOeMFht9
444
452
  unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
445
453
  unstructured_ingest/v2/processes/connectors/kafka/__init__.py,sha256=mQJ9Ex-QCfhz-BB5YWTfbPf7xGLd1i7FpjRr0ukbhNw,754
446
454
  unstructured_ingest/v2/processes/connectors/kafka/cloud.py,sha256=1SqNdY8Q8JwwB57wk9efxKv_BCeUkxZJ2HJ526wuCMw,3294
447
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py,sha256=Ejuc04z3hXl4m8xgxneMFR93ghDwwKEiuprPpNqnIFY,9772
455
+ unstructured_ingest/v2/processes/connectors/kafka/kafka.py,sha256=a-LWqYeJAK-g32UPgvvDt6W7dJp85N66aR_EKSR66RU,9685
448
456
  unstructured_ingest/v2/processes/connectors/kafka/local.py,sha256=lUkmfbTxyQW87CXxbJaijIT6foV09Gi-IG9o08OgiEs,2581
449
457
  unstructured_ingest/v2/processes/connectors/lancedb/__init__.py,sha256=LW37xZrn48JeHluRNulLTreUPdaF-ZU81F7MCUHcCv8,1253
450
458
  unstructured_ingest/v2/processes/connectors/lancedb/aws.py,sha256=eeXWsh8UeVm1Ur53C4MEnpLplfO8U91KYgk--0kk5pE,1413
451
459
  unstructured_ingest/v2/processes/connectors/lancedb/azure.py,sha256=Ms5vQVRIpTF1Q2qBl_bET9wbgaf4diPaH-iR8kJlr4E,1461
452
460
  unstructured_ingest/v2/processes/connectors/lancedb/cloud.py,sha256=BFy0gW2OZ_qaZJM97m-tNsFaJPi9zOKrrd2y4thcNP0,1341
453
461
  unstructured_ingest/v2/processes/connectors/lancedb/gcp.py,sha256=p5BPaFtS3y3Yh8PIr3tUqsAXrUYu4QYYAWQNh5W2ucE,1361
454
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py,sha256=7FODnesYu8cFx1PeQJZxXij-8Dei4Kk3Bs0oxoUGBtI,5745
462
+ unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py,sha256=oQbRZfocnRWqc9VIHgloYbEsfV0Ei_s1_-TKmRnTdYg,5714
455
463
  unstructured_ingest/v2/processes/connectors/lancedb/local.py,sha256=_7-6iO6B60gAWwJUUrmlsRzYMFIBeZgu_QT3mhw5L0I,1272
456
464
  unstructured_ingest/v2/processes/connectors/qdrant/__init__.py,sha256=xM19uYzAuGizVoZIM_hnVZ5AcBN69aOBGpqZcpWPtuE,760
457
465
  unstructured_ingest/v2/processes/connectors/qdrant/cloud.py,sha256=accJ4sNWBVWV-KiVBDBDBYYx5A9CUoikP5NCErRmfik,1624
458
466
  unstructured_ingest/v2/processes/connectors/qdrant/local.py,sha256=cGEyv3Oy6y4BQ4DU8yhJWMpL82QYwBVdPTxxNuV127U,1588
459
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=QZTa4obcH-1kpaOn8GnguA_9ELr3xQnwjp3DkMmzUHg,5513
467
+ unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py,sha256=ITRYXKYEFhlagSe-AKKGRvC8jzyWmhQLfHbFb0ax8o8,5438
460
468
  unstructured_ingest/v2/processes/connectors/qdrant/server.py,sha256=odvCZWZp8DmRxLXMR7tHhW-c7UQbix1_zpFdfXfCvKI,1613
461
469
  unstructured_ingest/v2/processes/connectors/sql/__init__.py,sha256=E16CXRBw8fZKTuXIECns5wif_I07oncBHskVxHC4p7w,1448
462
470
  unstructured_ingest/v2/processes/connectors/sql/postgres.py,sha256=rHaSb1MtdWMY6eQL2i2cWSL4w0VApFTChzmWtyfvFTI,5140
463
471
  unstructured_ingest/v2/processes/connectors/sql/singlestore.py,sha256=Jk55MwFtVgQSGDuwY71miru9FBdLejnA9t54yU_W_BY,5481
464
472
  unstructured_ingest/v2/processes/connectors/sql/snowflake.py,sha256=hzfCtQNp_uYRseJ8k7hhEsJOdqVlWAU33Wwjj6_66lM,7428
465
- unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=0l_x_6ZntEPhMPlDidJNh9aPK2Wbo-n8_nYlAlnsgDk,15034
473
+ unstructured_ingest/v2/processes/connectors/sql/sql.py,sha256=Iq2D6jhfNx5u14IlIlcWXwQ9SlFQkstgzJhfdXEQi_E,15003
466
474
  unstructured_ingest/v2/processes/connectors/sql/sqlite.py,sha256=ydrdTgyWNjDS1SwLCQiw7az7QQdiijzBTzej-QsL7Bo,5178
467
475
  unstructured_ingest/v2/processes/connectors/weaviate/__init__.py,sha256=NMiwnVWan69KnzVELvaqX34tMhCytIa-C8EDsXVKsEo,856
468
476
  unstructured_ingest/v2/processes/connectors/weaviate/cloud.py,sha256=bXtfEYLquR-BszZ5S_lQ4JbETNs9Vozgpfm8x9egAmE,6251
469
477
  unstructured_ingest/v2/processes/connectors/weaviate/embedded.py,sha256=S8Zg8StuZT-k7tCg1D5YShO1-vJYYk9-M1bE1fIqx64,3014
470
478
  unstructured_ingest/v2/processes/connectors/weaviate/local.py,sha256=LuTBKPseVewsz8VqxRPRLfGEm3BeI9nBZxpy7ZU5tOA,2201
471
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=dBDC_M8GVKupl7i9UMRCZyRIUv6gTkq8bJE_SILydAc,11291
472
- unstructured_ingest-0.3.8.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
473
- unstructured_ingest-0.3.8.dist-info/METADATA,sha256=HZRSyzQt9UJplhIiXEEsDrZGljY77nMG57SWRR0F3v8,7457
474
- unstructured_ingest-0.3.8.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
475
- unstructured_ingest-0.3.8.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
476
- unstructured_ingest-0.3.8.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
477
- unstructured_ingest-0.3.8.dist-info/RECORD,,
479
+ unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py,sha256=X1yv1H_orDQ-J965EMXhR2XaURqe8vovSi9n1fk85B4,10499
480
+ unstructured_ingest-0.3.9.dist-info/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
481
+ unstructured_ingest-0.3.9.dist-info/METADATA,sha256=laaXS-GL2jXN8qS76UB4zG7PxckZtwvpSGAnr2B4fpA,7580
482
+ unstructured_ingest-0.3.9.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
483
+ unstructured_ingest-0.3.9.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
484
+ unstructured_ingest-0.3.9.dist-info/top_level.txt,sha256=DMuDMHZRMdeay8v8Kdi855muIv92F0OkutvBCaBEW6M,25
485
+ unstructured_ingest-0.3.9.dist-info/RECORD,,