unstructured-ingest 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of unstructured-ingest has been flagged as potentially problematic.

Files changed (64)
  1. test/integration/chunkers/test_chunkers.py +0 -11
  2. test/integration/connectors/conftest.py +11 -1
  3. test/integration/connectors/databricks_tests/test_volumes_native.py +4 -3
  4. test/integration/connectors/duckdb/conftest.py +14 -0
  5. test/integration/connectors/duckdb/test_duckdb.py +51 -44
  6. test/integration/connectors/duckdb/test_motherduck.py +37 -48
  7. test/integration/connectors/elasticsearch/test_elasticsearch.py +26 -4
  8. test/integration/connectors/elasticsearch/test_opensearch.py +26 -3
  9. test/integration/connectors/sql/test_postgres.py +102 -91
  10. test/integration/connectors/sql/test_singlestore.py +111 -99
  11. test/integration/connectors/sql/test_snowflake.py +142 -117
  12. test/integration/connectors/sql/test_sqlite.py +86 -75
  13. test/integration/connectors/test_astradb.py +22 -1
  14. test/integration/connectors/test_azure_ai_search.py +25 -3
  15. test/integration/connectors/test_chroma.py +120 -0
  16. test/integration/connectors/test_confluence.py +4 -4
  17. test/integration/connectors/test_delta_table.py +1 -0
  18. test/integration/connectors/test_kafka.py +4 -4
  19. test/integration/connectors/test_milvus.py +21 -0
  20. test/integration/connectors/test_mongodb.py +3 -3
  21. test/integration/connectors/test_neo4j.py +236 -0
  22. test/integration/connectors/test_pinecone.py +25 -1
  23. test/integration/connectors/test_qdrant.py +25 -2
  24. test/integration/connectors/test_s3.py +9 -6
  25. test/integration/connectors/utils/docker.py +6 -0
  26. test/integration/connectors/utils/validation/__init__.py +0 -0
  27. test/integration/connectors/utils/validation/destination.py +88 -0
  28. test/integration/connectors/utils/validation/equality.py +75 -0
  29. test/integration/connectors/utils/{validation.py → validation/source.py} +15 -91
  30. test/integration/connectors/utils/validation/utils.py +36 -0
  31. unstructured_ingest/__version__.py +1 -1
  32. unstructured_ingest/utils/chunking.py +11 -0
  33. unstructured_ingest/utils/data_prep.py +36 -0
  34. unstructured_ingest/v2/interfaces/upload_stager.py +70 -6
  35. unstructured_ingest/v2/interfaces/uploader.py +11 -2
  36. unstructured_ingest/v2/pipeline/steps/stage.py +3 -1
  37. unstructured_ingest/v2/processes/connectors/astradb.py +8 -30
  38. unstructured_ingest/v2/processes/connectors/azure_ai_search.py +16 -40
  39. unstructured_ingest/v2/processes/connectors/chroma.py +36 -59
  40. unstructured_ingest/v2/processes/connectors/couchbase.py +42 -52
  41. unstructured_ingest/v2/processes/connectors/delta_table.py +11 -33
  42. unstructured_ingest/v2/processes/connectors/duckdb/base.py +26 -26
  43. unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +29 -20
  44. unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +37 -44
  45. unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +5 -30
  46. unstructured_ingest/v2/processes/connectors/gitlab.py +32 -31
  47. unstructured_ingest/v2/processes/connectors/google_drive.py +32 -29
  48. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +2 -4
  49. unstructured_ingest/v2/processes/connectors/kdbai.py +44 -70
  50. unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +8 -10
  51. unstructured_ingest/v2/processes/connectors/local.py +13 -2
  52. unstructured_ingest/v2/processes/connectors/milvus.py +16 -57
  53. unstructured_ingest/v2/processes/connectors/mongodb.py +4 -8
  54. unstructured_ingest/v2/processes/connectors/neo4j.py +381 -0
  55. unstructured_ingest/v2/processes/connectors/pinecone.py +23 -65
  56. unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +32 -41
  57. unstructured_ingest/v2/processes/connectors/sql/sql.py +41 -40
  58. unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +9 -31
  59. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/METADATA +21 -17
  60. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/RECORD +64 -56
  61. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/LICENSE.md +0 -0
  62. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/WHEEL +0 -0
  63. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/entry_points.txt +0 -0
  64. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/top_level.txt +0 -0
@@ -1,10 +1,9 @@
 import asyncio
 import json
 from abc import ABC, abstractmethod
-from contextlib import asynccontextmanager
+from contextlib import asynccontextmanager, contextmanager
 from dataclasses import dataclass, field
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, AsyncGenerator, Optional
+from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Optional

 from pydantic import Field, Secret

@@ -24,7 +23,7 @@ from unstructured_ingest.v2.logger import logger
 from unstructured_ingest.v2.utils import get_enhanced_element_id

 if TYPE_CHECKING:
-    from qdrant_client import AsyncQdrantClient
+    from qdrant_client import AsyncQdrantClient, QdrantClient


 class QdrantAccessConfig(AccessConfig, ABC):
@@ -42,8 +41,8 @@ class QdrantConnectionConfig(ConnectionConfig, ABC):

     @requires_dependencies(["qdrant_client"], extras="qdrant")
     @asynccontextmanager
-    async def get_client(self) -> AsyncGenerator["AsyncQdrantClient", None]:
-        from qdrant_client.async_qdrant_client import AsyncQdrantClient
+    async def get_async_client(self) -> AsyncGenerator["AsyncQdrantClient", None]:
+        from qdrant_client import AsyncQdrantClient

         client_kwargs = self.get_client_kwargs()
         client = AsyncQdrantClient(**client_kwargs)
@@ -52,6 +51,18 @@ class QdrantConnectionConfig(ConnectionConfig, ABC):
         finally:
             await client.close()

+    @requires_dependencies(["qdrant_client"], extras="qdrant")
+    @contextmanager
+    def get_client(self) -> Generator["QdrantClient", None, None]:
+        from qdrant_client import QdrantClient
+
+        client_kwargs = self.get_client_kwargs()
+        client = QdrantClient(**client_kwargs)
+        try:
+            yield client
+        finally:
+            client.close()
+

 class QdrantUploadStagerConfig(UploadStagerConfig):
     pass
@@ -63,9 +74,9 @@ class QdrantUploadStager(UploadStager, ABC):
         default_factory=lambda: QdrantUploadStagerConfig()
     )

-    @staticmethod
-    def conform_dict(data: dict, file_data: FileData) -> dict:
+    def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
         """Prepares dictionary in the format that Chroma requires"""
+        data = element_dict.copy()
         return {
             "id": get_enhanced_element_id(element_dict=data, file_data=file_data),
             "vector": data.pop("embeddings", {}),
@@ -80,26 +91,6 @@ class QdrantUploadStager(UploadStager, ABC):
             },
         }

-    def run(
-        self,
-        elements_filepath: Path,
-        file_data: FileData,
-        output_dir: Path,
-        output_filename: str,
-        **kwargs: Any,
-    ) -> Path:
-        with open(elements_filepath) as elements_file:
-            elements_contents = json.load(elements_file)
-
-        conformed_elements = [
-            self.conform_dict(data=element, file_data=file_data) for element in elements_contents
-        ]
-        output_path = Path(output_dir) / Path(f"{output_filename}.json")
-
-        with open(output_path, "w") as output_file:
-            json.dump(conformed_elements, output_file)
-        return output_path
-

 class QdrantUploaderConfig(UploaderConfig):
     collection_name: str = Field(description="Name of the collection.")
@@ -118,27 +109,27 @@ class QdrantUploader(Uploader, ABC):

     @DestinationConnectionError.wrap
     def precheck(self) -> None:
-        async def check_connection():
-            async with self.connection_config.get_client() as async_client:
-                await async_client.get_collections()
-
-        asyncio.run(check_connection())
+        with self.connection_config.get_client() as client:
+            collections_response = client.get_collections()
+            collection_names = [c.name for c in collections_response.collections]
+            if self.upload_config.collection_name not in collection_names:
+                raise DestinationConnectionError(
+                    "collection '{}' not found: {}".format(
+                        self.upload_config.collection_name, ", ".join(collection_names)
+                    )
+                )

     def is_async(self):
         return True

-    async def run_async(
+    async def run_data_async(
         self,
-        path: Path,
+        data: list[dict],
         file_data: FileData,
         **kwargs: Any,
     ) -> None:
-        with path.open("r") as file:
-            elements: list[dict] = json.load(file)
-
-        logger.debug("Loaded %i elements from %s", len(elements), path)

-        batches = list(batch_generator(elements, batch_size=self.upload_config.batch_size))
+        batches = list(batch_generator(data, batch_size=self.upload_config.batch_size))
         logger.debug(
             "Elements split into %i batches of size %i.",
             len(batches),
@@ -156,7 +147,7 @@ class QdrantUploader(Uploader, ABC):
             len(points),
             self.upload_config.collection_name,
         )
-        async with self.connection_config.get_client() as async_client:
+        async with self.connection_config.get_async_client() as async_client:
             await async_client.upsert(
                 self.upload_config.collection_name, points=points, wait=True
             )
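The hunks above appear to come from the Qdrant connector (unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py in the file list): the connection config now exposes a synchronous QdrantClient context manager alongside the renamed async one, precheck verifies the target collection exists, and run_data_async upserts already-loaded element dicts. Below is a minimal standalone sketch of that sync/async pattern, written against qdrant_client directly rather than the connector's config classes; the URL, collection name, and points are placeholders.

# Sketch only: uses qdrant_client directly; url/collection_name/points are placeholders.
from contextlib import asynccontextmanager, contextmanager

from qdrant_client import AsyncQdrantClient, QdrantClient


@contextmanager
def get_client(url: str):
    # Synchronous client, e.g. for a cheap connectivity/collection precheck.
    client = QdrantClient(url=url)
    try:
        yield client
    finally:
        client.close()


@asynccontextmanager
async def get_async_client(url: str):
    # Async client for the actual upserts.
    client = AsyncQdrantClient(url=url)
    try:
        yield client
    finally:
        await client.close()


def precheck(url: str, collection_name: str) -> None:
    # Fail early if the destination collection does not exist.
    with get_client(url) as client:
        names = [c.name for c in client.get_collections().collections]
        if collection_name not in names:
            raise ValueError(f"collection '{collection_name}' not found: {', '.join(names)}")


async def upsert_points(url: str, collection_name: str, points: list) -> None:
    # points would be qdrant_client point structures built from conformed elements.
    async with get_async_client(url) as client:
        await client.upsert(collection_name, points=points, wait=True)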
@@ -15,7 +15,7 @@ from dateutil import parser
 from pydantic import Field, Secret

 from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
-from unstructured_ingest.utils.data_prep import split_dataframe
+from unstructured_ingest.utils.data_prep import get_data_df, split_dataframe
 from unstructured_ingest.v2.constants import RECORD_ID_LABEL
 from unstructured_ingest.v2.interfaces import (
     AccessConfig,
@@ -238,27 +238,24 @@ class SQLUploadStagerConfig(UploadStagerConfig):
 class SQLUploadStager(UploadStager):
     upload_stager_config: SQLUploadStagerConfig = field(default_factory=SQLUploadStagerConfig)

-    @staticmethod
-    def conform_dict(data: dict, file_data: FileData) -> pd.DataFrame:
-        working_data = data.copy()
-        output = []
-        for element in working_data:
-            metadata: dict[str, Any] = element.pop("metadata", {})
-            data_source = metadata.pop("data_source", {})
-            coordinates = metadata.pop("coordinates", {})
+    def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
+        data = element_dict.copy()
+        metadata: dict[str, Any] = data.pop("metadata", {})
+        data_source = metadata.pop("data_source", {})
+        coordinates = metadata.pop("coordinates", {})

-            element.update(metadata)
-            element.update(data_source)
-            element.update(coordinates)
+        data.update(metadata)
+        data.update(data_source)
+        data.update(coordinates)

-            element["id"] = get_enhanced_element_id(element_dict=element, file_data=file_data)
+        data["id"] = get_enhanced_element_id(element_dict=data, file_data=file_data)

-            # remove extraneous, not supported columns
-            element = {k: v for k, v in element.items() if k in _COLUMNS}
-            element[RECORD_ID_LABEL] = file_data.identifier
-            output.append(element)
+        # remove extraneous, not supported columns
+        element = {k: v for k, v in data.items() if k in _COLUMNS}
+        element[RECORD_ID_LABEL] = file_data.identifier
+        return element

-        df = pd.DataFrame.from_dict(output)
+    def conform_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
         for column in filter(lambda x: x in df.columns, _DATE_COLUMNS):
             df[column] = df[column].apply(parse_date_string)
         for column in filter(
@@ -283,19 +280,19 @@ class SQLUploadStager(UploadStager):
         output_filename: str,
         **kwargs: Any,
     ) -> Path:
-        with open(elements_filepath) as elements_file:
-            elements_contents: list[dict] = json.load(elements_file)
+        elements_contents = self.get_data(elements_filepath=elements_filepath)

-        df = self.conform_dict(data=elements_contents, file_data=file_data)
-        if Path(output_filename).suffix != ".json":
-            output_filename = f"{output_filename}.json"
-        else:
-            output_filename = f"{Path(output_filename).stem}.json"
-        output_path = Path(output_dir) / Path(f"{output_filename}")
-        output_path.parent.mkdir(parents=True, exist_ok=True)
+        df = pd.DataFrame(
+            data=[
+                self.conform_dict(element_dict=element_dict, file_data=file_data)
+                for element_dict in elements_contents
+            ]
+        )
+        df = self.conform_dataframe(df=df)

-        with output_path.open("w") as output_file:
-            df.to_json(output_file, orient="records", lines=True)
+        output_path = self.get_output_path(output_filename=output_filename, output_dir=output_dir)
+
+        self.write_output(output_path=output_path, data=df.to_dict(orient="records"))
         return output_path


@@ -361,8 +358,15 @@ class SQLUploader(Uploader):
         for column in missing_columns:
             df[column] = pd.Series()

-    def upload_contents(self, path: Path) -> None:
-        df = pd.read_json(path, orient="records", lines=True)
+    def upload_dataframe(self, df: pd.DataFrame, file_data: FileData) -> None:
+        if self.can_delete():
+            self.delete_by_record_id(file_data=file_data)
+        else:
+            logger.warning(
+                f"table doesn't contain expected "
+                f"record id column "
+                f"{self.upload_config.record_id_key}, skipping delete"
+            )
         df.replace({np.nan: None}, inplace=True)
         self._fit_to_schema(df=df, columns=self.get_table_columns())

@@ -411,13 +415,10 @@ class SQLUploader(Uploader):
             rowcount = cursor.rowcount
             logger.info(f"deleted {rowcount} rows from table {self.upload_config.table_name}")

+    def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
+        df = pd.DataFrame(data)
+        self.upload_dataframe(df=df, file_data=file_data)
+
     def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        if self.can_delete():
-            self.delete_by_record_id(file_data=file_data)
-        else:
-            logger.warning(
-                f"table doesn't contain expected "
-                f"record id column "
-                f"{self.upload_config.record_id_key}, skipping delete"
-            )
-        self.upload_contents(path=path)
+        df = get_data_df(path=path)
+        self.upload_dataframe(df=df, file_data=file_data)
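These hunks look like the shared SQL connector (unstructured_ingest/v2/processes/connectors/sql/sql.py): the stager now conforms one element dict at a time and applies frame-wide fixes in a separate conform_dataframe step, while the uploader gains run_data so deletes and inserts work from an in-memory DataFrame. A rough self-contained sketch of that flow follows; ALLOWED_COLUMNS, RECORD_ID_LABEL, and the sample elements are placeholders, not the library's actual schema.

# Sketch only: placeholder column set and record-id label, not the library's values.
from typing import Any

import numpy as np
import pandas as pd

ALLOWED_COLUMNS = {"id", "type", "text", "record_id"}
RECORD_ID_LABEL = "record_id"


def conform_dict(element_dict: dict, record_id: str) -> dict:
    # Flatten metadata into the top level, keep only supported columns,
    # and tag the row with the record id used for later deletes.
    data: dict[str, Any] = element_dict.copy()
    metadata = data.pop("metadata", {})
    data.update(metadata)
    row = {k: v for k, v in data.items() if k in ALLOWED_COLUMNS}
    row[RECORD_ID_LABEL] = record_id
    return row


def stage(elements: list[dict], record_id: str) -> list[dict]:
    # Per-element conforming first, then one DataFrame-level pass (here just
    # NaN -> None, standing in for the date/list handling in conform_dataframe).
    df = pd.DataFrame(data=[conform_dict(e, record_id) for e in elements])
    df = df.replace({np.nan: None})
    return df.to_dict(orient="records")


print(
    stage(
        [
            {"type": "Title", "text": "Hello", "metadata": {"filename": "a.pdf"}},
            {"type": "NarrativeText", "metadata": {}},
        ],
        "rec-1",
    )
)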
@@ -3,7 +3,6 @@ from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from dataclasses import dataclass, field
 from datetime import date, datetime
-from pathlib import Path
 from typing import TYPE_CHECKING, Any, Generator, Optional

 from dateutil import parser
@@ -74,11 +73,11 @@ class WeaviateUploadStager(UploadStager):
             logger.debug(f"date {date_string} string not a timestamp: {e}")
         return parser.parse(date_string)

-    @classmethod
-    def conform_dict(cls, data: dict, file_data: FileData) -> dict:
+    def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
         """
         Updates the element dictionary to conform to the Weaviate schema
         """
+        data = element_dict.copy()
         working_data = data.copy()
         # Dict as string formatting
         if (
@@ -111,7 +110,7 @@ class WeaviateUploadStager(UploadStager):
             .get("data_source", {})
             .get("date_created")
         ):
-            working_data["metadata"]["data_source"]["date_created"] = cls.parse_date_string(
+            working_data["metadata"]["data_source"]["date_created"] = self.parse_date_string(
                 date_created
             ).strftime(
                 "%Y-%m-%dT%H:%M:%S.%fZ",
@@ -122,7 +121,7 @@ class WeaviateUploadStager(UploadStager):
             .get("data_source", {})
             .get("date_modified")
         ):
-            working_data["metadata"]["data_source"]["date_modified"] = cls.parse_date_string(
+            working_data["metadata"]["data_source"]["date_modified"] = self.parse_date_string(
                 date_modified
             ).strftime(
                 "%Y-%m-%dT%H:%M:%S.%fZ",
@@ -133,14 +132,14 @@ class WeaviateUploadStager(UploadStager):
             .get("data_source", {})
             .get("date_processed")
         ):
-            working_data["metadata"]["data_source"]["date_processed"] = cls.parse_date_string(
+            working_data["metadata"]["data_source"]["date_processed"] = self.parse_date_string(
                 date_processed
             ).strftime(
                 "%Y-%m-%dT%H:%M:%S.%fZ",
             )

         if last_modified := working_data.get("metadata", {}).get("last_modified"):
-            working_data["metadata"]["last_modified"] = cls.parse_date_string(
+            working_data["metadata"]["last_modified"] = self.parse_date_string(
                 last_modified
             ).strftime(
                 "%Y-%m-%dT%H:%M:%S.%fZ",
@@ -159,25 +158,6 @@ class WeaviateUploadStager(UploadStager):
         working_data[RECORD_ID_LABEL] = file_data.identifier
         return working_data

-    def run(
-        self,
-        elements_filepath: Path,
-        file_data: FileData,
-        output_dir: Path,
-        output_filename: str,
-        **kwargs: Any,
-    ) -> Path:
-        with open(elements_filepath) as elements_file:
-            elements_contents = json.load(elements_file)
-        updated_elements = [
-            self.conform_dict(data=element, file_data=file_data) for element in elements_contents
-        ]
-        output_path = Path(output_dir) / Path(f"{output_filename}.json")
-        output_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(output_path, "w") as output_file:
-            json.dump(updated_elements, output_file, indent=2)
-        return output_path
-

 class WeaviateUploaderConfig(UploaderConfig):
     collection: str = Field(description="The name of the collection this object belongs to")
@@ -268,18 +248,16 @@ class WeaviateUploader(Uploader, ABC):
             if not resp.failed and not resp.successful:
                 break

-    def run(self, path: Path, file_data: FileData, **kwargs: Any) -> None:
-        with path.open("r") as file:
-            elements_dict = json.load(file)
+    def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None:
         logger.info(
-            f"writing {len(elements_dict)} objects to destination "
+            f"writing {len(data)} objects to destination "
             f"class {self.connection_config.access_config} "
         )

         with self.connection_config.get_client() as weaviate_client:
             self.delete_by_record_id(client=weaviate_client, file_data=file_data)
             with self.upload_config.get_batch_client(client=weaviate_client) as batch_client:
-                for e in elements_dict:
+                for e in data:
                     vector = e.pop("embeddings", None)
                     batch_client.add_object(
                         collection=self.upload_config.collection,
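In the Weaviate stager hunks above, the date handling is unchanged in substance (only moved from classmethod calls to instance calls): a value is treated as a Unix timestamp when possible, otherwise parsed with dateutil, then rendered with the "%Y-%m-%dT%H:%M:%S.%fZ" format the stager writes. A small sketch of that normalization, with arbitrary sample inputs:

# Sketch only: mirrors the timestamp-or-parse logic shown in the diff; inputs are arbitrary.
from datetime import datetime

from dateutil import parser


def parse_date_string(date_string: str) -> datetime:
    try:
        timestamp = float(date_string)  # e.g. "1715000000"
        return datetime.fromtimestamp(timestamp)
    except ValueError:
        return parser.parse(date_string)


for raw in ("2024-05-06", "1715000000"):
    print(parse_date_string(raw).strftime("%Y-%m-%dT%H:%M:%S.%fZ"))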
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unstructured-ingest
-Version: 0.3.7
+Version: 0.3.9
 Summary: A library that prepares raw documents for downstream ML tasks.
 Home-page: https://github.com/Unstructured-IO/unstructured-ingest
 Author: Unstructured Technologies
@@ -22,13 +22,14 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.9.0,<3.13
 Description-Content-Type: text/markdown
 License-File: LICENSE.md
-Requires-Dist: pandas
-Requires-Dist: tqdm
+Requires-Dist: click
 Requires-Dist: pydantic>=2.7
-Requires-Dist: opentelemetry-sdk
+Requires-Dist: pandas
 Requires-Dist: dataclasses-json
-Requires-Dist: click
 Requires-Dist: python-dateutil
+Requires-Dist: tqdm
+Requires-Dist: ndjson
+Requires-Dist: opentelemetry-sdk
 Provides-Extra: airtable
 Requires-Dist: pyairtable; extra == "airtable"
 Provides-Extra: astradb
@@ -41,8 +42,8 @@ Requires-Dist: azure-search-documents; extra == "azure-ai-search"
 Provides-Extra: bedrock
 Requires-Dist: boto3; extra == "bedrock"
 Provides-Extra: biomed
-Requires-Dist: requests; extra == "biomed"
 Requires-Dist: bs4; extra == "biomed"
+Requires-Dist: requests; extra == "biomed"
 Provides-Extra: box
 Requires-Dist: fsspec; extra == "box"
 Requires-Dist: boxfs; extra == "box"
@@ -69,8 +70,8 @@ Requires-Dist: unstructured[docx]; extra == "doc"
 Provides-Extra: docx
 Requires-Dist: unstructured[docx]; extra == "docx"
 Provides-Extra: dropbox
-Requires-Dist: dropboxdrivefs; extra == "dropbox"
 Requires-Dist: fsspec; extra == "dropbox"
+Requires-Dist: dropboxdrivefs; extra == "dropbox"
 Provides-Extra: duckdb
 Requires-Dist: duckdb; extra == "duckdb"
 Provides-Extra: elasticsearch
@@ -80,8 +81,8 @@ Requires-Dist: sentence-transformers; extra == "embed-huggingface"
 Provides-Extra: embed-mixedbreadai
 Requires-Dist: mixedbread-ai; extra == "embed-mixedbreadai"
 Provides-Extra: embed-octoai
-Requires-Dist: tiktoken; extra == "embed-octoai"
 Requires-Dist: openai; extra == "embed-octoai"
+Requires-Dist: tiktoken; extra == "embed-octoai"
 Provides-Extra: embed-vertexai
 Requires-Dist: vertexai; extra == "embed-vertexai"
 Provides-Extra: embed-voyageai
@@ -89,19 +90,19 @@ Requires-Dist: voyageai; extra == "embed-voyageai"
 Provides-Extra: epub
 Requires-Dist: unstructured[epub]; extra == "epub"
 Provides-Extra: gcs
-Requires-Dist: gcsfs; extra == "gcs"
 Requires-Dist: fsspec; extra == "gcs"
 Requires-Dist: bs4; extra == "gcs"
+Requires-Dist: gcsfs; extra == "gcs"
 Provides-Extra: github
-Requires-Dist: requests; extra == "github"
 Requires-Dist: pygithub>1.58.0; extra == "github"
+Requires-Dist: requests; extra == "github"
 Provides-Extra: gitlab
 Requires-Dist: python-gitlab; extra == "gitlab"
 Provides-Extra: google-drive
 Requires-Dist: google-api-python-client; extra == "google-drive"
 Provides-Extra: hubspot
-Requires-Dist: hubspot-api-client; extra == "hubspot"
 Requires-Dist: urllib3; extra == "hubspot"
+Requires-Dist: hubspot-api-client; extra == "hubspot"
 Provides-Extra: jira
 Requires-Dist: atlassian-python-api; extra == "jira"
 Provides-Extra: kafka
@@ -118,11 +119,14 @@ Provides-Extra: mongodb
 Requires-Dist: pymongo; extra == "mongodb"
 Provides-Extra: msg
 Requires-Dist: unstructured[msg]; extra == "msg"
+Provides-Extra: neo4j
+Requires-Dist: neo4j; extra == "neo4j"
+Requires-Dist: cymple; extra == "neo4j"
 Provides-Extra: notion
-Requires-Dist: notion-client; extra == "notion"
-Requires-Dist: backoff; extra == "notion"
 Requires-Dist: htmlBuilder; extra == "notion"
 Requires-Dist: httpx; extra == "notion"
+Requires-Dist: backoff; extra == "notion"
+Requires-Dist: notion-client; extra == "notion"
 Provides-Extra: odt
 Requires-Dist: unstructured[odt]; extra == "odt"
 Provides-Extra: onedrive
@@ -130,8 +134,8 @@ Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
 Requires-Dist: msal; extra == "onedrive"
 Requires-Dist: bs4; extra == "onedrive"
 Provides-Extra: openai
-Requires-Dist: tiktoken; extra == "openai"
 Requires-Dist: openai; extra == "openai"
+Requires-Dist: tiktoken; extra == "openai"
 Provides-Extra: opensearch
 Requires-Dist: opensearch-py; extra == "opensearch"
 Provides-Extra: org
@@ -160,13 +164,13 @@ Requires-Dist: unstructured[rst]; extra == "rst"
 Provides-Extra: rtf
 Requires-Dist: unstructured[rtf]; extra == "rtf"
 Provides-Extra: s3
-Requires-Dist: s3fs; extra == "s3"
 Requires-Dist: fsspec; extra == "s3"
+Requires-Dist: s3fs; extra == "s3"
 Provides-Extra: salesforce
 Requires-Dist: simple-salesforce; extra == "salesforce"
 Provides-Extra: sftp
-Requires-Dist: paramiko; extra == "sftp"
 Requires-Dist: fsspec; extra == "sftp"
+Requires-Dist: paramiko; extra == "sftp"
 Provides-Extra: sharepoint
 Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
 Requires-Dist: msal; extra == "sharepoint"
@@ -175,8 +179,8 @@ Requires-Dist: singlestoredb; extra == "singlestore"
 Provides-Extra: slack
 Requires-Dist: slack-sdk[optional]; extra == "slack"
 Provides-Extra: snowflake
-Requires-Dist: psycopg2-binary; extra == "snowflake"
 Requires-Dist: snowflake-connector-python; extra == "snowflake"
+Requires-Dist: psycopg2-binary; extra == "snowflake"
 Provides-Extra: togetherai
 Requires-Dist: together; extra == "togetherai"
 Provides-Extra: tsv