unstructured-ingest 0.3.13-py3-none-any.whl → 0.3.14-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of unstructured-ingest might be problematic.
- test/integration/connectors/databricks/test_volumes_native.py +10 -6
- test/integration/connectors/discord/test_discord.py +4 -4
- test/integration/connectors/duckdb/test_duckdb.py +3 -2
- test/integration/connectors/duckdb/test_motherduck.py +2 -2
- test/integration/connectors/elasticsearch/test_elasticsearch.py +8 -7
- test/integration/connectors/elasticsearch/test_opensearch.py +8 -7
- test/integration/connectors/sql/test_postgres.py +9 -3
- test/integration/connectors/sql/test_singlestore.py +9 -3
- test/integration/connectors/sql/test_snowflake.py +9 -3
- test/integration/connectors/sql/test_sqlite.py +9 -3
- test/integration/connectors/test_astradb.py +25 -9
- test/integration/connectors/test_azure_ai_search.py +3 -4
- test/integration/connectors/test_chroma.py +4 -6
- test/integration/connectors/test_confluence.py +3 -5
- test/integration/connectors/test_delta_table.py +4 -6
- test/integration/connectors/test_lancedb.py +3 -3
- test/integration/connectors/test_milvus.py +10 -5
- test/integration/connectors/test_mongodb.py +9 -9
- test/integration/connectors/test_neo4j.py +16 -8
- test/integration/connectors/test_notion.py +7 -0
- test/integration/connectors/test_onedrive.py +2 -4
- test/integration/connectors/test_pinecone.py +5 -6
- test/integration/connectors/test_qdrant.py +5 -4
- test/integration/connectors/test_redis.py +3 -3
- test/integration/connectors/test_s3.py +7 -6
- test/integration/connectors/test_vectara.py +2 -2
- test/integration/connectors/utils/constants.py +6 -0
- test/integration/connectors/utils/docker.py +2 -2
- test/integration/connectors/weaviate/test_cloud.py +5 -0
- test/integration/connectors/weaviate/test_local.py +2 -2
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/processes/connectors/neo4j.py +12 -12
- {unstructured_ingest-0.3.13.dist-info → unstructured_ingest-0.3.14.dist-info}/METADATA +18 -18
- {unstructured_ingest-0.3.13.dist-info → unstructured_ingest-0.3.14.dist-info}/RECORD +38 -38
- {unstructured_ingest-0.3.13.dist-info → unstructured_ingest-0.3.14.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.3.13.dist-info → unstructured_ingest-0.3.14.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.3.13.dist-info → unstructured_ingest-0.3.14.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.3.13.dist-info → unstructured_ingest-0.3.14.dist-info}/top_level.txt +0 -0

--- a/test/integration/connectors/test_lancedb.py
+++ b/test/integration/connectors/test_lancedb.py
@@ -11,7 +11,7 @@ import pytest_asyncio
 from lancedb import AsyncConnection
 from upath import UPath

-from test.integration.connectors.utils.constants import DESTINATION_TAG
+from test.integration.connectors.utils.constants import DESTINATION_TAG, VECTOR_DB_TAG
 from unstructured_ingest.v2.constants import RECORD_ID_LABEL
 from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.lancedb.aws import (
@@ -106,7 +106,7 @@ async def connection_with_uri(request, tmp_path: Path):


 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 @pytest.mark.parametrize("connection_with_uri", ["local", "s3", "gcs", "az"], indirect=True)
 async def test_lancedb_destination(
 upload_file: Path,
@@ -164,7 +164,7 @@ async def test_lancedb_destination(


 class TestPrecheck:
-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 @pytest.mark.parametrize("connection_with_uri", ["local", "s3", "gcs", "az"], indirect=True)
 def test_succeeds(
 self,
--- a/test/integration/connectors/test_milvus.py
+++ b/test/integration/connectors/test_milvus.py
@@ -13,7 +13,11 @@ from pymilvus import (
 )
 from pymilvus.milvus_client import IndexParams

-from test.integration.connectors.utils.constants import
+from test.integration.connectors.utils.constants import (
+DESTINATION_TAG,
+VECTOR_DB_TAG,
+env_setup_path,
+)
 from test.integration.connectors.utils.docker import healthcheck_wait
 from test.integration.connectors.utils.docker_compose import docker_compose_context
 from test.integration.connectors.utils.validation.destination import (
@@ -112,7 +116,7 @@ def validate_count(


 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 async def test_milvus_destination(
 upload_file: Path,
 collection: str,
@@ -150,7 +154,7 @@ async def test_milvus_destination(
 validate_count(client=client, expected_count=expected_count)


-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 def test_precheck_succeeds(collection: str):
 uploader = MilvusUploader(
 connection_config=MilvusConnectionConfig(uri=DB_URI),
@@ -159,7 +163,7 @@ def test_precheck_succeeds(collection: str):
 uploader.precheck()


-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 def test_precheck_fails_on_nonexistent_collection(collection: str):
 uploader = MilvusUploader(
 connection_config=MilvusConnectionConfig(uri=DB_URI),
@@ -174,7 +178,7 @@ def test_precheck_fails_on_nonexistent_collection(collection: str):
 uploader.precheck()


-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 def test_precheck_fails_on_nonexisting_db(collection: str):
 uploader = MilvusUploader(
 connection_config=MilvusConnectionConfig(uri=DB_URI),
@@ -187,6 +191,7 @@ def test_precheck_fails_on_nonexisting_db(collection: str):
 uploader.precheck()


+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 @pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
 def test_milvus_stager(
 request: TopRequest,
--- a/test/integration/connectors/test_mongodb.py
+++ b/test/integration/connectors/test_mongodb.py
@@ -13,7 +13,7 @@ from pymongo.database import Database
 from pymongo.mongo_client import MongoClient
 from pymongo.operations import SearchIndexModel

-from test.integration.connectors.utils.constants import DESTINATION_TAG, SOURCE_TAG
+from test.integration.connectors.utils.constants import DESTINATION_TAG, NOSQL_TAG, SOURCE_TAG
 from test.integration.connectors.utils.validation.source import (
 SourceValidationConfigs,
 source_connector_validation,
@@ -180,7 +180,7 @@ def validate_collection_vector(


 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, NOSQL_TAG)
 @requires_env("MONGODB_URI", "MONGODB_DATABASE")
 async def test_mongodb_source(temp_dir: Path):
 env_data = get_env_data()
@@ -205,7 +205,7 @@ async def test_mongodb_source(temp_dir: Path):
 )


-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, NOSQL_TAG)
 def test_mongodb_indexer_precheck_fail_no_host():
 indexer_config = MongoDBIndexerConfig(
 database="non-existent-database", collection="non-existent-database"
@@ -218,7 +218,7 @@ def test_mongodb_indexer_precheck_fail_no_host():
 indexer.precheck()


-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, NOSQL_TAG)
 @requires_env("MONGODB_URI", "MONGODB_DATABASE")
 def test_mongodb_indexer_precheck_fail_no_database():
 env_data = get_env_data()
@@ -233,7 +233,7 @@ def test_mongodb_indexer_precheck_fail_no_database():
 indexer.precheck()


-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, NOSQL_TAG)
 @requires_env("MONGODB_URI", "MONGODB_DATABASE")
 def test_mongodb_indexer_precheck_fail_no_collection():
 env_data = get_env_data()
@@ -249,7 +249,7 @@ def test_mongodb_indexer_precheck_fail_no_collection():


 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, NOSQL_TAG)
 @requires_env("MONGODB_URI", "MONGODB_DATABASE")
 async def test_mongodb_destination(
 upload_file: Path,
@@ -289,7 +289,7 @@ async def test_mongodb_destination(
 validate_collection_count(collection=destination_collection, expected_records=expected_records)


-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, NOSQL_TAG)
 def test_mongodb_uploader_precheck_fail_no_host():
 upload_config = MongoDBUploaderConfig(
 database="database",
@@ -303,7 +303,7 @@ def test_mongodb_uploader_precheck_fail_no_host():
 uploader.precheck()


-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, NOSQL_TAG)
 @requires_env("MONGODB_URI", "MONGODB_DATABASE")
 def test_mongodb_uploader_precheck_fail_no_database():
 env_data = get_env_data()
@@ -319,7 +319,7 @@ def test_mongodb_uploader_precheck_fail_no_database():
 uploader.precheck()


-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, NOSQL_TAG)
 @requires_env("MONGODB_URI", "MONGODB_DATABASE")
 def test_mongodb_uploader_precheck_fail_no_collection():
 env_data = get_env_data()
--- a/test/integration/connectors/test_neo4j.py
+++ b/test/integration/connectors/test_neo4j.py
@@ -9,7 +9,7 @@ from neo4j import AsyncGraphDatabase, Driver, GraphDatabase
 from neo4j.exceptions import ServiceUnavailable
 from pytest_check import check

-from test.integration.connectors.utils.constants import DESTINATION_TAG
+from test.integration.connectors.utils.constants import DESTINATION_TAG, GRAPH_DB_TAG
 from test.integration.connectors.utils.docker import container_context
 from unstructured_ingest.error import DestinationConnectionError
 from unstructured_ingest.utils.chunking import elements_from_base64_gzipped_json
@@ -51,7 +51,7 @@ def _neo4j_server():


 @pytest.mark.asyncio
-@pytest.mark.tags(DESTINATION_TAG, CONNECTOR_TYPE)
+@pytest.mark.tags(DESTINATION_TAG, CONNECTOR_TYPE, GRAPH_DB_TAG)
 async def test_neo4j_destination(upload_file: Path, tmp_path: Path):
 stager = Neo4jUploadStager()
 uploader = Neo4jUploader(
@@ -104,7 +104,7 @@ async def test_neo4j_destination(upload_file: Path, tmp_path: Path):
 await validate_uploaded_graph(modified_upload_file)


-@pytest.mark.tags(DESTINATION_TAG, CONNECTOR_TYPE)
+@pytest.mark.tags(DESTINATION_TAG, CONNECTOR_TYPE, GRAPH_DB_TAG)
 class TestPrecheck:
 @pytest.fixture
 def configured_uploader(self) -> Neo4jUploader:
@@ -199,13 +199,15 @@ async def validate_uploaded_graph(upload_file: Path):
 try:
 nodes_count = len((await driver.execute_query("MATCH (n) RETURN n"))[0])
 chunk_nodes_count = len(
-(await driver.execute_query(f"MATCH (n: {Label.CHUNK}) RETURN n"))[0]
+(await driver.execute_query(f"MATCH (n: {Label.CHUNK.value}) RETURN n"))[0]
 )
 document_nodes_count = len(
-(await driver.execute_query(f"MATCH (n: {Label.DOCUMENT}) RETURN n"))[0]
+(await driver.execute_query(f"MATCH (n: {Label.DOCUMENT.value}) RETURN n"))[0]
 )
 element_nodes_count = len(
-(await driver.execute_query(f"MATCH (n: {Label.UNSTRUCTURED_ELEMENT}) RETURN n"))[
+(await driver.execute_query(f"MATCH (n: {Label.UNSTRUCTURED_ELEMENT.value}) RETURN n"))[
+0
+]
 )
 with check:
 assert nodes_count == expected_nodes_count
@@ -217,12 +219,18 @@ async def validate_uploaded_graph(upload_file: Path):
 assert element_nodes_count == expected_element_count

 records, _, _ = await driver.execute_query(
-f"
+f"""
+MATCH ()-[r:{Relationship.PART_OF_DOCUMENT.value}]->(:{Label.DOCUMENT.value})
+RETURN r
+"""
 )
 part_of_document_count = len(records)

 records, _, _ = await driver.execute_query(
-f"
+f"""
+MATCH (:{Label.CHUNK.value})-[r:{Relationship.NEXT_CHUNK.value}]->(:{Label.CHUNK.value})
+RETURN r
+"""
 )
 next_chunk_count = len(records)

--- a/test/integration/connectors/test_notion.py
+++ b/test/integration/connectors/test_notion.py
@@ -1,5 +1,8 @@
 import os

+import pytest
+
+from test.integration.connectors.utils.constants import SOURCE_TAG, UNCATEGORIZED_TAG
 from test.integration.connectors.utils.validation.source import (
 SourceValidationConfigs,
 get_all_file_data,
@@ -8,6 +11,7 @@ from test.integration.connectors.utils.validation.source import (
 )
 from unstructured_ingest.v2.interfaces import Downloader, Indexer
 from unstructured_ingest.v2.processes.connectors.notion.connector import (
+CONNECTOR_TYPE,
 NotionAccessConfig,
 NotionConnectionConfig,
 NotionDownloader,
@@ -17,6 +21,7 @@ from unstructured_ingest.v2.processes.connectors.notion.connector import (
 )


+@pytest.mark.tags(SOURCE_TAG, CONNECTOR_TYPE, UNCATEGORIZED_TAG)
 def test_notion_source_database(temp_dir):
 # Retrieve environment variables
 notion_api_key = os.environ["NOTION_API_KEY"]
@@ -55,6 +60,7 @@ def test_notion_source_database(temp_dir):
 )


+@pytest.mark.tags(SOURCE_TAG, CONNECTOR_TYPE, UNCATEGORIZED_TAG)
 def test_notion_source_page(temp_dir):
 # Retrieve environment variables
 notion_api_key = os.environ["NOTION_API_KEY"]
@@ -93,6 +99,7 @@ def test_notion_source_page(temp_dir):
 )


+@pytest.mark.tags(SOURCE_TAG, CONNECTOR_TYPE, UNCATEGORIZED_TAG)
 def source_connector_validation(
 indexer: Indexer,
 downloader: Downloader,
--- a/test/integration/connectors/test_onedrive.py
+++ b/test/integration/connectors/test_onedrive.py
@@ -5,9 +5,7 @@ from pathlib import Path
 import pytest
 from office365.graph_client import GraphClient

-from test.integration.connectors.utils.constants import (
-DESTINATION_TAG,
-)
+from test.integration.connectors.utils.constants import BLOB_STORAGE_TAG, DESTINATION_TAG
 from test.integration.utils import requires_env
 from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.onedrive import (
@@ -67,7 +65,7 @@ def get_connection_config():
 return connection_config


-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, BLOB_STORAGE_TAG)
 @requires_env("MS_CLIENT_CRED", "MS_CLIENT_ID", "MS_TENANT_ID", "MS_USER_PNAME")
 @pytest.mark.xfail(
 reason="Issues with test setup on the provider side."
--- a/test/integration/connectors/test_pinecone.py
+++ b/test/integration/connectors/test_pinecone.py
@@ -12,9 +12,7 @@ from _pytest.fixtures import TopRequest
 from pinecone import Pinecone, ServerlessSpec
 from pinecone.core.openapi.shared.exceptions import NotFoundException

-from test.integration.connectors.utils.constants import (
-DESTINATION_TAG,
-)
+from test.integration.connectors.utils.constants import DESTINATION_TAG, VECTOR_DB_TAG
 from test.integration.connectors.utils.validation.destination import (
 StagerValidationConfigs,
 stager_validation,
@@ -133,7 +131,7 @@ def validate_pinecone_index(

 @requires_env(API_KEY)
 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 async def test_pinecone_destination(pinecone_index: str, upload_file: Path, temp_dir: Path):
 file_data = FileData(
 source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
@@ -176,7 +174,7 @@ async def test_pinecone_destination(pinecone_index: str, upload_file: Path, temp

 @requires_env(API_KEY)
 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 @pytest.mark.skip(reason="TODO: get this to work")
 async def test_pinecone_destination_large_index(
 pinecone_index: str, upload_file: Path, temp_dir: Path
@@ -227,7 +225,7 @@ async def test_pinecone_destination_large_index(


 @requires_env(API_KEY)
-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 def test_large_metadata(pinecone_index: str, tmp_path: Path, upload_file: Path):
 stager = PineconeUploadStager()
 uploader = PineconeUploader(
@@ -272,6 +270,7 @@ def test_large_metadata(pinecone_index: str, tmp_path: Path, upload_file: Path):
 validate_pinecone_index(pinecone_index, 1, interval=5)


+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 @pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
 def test_pinecone_stager(
 request: TopRequest,
--- a/test/integration/connectors/test_qdrant.py
+++ b/test/integration/connectors/test_qdrant.py
@@ -9,7 +9,7 @@ import pytest
 from _pytest.fixtures import TopRequest
 from qdrant_client import AsyncQdrantClient

-from test.integration.connectors.utils.constants import DESTINATION_TAG
+from test.integration.connectors.utils.constants import DESTINATION_TAG, VECTOR_DB_TAG
 from test.integration.connectors.utils.docker import container_context
 from test.integration.connectors.utils.validation.destination import (
 StagerValidationConfigs,
@@ -75,7 +75,7 @@ async def validate_upload(client: AsyncQdrantClient, upload_file: Path):


 @pytest.mark.asyncio
-@pytest.mark.tags(LOCAL_CONNECTOR_TYPE, DESTINATION_TAG, "qdrant")
+@pytest.mark.tags(LOCAL_CONNECTOR_TYPE, DESTINATION_TAG, "qdrant", VECTOR_DB_TAG)
 async def test_qdrant_destination_local(upload_file: Path, tmp_path: Path):
 connection_kwargs = {"path": str(tmp_path / "qdrant")}
 async with qdrant_client(connection_kwargs) as client:
@@ -117,7 +117,7 @@ def docker_context():


 @pytest.mark.asyncio
-@pytest.mark.tags(SERVER_CONNECTOR_TYPE, DESTINATION_TAG, "qdrant")
+@pytest.mark.tags(SERVER_CONNECTOR_TYPE, DESTINATION_TAG, "qdrant", VECTOR_DB_TAG)
 async def test_qdrant_destination_server(upload_file: Path, tmp_path: Path, docker_context):
 connection_kwargs = {"location": "http://localhost:6333"}
 async with qdrant_client(connection_kwargs) as client:
@@ -153,7 +153,7 @@ async def test_qdrant_destination_server(upload_file: Path, tmp_path: Path, dock


 @pytest.mark.asyncio
-@pytest.mark.tags(SERVER_CONNECTOR_TYPE, DESTINATION_TAG, "qdrant")
+@pytest.mark.tags(SERVER_CONNECTOR_TYPE, DESTINATION_TAG, "qdrant", VECTOR_DB_TAG)
 @requires_env("QDRANT_API_KEY", "QDRANT_SERVER_URL")
 async def test_qdrant_destination_cloud(upload_file: Path, tmp_path: Path):
 server_url = os.environ["QDRANT_SERVER_URL"]
@@ -197,6 +197,7 @@ async def test_qdrant_destination_cloud(upload_file: Path, tmp_path: Path):
 await validate_upload(client=client, upload_file=upload_file)


+@pytest.mark.tags(SERVER_CONNECTOR_TYPE, DESTINATION_TAG, "qdrant", VECTOR_DB_TAG)
 @pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
 def test_qdrant_stager(
 request: TopRequest,
--- a/test/integration/connectors/test_redis.py
+++ b/test/integration/connectors/test_redis.py
@@ -9,7 +9,7 @@ import pytest
 from redis import exceptions as redis_exceptions
 from redis.asyncio import Redis, from_url

-from test.integration.connectors.utils.constants import DESTINATION_TAG
+from test.integration.connectors.utils.constants import DESTINATION_TAG, NOSQL_TAG
 from test.integration.utils import requires_env
 from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.redisdb import (
@@ -96,7 +96,7 @@ async def redis_destination_test(


 @pytest.mark.asyncio
-@pytest.mark.tags(REDIS_CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(REDIS_CONNECTOR_TYPE, DESTINATION_TAG, NOSQL_TAG)
 @requires_env("AZURE_REDIS_INGEST_TEST_PASSWORD")
 async def test_redis_destination_azure_with_password(upload_file: Path, tmp_path: Path):
 connection_kwargs = {
@@ -110,7 +110,7 @@ async def test_redis_destination_azure_with_password(upload_file: Path, tmp_path


 @pytest.mark.asyncio
-@pytest.mark.tags(REDIS_CONNECTOR_TYPE, DESTINATION_TAG, "redis")
+@pytest.mark.tags(REDIS_CONNECTOR_TYPE, DESTINATION_TAG, "redis", NOSQL_TAG)
 @requires_env("AZURE_REDIS_INGEST_TEST_PASSWORD")
 async def test_redis_destination_azure_with_uri(upload_file: Path, tmp_path: Path):
 connection_kwargs = {}
--- a/test/integration/connectors/test_s3.py
+++ b/test/integration/connectors/test_s3.py
@@ -6,6 +6,7 @@ from pathlib import Path
 import pytest

 from test.integration.connectors.utils.constants import (
+BLOB_STORAGE_TAG,
 DESTINATION_TAG,
 SOURCE_TAG,
 env_setup_path,
@@ -47,7 +48,7 @@ def anon_connection_config() -> S3ConnectionConfig:


 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
 async def test_s3_source(anon_connection_config: S3ConnectionConfig):
 indexer_config = S3IndexerConfig(remote_url="s3://utic-dev-tech-fixtures/small-pdf-set/")
 with tempfile.TemporaryDirectory() as tempdir:
@@ -70,7 +71,7 @@ async def test_s3_source(anon_connection_config: S3ConnectionConfig):


 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
 async def test_s3_source_special_char(anon_connection_config: S3ConnectionConfig):
 indexer_config = S3IndexerConfig(remote_url="s3://utic-dev-tech-fixtures/special-characters/")
 with tempfile.TemporaryDirectory() as tempdir:
@@ -92,7 +93,7 @@ async def test_s3_source_special_char(anon_connection_config: S3ConnectionConfig
 )


-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
 def test_s3_source_no_access(anon_connection_config: S3ConnectionConfig):
 indexer_config = S3IndexerConfig(remote_url="s3://utic-ingest-test-fixtures/destination/")
 indexer = S3Indexer(connection_config=anon_connection_config, index_config=indexer_config)
@@ -100,7 +101,7 @@ def test_s3_source_no_access(anon_connection_config: S3ConnectionConfig):
 indexer.precheck()


-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
 def test_s3_source_no_bucket(anon_connection_config: S3ConnectionConfig):
 indexer_config = S3IndexerConfig(remote_url="s3://fake-bucket")
 indexer = S3Indexer(connection_config=anon_connection_config, index_config=indexer_config)
@@ -109,7 +110,7 @@ def test_s3_source_no_bucket(anon_connection_config: S3ConnectionConfig):


 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, "minio")
+@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, "minio", BLOB_STORAGE_TAG)
 async def test_s3_minio_source(anon_connection_config: S3ConnectionConfig):
 anon_connection_config.endpoint_url = "http://localhost:9000"
 indexer_config = S3IndexerConfig(remote_url="s3://utic-dev-tech-fixtures/")
@@ -149,7 +150,7 @@ def get_aws_credentials() -> dict:


 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, BLOB_STORAGE_TAG)
 @requires_env("S3_INGEST_TEST_ACCESS_KEY", "S3_INGEST_TEST_SECRET_KEY")
 async def test_s3_destination(upload_file: Path):
 aws_credentials = get_aws_credentials()
--- a/test/integration/connectors/test_vectara.py
+++ b/test/integration/connectors/test_vectara.py
@@ -8,7 +8,7 @@ from uuid import uuid4
 import pytest
 import requests

-from test.integration.connectors.utils.constants import DESTINATION_TAG
+from test.integration.connectors.utils.constants import DESTINATION_TAG, NOSQL_TAG
 from test.integration.utils import requires_env
 from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
 from unstructured_ingest.v2.logger import logger
@@ -211,7 +211,7 @@ def corpora_util() -> Generator[str, None, None]:


 @pytest.mark.asyncio
-@pytest.mark.tags(VECTARA_CONNECTOR_TYPE, DESTINATION_TAG, "vectara")
+@pytest.mark.tags(VECTARA_CONNECTOR_TYPE, DESTINATION_TAG, "vectara", NOSQL_TAG)
 @requires_env("VECTARA_OAUTH_CLIENT_ID", "VECTARA_OAUTH_SECRET", "VECTARA_CUSTOMER_ID")
 async def test_vectara_destination(
 upload_file: Path, tmp_path: Path, corpora_util: str, retries=30, interval=10
--- a/test/integration/connectors/utils/constants.py
+++ b/test/integration/connectors/utils/constants.py
@@ -2,6 +2,12 @@ from pathlib import Path

 SOURCE_TAG = "source"
 DESTINATION_TAG = "destination"
+BLOB_STORAGE_TAG = "blob_storage"
+SQL_TAG = "sql"
+NOSQL_TAG = "nosql"
+VECTOR_DB_TAG = "vector_db"
+GRAPH_DB_TAG = "graph_db"
+UNCATEGORIZED_TAG = "uncategorized"

 env_setup_path = Path(__file__).parents[1] / "env_setup"
 expected_results_path = Path(__file__).parents[1] / "expected_results"
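The six tag constants added above are applied across the connector tests through the custom @pytest.mark.tags marker, as the per-connector diffs in this release show. The diff does not include the collection-time plumbing for these tags; purely as an illustration, a hypothetical conftest.py hook that filters on them could look like the sketch below (the --tag option, the hook bodies, and the skip-based filtering are assumptions, not code from this package):

# conftest.py -- hypothetical sketch, not taken from unstructured-ingest.
# Keeps only tests whose @pytest.mark.tags(...) arguments include the value
# passed via --tag; everything else is marked as skipped.
import pytest


def pytest_addoption(parser):
    parser.addoption("--tag", action="store", default=None, help="only run tests carrying this tag")


def pytest_collection_modifyitems(config, items):
    wanted = config.getoption("--tag")
    if not wanted:
        return
    skip_marker = pytest.mark.skip(reason=f"test not tagged with {wanted!r}")
    for item in items:
        tags = set()
        for marker in item.iter_markers(name="tags"):
            tags.update(str(arg) for arg in marker.args)
        if wanted not in tags:
            item.add_marker(skip_marker)

With such a hook in place, something like "pytest --tag vector_db" would deselect every test that does not carry VECTOR_DB_TAG.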
--- a/test/integration/connectors/utils/docker.py
+++ b/test/integration/connectors/utils/docker.py
@@ -44,7 +44,7 @@ def get_container(
 docker_client: docker.DockerClient,
 image: str,
 ports: dict,
-name: Optional[str] =
+name: Optional[str] = None,
 environment: Optional[dict] = None,
 volumes: Optional[dict] = None,
 healthcheck: Optional[HealthCheck] = None,
@@ -115,7 +115,7 @@ def container_context(
 healthcheck: Optional[HealthCheck] = None,
 healthcheck_retries: int = 30,
 docker_client: Optional[docker.DockerClient] = None,
-name: Optional[str] =
+name: Optional[str] = None,
 ):
 docker_client = docker_client or docker.from_env()
 print(f"pulling image {image}")
--- a/test/integration/connectors/weaviate/test_cloud.py
+++ b/test/integration/connectors/weaviate/test_cloud.py
@@ -1,12 +1,15 @@
 import pytest
 from pydantic import ValidationError

+from test.integration.connectors.utils.constants import DESTINATION_TAG, VECTOR_DB_TAG
 from unstructured_ingest.v2.processes.connectors.weaviate.cloud import (
+CONNECTOR_TYPE,
 CloudWeaviateAccessConfig,
 CloudWeaviateConnectionConfig,
 )


+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 def test_weaviate_failing_connection_config():
 with pytest.raises(ValidationError):
 CloudWeaviateConnectionConfig(
@@ -16,6 +19,7 @@ def test_weaviate_failing_connection_config():
 )


+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 def test_weaviate_connection_config_happy_path():
 CloudWeaviateConnectionConfig(
 access_config=CloudWeaviateAccessConfig(
@@ -25,6 +29,7 @@ def test_weaviate_connection_config_happy_path():
 )


+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 def test_weaviate_connection_config_anonymous():
 CloudWeaviateConnectionConfig(
 access_config=CloudWeaviateAccessConfig(api_key="my key", password="password"),
--- a/test/integration/connectors/weaviate/test_local.py
+++ b/test/integration/connectors/weaviate/test_local.py
@@ -7,7 +7,7 @@ import requests
 import weaviate
 from weaviate.client import WeaviateClient

-from test.integration.connectors.utils.constants import DESTINATION_TAG
+from test.integration.connectors.utils.constants import DESTINATION_TAG, VECTOR_DB_TAG
 from test.integration.connectors.utils.docker import container_context
 from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
 from unstructured_ingest.v2.processes.connectors.weaviate.local import (
@@ -74,7 +74,7 @@ def run_uploader_and_validate(


 @pytest.mark.asyncio
-@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG)
+@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, VECTOR_DB_TAG)
 def test_weaviate_local_destination(upload_file: Path, collection: str, tmp_path: Path):
 file_data = FileData(
 source_identifiers=SourceIdentifiers(fullpath=upload_file.name, filename=upload_file.name),
--- a/unstructured_ingest/__version__.py
+++ b/unstructured_ingest/__version__.py
@@ -1 +1 @@
-__version__ = "0.3.
+__version__ = "0.3.14" # pragma: no cover
--- a/unstructured_ingest/v2/processes/connectors/neo4j.py
+++ b/unstructured_ingest/v2/processes/connectors/neo4j.py
@@ -105,7 +105,7 @@ class Neo4jUploadStager(UploadStager):
 output_filepath.parent.mkdir(parents=True, exist_ok=True)

 with open(output_filepath, "w") as file:
-
+file.write(_GraphData.from_nx(nx_graph).model_dump_json())

 return output_filepath

@@ -196,7 +196,7 @@ class _GraphData(BaseModel):


 class _Node(BaseModel):
-model_config = ConfigDict(
+model_config = ConfigDict()

 id_: str = Field(default_factory=lambda: str(uuid.uuid4()))
 labels: list[Label] = Field(default_factory=list)
@@ -207,20 +207,20 @@ class _Node(BaseModel):


 class _Edge(BaseModel):
-model_config = ConfigDict(
+model_config = ConfigDict()

 source_id: str
 destination_id: str
 relationship: Relationship


-class Label(
+class Label(Enum):
 UNSTRUCTURED_ELEMENT = "UnstructuredElement"
 CHUNK = "Chunk"
 DOCUMENT = "Document"


-class Relationship(
+class Relationship(Enum):
 PART_OF_DOCUMENT = "PART_OF_DOCUMENT"
 PART_OF_CHUNK = "PART_OF_CHUNK"
 NEXT_CHUNK = "NEXT_CHUNK"
@@ -263,14 +263,14 @@ class Neo4jUploader(Uploader):
 async def _create_uniqueness_constraints(self, client: AsyncDriver) -> None:
 for label in Label:
 logger.info(
-f"Adding id uniqueness constraint for nodes labeled '{label}'"
+f"Adding id uniqueness constraint for nodes labeled '{label.value}'"
 " if it does not already exist."
 )
-constraint_name = f"{label.lower()}_id"
+constraint_name = f"{label.value.lower()}_id"
 await client.execute_query(
 f"""
 CREATE CONSTRAINT {constraint_name} IF NOT EXISTS
-FOR (n: {label}) REQUIRE n.id IS UNIQUE
+FOR (n: {label.value}) REQUIRE n.id IS UNIQUE
 """
 )

@@ -278,8 +278,8 @@ class Neo4jUploader(Uploader):
 logger.info(f"Deleting old data for the record '{file_data.identifier}' (if present).")
 _, summary, _ = await client.execute_query(
 f"""
-MATCH (n: {Label.DOCUMENT} {{id: $identifier}})
-MATCH (n)--(m: {Label.CHUNK}|{Label.UNSTRUCTURED_ELEMENT})
+MATCH (n: {Label.DOCUMENT.value} {{id: $identifier}})
+MATCH (n)--(m: {Label.CHUNK.value}|{Label.UNSTRUCTURED_ELEMENT.value})
 DETACH DELETE m""",
 identifier=file_data.identifier,
 )
@@ -349,7 +349,7 @@ class Neo4jUploader(Uploader):

 @staticmethod
 def _create_nodes_query(nodes: list[_Node], labels: tuple[Label, ...]) -> tuple[str, dict]:
-labels_string = ", ".join(labels)
+labels_string = ", ".join([label.value for label in labels])
 logger.info(f"Preparing MERGE query for {len(nodes)} nodes labeled '{labels_string}'.")
 query_string = f"""
 UNWIND $nodes AS node
@@ -366,7 +366,7 @@ class Neo4jUploader(Uploader):
 UNWIND $edges AS edge
 MATCH (u {{id: edge.source}})
 MATCH (v {{id: edge.destination}})
-MERGE (u)-[:{relationship}]->(v)
+MERGE (u)-[:{relationship.value}]->(v)
 """
 parameters = {
 "edges": [