unstructured-ingest 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic.
- test/integration/chunkers/test_chunkers.py +0 -11
- test/integration/connectors/conftest.py +11 -1
- test/integration/connectors/databricks_tests/test_volumes_native.py +4 -3
- test/integration/connectors/duckdb/conftest.py +14 -0
- test/integration/connectors/duckdb/test_duckdb.py +51 -44
- test/integration/connectors/duckdb/test_motherduck.py +37 -48
- test/integration/connectors/elasticsearch/test_elasticsearch.py +26 -4
- test/integration/connectors/elasticsearch/test_opensearch.py +26 -3
- test/integration/connectors/sql/test_postgres.py +103 -92
- test/integration/connectors/sql/test_singlestore.py +112 -100
- test/integration/connectors/sql/test_snowflake.py +142 -117
- test/integration/connectors/sql/test_sqlite.py +87 -76
- test/integration/connectors/test_astradb.py +62 -1
- test/integration/connectors/test_azure_ai_search.py +25 -3
- test/integration/connectors/test_chroma.py +120 -0
- test/integration/connectors/test_confluence.py +4 -4
- test/integration/connectors/test_delta_table.py +1 -0
- test/integration/connectors/test_kafka.py +6 -6
- test/integration/connectors/test_milvus.py +21 -0
- test/integration/connectors/test_mongodb.py +7 -4
- test/integration/connectors/test_neo4j.py +236 -0
- test/integration/connectors/test_pinecone.py +25 -1
- test/integration/connectors/test_qdrant.py +25 -2
- test/integration/connectors/test_s3.py +9 -6
- test/integration/connectors/utils/docker.py +6 -0
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +88 -0
- test/integration/connectors/utils/validation/equality.py +75 -0
- test/integration/connectors/utils/{validation.py → validation/source.py} +42 -98
- test/integration/connectors/utils/validation/utils.py +36 -0
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/utils/chunking.py +11 -0
- unstructured_ingest/utils/data_prep.py +36 -0
- unstructured_ingest/v2/interfaces/__init__.py +3 -1
- unstructured_ingest/v2/interfaces/file_data.py +58 -14
- unstructured_ingest/v2/interfaces/upload_stager.py +70 -6
- unstructured_ingest/v2/interfaces/uploader.py +11 -2
- unstructured_ingest/v2/pipeline/steps/chunk.py +2 -1
- unstructured_ingest/v2/pipeline/steps/download.py +5 -4
- unstructured_ingest/v2/pipeline/steps/embed.py +2 -1
- unstructured_ingest/v2/pipeline/steps/filter.py +2 -2
- unstructured_ingest/v2/pipeline/steps/index.py +4 -4
- unstructured_ingest/v2/pipeline/steps/partition.py +3 -2
- unstructured_ingest/v2/pipeline/steps/stage.py +5 -3
- unstructured_ingest/v2/pipeline/steps/uncompress.py +2 -2
- unstructured_ingest/v2/pipeline/steps/upload.py +3 -3
- unstructured_ingest/v2/processes/connectors/__init__.py +3 -0
- unstructured_ingest/v2/processes/connectors/astradb.py +43 -63
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +16 -40
- unstructured_ingest/v2/processes/connectors/chroma.py +36 -59
- unstructured_ingest/v2/processes/connectors/couchbase.py +92 -93
- unstructured_ingest/v2/processes/connectors/delta_table.py +11 -33
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +26 -26
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +29 -20
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +37 -44
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +46 -75
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +12 -35
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +12 -35
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +15 -42
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +33 -29
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +12 -34
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +13 -37
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +19 -33
- unstructured_ingest/v2/processes/connectors/gitlab.py +32 -31
- unstructured_ingest/v2/processes/connectors/google_drive.py +32 -29
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +2 -4
- unstructured_ingest/v2/processes/connectors/kdbai.py +44 -70
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +8 -10
- unstructured_ingest/v2/processes/connectors/local.py +13 -2
- unstructured_ingest/v2/processes/connectors/milvus.py +16 -57
- unstructured_ingest/v2/processes/connectors/mongodb.py +99 -108
- unstructured_ingest/v2/processes/connectors/neo4j.py +383 -0
- unstructured_ingest/v2/processes/connectors/onedrive.py +1 -1
- unstructured_ingest/v2/processes/connectors/pinecone.py +3 -33
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +32 -41
- unstructured_ingest/v2/processes/connectors/sql/postgres.py +5 -5
- unstructured_ingest/v2/processes/connectors/sql/singlestore.py +5 -5
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +5 -5
- unstructured_ingest/v2/processes/connectors/sql/sql.py +72 -66
- unstructured_ingest/v2/processes/connectors/sql/sqlite.py +5 -5
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +9 -31
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/METADATA +20 -15
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/RECORD +87 -79
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/top_level.txt +0 -0
test/integration/connectors/sql/test_postgres.py

@@ -1,15 +1,18 @@
-import
-from contextlib import contextmanager
+import json
 from pathlib import Path
 
-import pandas as pd
 import pytest
+from _pytest.fixtures import TopRequest
 from psycopg2 import connect
 
 from test.integration.connectors.utils.constants import DESTINATION_TAG, SOURCE_TAG, env_setup_path
 from test.integration.connectors.utils.docker_compose import docker_compose_context
-from test.integration.connectors.utils.validation import (
-    ValidationConfigs,
+from test.integration.connectors.utils.validation.destination import (
+    StagerValidationConfigs,
+    stager_validation,
+)
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
     source_connector_validation,
 )
 from unstructured_ingest.v2.interfaces import FileData
@@ -25,16 +28,17 @@ from unstructured_ingest.v2.processes.connectors.sql.postgres import (
     PostgresUploadStager,
 )
 
-SEED_DATA_ROWS =
+SEED_DATA_ROWS = 10
 
 
-@
-def postgres_download_setup() -> None:
+@pytest.fixture
+def source_database_setup() -> str:
+    db_name = "test_db"
     with docker_compose_context(docker_compose_path=env_setup_path / "sql" / "postgres" / "source"):
         connection = connect(
             user="unstructured",
             password="test",
-            dbname=
+            dbname=db_name,
             host="localhost",
             port=5433,
         )
@@ -43,12 +47,12 @@ def postgres_download_setup() -> None:
         sql_statment = f"INSERT INTO cars (brand, price) VALUES " f"('brand_{i}', {i})"
         cursor.execute(sql_statment)
     connection.commit()
-    yield
+    yield db_name
 
 
 @pytest.mark.asyncio
 @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, "sql")
-async def test_postgres_source():
+async def test_postgres_source(temp_dir: Path, source_database_setup: str):
     connect_params = {
         "host": "localhost",
         "port": 5433,
@@ -56,37 +60,31 @@ async def test_postgres_source():
         "user": "unstructured",
         "password": "test",
     }
-…
-            test_id="postgres",
-            expected_num_files=SEED_DATA_ROWS,
-            expected_number_indexed_file_data=4,
-            validate_downloaded_files=True,
-        ),
-    )
+    connection_config = PostgresConnectionConfig(
+        host=connect_params["host"],
+        port=connect_params["port"],
+        database=connect_params["database"],
+        username=connect_params["user"],
+        access_config=PostgresAccessConfig(password=connect_params["password"]),
+    )
+    indexer = PostgresIndexer(
+        connection_config=connection_config,
+        index_config=PostgresIndexerConfig(table_name="cars", id_column="car_id", batch_size=6),
+    )
+    downloader = PostgresDownloader(
+        connection_config=connection_config,
+        download_config=PostgresDownloaderConfig(fields=["car_id", "brand"], download_dir=temp_dir),
+    )
+    await source_connector_validation(
+        indexer=indexer,
+        downloader=downloader,
+        configs=SourceValidationConfigs(
+            test_id="postgres",
+            expected_num_files=SEED_DATA_ROWS,
+            expected_number_indexed_file_data=2,
+            validate_downloaded_files=True,
+        ),
+    )
 
 
 def validate_destination(
@@ -118,63 +116,76 @@ def validate_destination(
 
 @pytest.mark.asyncio
 @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, "sql")
-async def test_postgres_destination(upload_file: Path):
+async def test_postgres_destination(upload_file: Path, temp_dir: Path):
     # the postgres destination connector doesn't leverage the file data but is required as an input,
     # mocking it with arbitrary values to meet the base requirements:
     mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
     with docker_compose_context(
         docker_compose_path=env_setup_path / "sql" / "postgres" / "destination"
    ):
-…
-        }
-        if stager.is_async():
-            staged_path = await stager.run_async(**stager_params)
-        else:
-            staged_path = stager.run(**stager_params)
-
-        # The stager should append the `.json` suffix to the output filename passed in.
-        assert staged_path.name == "test_db.json"
-
-        connect_params = {
-            "host": "localhost",
-            "port": 5433,
-            "database": "elements",
-            "user": "unstructured",
-            "password": "test",
-        }
-
-        uploader = PostgresUploader(
-            connection_config=PostgresConnectionConfig(
-                host=connect_params["host"],
-                port=connect_params["port"],
-                database=connect_params["database"],
-                username=connect_params["user"],
-                access_config=PostgresAccessConfig(password=connect_params["password"]),
-            )
-        )
+        stager = PostgresUploadStager()
+        staged_path = stager.run(
+            elements_filepath=upload_file,
+            file_data=mock_file_data,
+            output_dir=temp_dir,
+            output_filename=upload_file.name,
+        )
 
-
+        # The stager should append the `.json` suffix to the output filename passed in.
+        assert staged_path.suffix == upload_file.suffix
 
-…
-            test_embedding=sample_element["embeddings"],
-        )
+        connect_params = {
+            "host": "localhost",
+            "port": 5433,
+            "database": "elements",
+            "user": "unstructured",
+            "password": "test",
+        }
 
-…
+        uploader = PostgresUploader(
+            connection_config=PostgresConnectionConfig(
+                host=connect_params["host"],
+                port=connect_params["port"],
+                database=connect_params["database"],
+                username=connect_params["user"],
+                access_config=PostgresAccessConfig(password=connect_params["password"]),
             )
+        )
+        uploader.precheck()
+        uploader.run(path=staged_path, file_data=mock_file_data)
+
+        with staged_path.open("r") as f:
+            staged_data = json.load(f)
+
+        sample_element = staged_data[0]
+        expected_num_elements = len(staged_data)
+        validate_destination(
+            connect_params=connect_params,
+            expected_num_elements=expected_num_elements,
+            expected_text=sample_element["text"],
+            test_embedding=sample_element["embeddings"],
+        )
+
+        uploader.run(path=staged_path, file_data=mock_file_data)
+        validate_destination(
+            connect_params=connect_params,
+            expected_num_elements=expected_num_elements,
+            expected_text=sample_element["text"],
+            test_embedding=sample_element["embeddings"],
+        )
+
+
+@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
+def test_postgres_stager(
+    request: TopRequest,
+    upload_file_str: str,
+    tmp_path: Path,
+):
+    upload_file: Path = request.getfixturevalue(upload_file_str)
+    stager = PostgresUploadStager()
+    stager_validation(
+        configs=StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22),
+        input_file=upload_file,
+        stager=stager,
+        tmp_dir=tmp_path,
+    )
test/integration/connectors/sql/test_singlestore.py

@@ -1,15 +1,18 @@
-import
-from contextlib import contextmanager
+import json
 from pathlib import Path
 
-import pandas as pd
 import pytest
 import singlestoredb as s2
+from _pytest.fixtures import TopRequest
 
 from test.integration.connectors.utils.constants import DESTINATION_TAG, SOURCE_TAG, env_setup_path
 from test.integration.connectors.utils.docker_compose import docker_compose_context
-from test.integration.connectors.utils.validation import (
-    ValidationConfigs,
+from test.integration.connectors.utils.validation.destination import (
+    StagerValidationConfigs,
+    stager_validation,
+)
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
     source_connector_validation,
 )
 from unstructured_ingest.v2.interfaces import FileData
@@ -26,11 +29,18 @@ from unstructured_ingest.v2.processes.connectors.sql.singlestore import (
     SingleStoreUploadStager,
 )
 
-SEED_DATA_ROWS =
+SEED_DATA_ROWS = 10
 
 
-@
-def singlestore_download_setup(connect_params: dict) -> None:
+@pytest.fixture
+def source_database_setup() -> dict:
+    connect_params = {
+        "host": "localhost",
+        "port": 3306,
+        "database": "ingest_test",
+        "user": "root",
+        "password": "password",
+    }
     with docker_compose_context(
         docker_compose_path=env_setup_path / "sql" / "singlestore" / "source"
     ):
@@ -40,50 +50,40 @@ def singlestore_download_setup(connect_params: dict) -> None:
         sql_statment = f"INSERT INTO cars (brand, price) VALUES " f"('brand_{i}', {i})"
         cursor.execute(sql_statment)
     connection.commit()
-    yield
+    yield connect_params
 
 
 @pytest.mark.asyncio
 @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, "sql")
-async def test_singlestore_source():
-…
-    await source_connector_validation(
-        indexer=indexer,
-        downloader=downloader,
-        configs=ValidationConfigs(
-            test_id="singlestore",
-            expected_num_files=SEED_DATA_ROWS,
-            expected_number_indexed_file_data=4,
-            validate_downloaded_files=True,
-        ),
-    )
+async def test_singlestore_source(temp_dir: Path, source_database_setup: dict):
+
+    connection_config = SingleStoreConnectionConfig(
+        host=source_database_setup["host"],
+        port=source_database_setup["port"],
+        database=source_database_setup["database"],
+        user=source_database_setup["user"],
+        access_config=SingleStoreAccessConfig(password=source_database_setup["password"]),
+    )
+    indexer = SingleStoreIndexer(
+        connection_config=connection_config,
+        index_config=SingleStoreIndexerConfig(table_name="cars", id_column="car_id", batch_size=6),
+    )
+    downloader = SingleStoreDownloader(
+        connection_config=connection_config,
+        download_config=SingleStoreDownloaderConfig(
+            fields=["car_id", "brand"], download_dir=temp_dir
+        ),
+    )
+    await source_connector_validation(
+        indexer=indexer,
+        downloader=downloader,
+        configs=SourceValidationConfigs(
+            test_id="singlestore",
+            expected_num_files=SEED_DATA_ROWS,
+            expected_number_indexed_file_data=2,
+            validate_downloaded_files=True,
+        ),
+    )
 
 
 def validate_destination(
@@ -102,59 +102,71 @@ def validate_destination(
 
 @pytest.mark.asyncio
 @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, "sql")
-async def test_singlestore_destination(upload_file: Path):
+async def test_singlestore_destination(upload_file: Path, temp_dir: Path):
     mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
     with docker_compose_context(
         docker_compose_path=env_setup_path / "sql" / "singlestore" / "destination"
     ):
-…
+        stager = SingleStoreUploadStager()
+        staged_path = stager.run(
+            elements_filepath=upload_file,
+            file_data=mock_file_data,
+            output_dir=temp_dir,
+            output_filename=upload_file.name,
+        )
+
+        # The stager should append the `.json` suffix to the output filename passed in.
+        assert staged_path.suffix == upload_file.suffix
+
+        connect_params = {
+            "host": "localhost",
+            "port": 3306,
+            "database": "ingest_test",
+            "user": "root",
+            "password": "password",
+        }
+
+        uploader = SingleStoreUploader(
+            connection_config=SingleStoreConnectionConfig(
+                host=connect_params["host"],
+                port=connect_params["port"],
+                database=connect_params["database"],
+                user=connect_params["user"],
+                access_config=SingleStoreAccessConfig(password=connect_params["password"]),
+            ),
+            upload_config=SingleStoreUploaderConfig(
+                table_name="elements",
+            ),
+        )
+        uploader.precheck()
+        uploader.run(path=staged_path, file_data=mock_file_data)
+
+        with staged_path.open("r") as f:
+            staged_data = json.load(f)
+        expected_num_elements = len(staged_data)
+        validate_destination(
+            connect_params=connect_params,
+            expected_num_elements=expected_num_elements,
+        )
+
+        uploader.run(path=staged_path, file_data=mock_file_data)
+        validate_destination(
+            connect_params=connect_params,
+            expected_num_elements=expected_num_elements,
+        )
+
+
+@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
+def test_singlestore_stager(
+    request: TopRequest,
+    upload_file_str: str,
+    tmp_path: Path,
+):
+    upload_file: Path = request.getfixturevalue(upload_file_str)
+    stager = SingleStoreUploadStager()
+    stager_validation(
+        configs=StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22),
+        input_file=upload_file,
+        stager=stager,
+        tmp_dir=tmp_path,
+    )