unstructured-ingest 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/chunkers/test_chunkers.py +0 -11
- test/integration/connectors/conftest.py +11 -1
- test/integration/connectors/databricks_tests/test_volumes_native.py +4 -3
- test/integration/connectors/duckdb/conftest.py +14 -0
- test/integration/connectors/duckdb/test_duckdb.py +51 -44
- test/integration/connectors/duckdb/test_motherduck.py +37 -48
- test/integration/connectors/elasticsearch/test_elasticsearch.py +26 -4
- test/integration/connectors/elasticsearch/test_opensearch.py +26 -3
- test/integration/connectors/sql/test_postgres.py +102 -91
- test/integration/connectors/sql/test_singlestore.py +111 -99
- test/integration/connectors/sql/test_snowflake.py +142 -117
- test/integration/connectors/sql/test_sqlite.py +86 -75
- test/integration/connectors/test_astradb.py +22 -1
- test/integration/connectors/test_azure_ai_search.py +25 -3
- test/integration/connectors/test_chroma.py +120 -0
- test/integration/connectors/test_confluence.py +4 -4
- test/integration/connectors/test_delta_table.py +1 -0
- test/integration/connectors/test_kafka.py +4 -4
- test/integration/connectors/test_milvus.py +21 -0
- test/integration/connectors/test_mongodb.py +3 -3
- test/integration/connectors/test_neo4j.py +236 -0
- test/integration/connectors/test_pinecone.py +25 -1
- test/integration/connectors/test_qdrant.py +25 -2
- test/integration/connectors/test_s3.py +9 -6
- test/integration/connectors/utils/docker.py +6 -0
- test/integration/connectors/utils/validation/__init__.py +0 -0
- test/integration/connectors/utils/validation/destination.py +88 -0
- test/integration/connectors/utils/validation/equality.py +75 -0
- test/integration/connectors/utils/{validation.py → validation/source.py} +15 -91
- test/integration/connectors/utils/validation/utils.py +36 -0
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/utils/chunking.py +11 -0
- unstructured_ingest/utils/data_prep.py +36 -0
- unstructured_ingest/v2/interfaces/upload_stager.py +70 -6
- unstructured_ingest/v2/interfaces/uploader.py +11 -2
- unstructured_ingest/v2/pipeline/steps/stage.py +3 -1
- unstructured_ingest/v2/processes/connectors/astradb.py +8 -30
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +16 -40
- unstructured_ingest/v2/processes/connectors/chroma.py +36 -59
- unstructured_ingest/v2/processes/connectors/couchbase.py +42 -52
- unstructured_ingest/v2/processes/connectors/delta_table.py +11 -33
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +26 -26
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +29 -20
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +37 -44
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +5 -30
- unstructured_ingest/v2/processes/connectors/gitlab.py +32 -31
- unstructured_ingest/v2/processes/connectors/google_drive.py +32 -29
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +2 -4
- unstructured_ingest/v2/processes/connectors/kdbai.py +44 -70
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +8 -10
- unstructured_ingest/v2/processes/connectors/local.py +13 -2
- unstructured_ingest/v2/processes/connectors/milvus.py +16 -57
- unstructured_ingest/v2/processes/connectors/mongodb.py +4 -8
- unstructured_ingest/v2/processes/connectors/neo4j.py +381 -0
- unstructured_ingest/v2/processes/connectors/pinecone.py +3 -33
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +32 -41
- unstructured_ingest/v2/processes/connectors/sql/sql.py +41 -40
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +9 -31
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/METADATA +18 -14
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/RECORD +64 -56
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.9.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,18 @@
|
|
|
1
|
-
import
|
|
2
|
-
from contextlib import contextmanager
|
|
1
|
+
import json
|
|
3
2
|
from pathlib import Path
|
|
4
3
|
|
|
5
|
-
import pandas as pd
|
|
6
4
|
import pytest
|
|
7
5
|
import singlestoredb as s2
|
|
6
|
+
from _pytest.fixtures import TopRequest
|
|
8
7
|
|
|
9
8
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, SOURCE_TAG, env_setup_path
|
|
10
9
|
from test.integration.connectors.utils.docker_compose import docker_compose_context
|
|
11
|
-
from test.integration.connectors.utils.validation import (
|
|
12
|
-
|
|
10
|
+
from test.integration.connectors.utils.validation.destination import (
|
|
11
|
+
StagerValidationConfigs,
|
|
12
|
+
stager_validation,
|
|
13
|
+
)
|
|
14
|
+
from test.integration.connectors.utils.validation.source import (
|
|
15
|
+
SourceValidationConfigs,
|
|
13
16
|
source_connector_validation,
|
|
14
17
|
)
|
|
15
18
|
from unstructured_ingest.v2.interfaces import FileData
|
|
@@ -29,8 +32,15 @@ from unstructured_ingest.v2.processes.connectors.sql.singlestore import (
|
|
|
29
32
|
SEED_DATA_ROWS = 20
|
|
30
33
|
|
|
31
34
|
|
|
32
|
-
@
|
|
33
|
-
def
|
|
35
|
+
@pytest.fixture
|
|
36
|
+
def source_database_setup() -> dict:
|
|
37
|
+
connect_params = {
|
|
38
|
+
"host": "localhost",
|
|
39
|
+
"port": 3306,
|
|
40
|
+
"database": "ingest_test",
|
|
41
|
+
"user": "root",
|
|
42
|
+
"password": "password",
|
|
43
|
+
}
|
|
34
44
|
with docker_compose_context(
|
|
35
45
|
docker_compose_path=env_setup_path / "sql" / "singlestore" / "source"
|
|
36
46
|
):
|
|
@@ -40,50 +50,40 @@ def singlestore_download_setup(connect_params: dict) -> None:
|
|
|
40
50
|
sql_statment = f"INSERT INTO cars (brand, price) VALUES " f"('brand_{i}', {i})"
|
|
41
51
|
cursor.execute(sql_statment)
|
|
42
52
|
connection.commit()
|
|
43
|
-
yield
|
|
53
|
+
yield connect_params
|
|
44
54
|
|
|
45
55
|
|
|
46
56
|
@pytest.mark.asyncio
|
|
47
57
|
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, "sql")
|
|
48
|
-
async def test_singlestore_source():
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
"
|
|
52
|
-
"
|
|
53
|
-
"
|
|
54
|
-
"
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
await source_connector_validation(
|
|
78
|
-
indexer=indexer,
|
|
79
|
-
downloader=downloader,
|
|
80
|
-
configs=ValidationConfigs(
|
|
81
|
-
test_id="singlestore",
|
|
82
|
-
expected_num_files=SEED_DATA_ROWS,
|
|
83
|
-
expected_number_indexed_file_data=4,
|
|
84
|
-
validate_downloaded_files=True,
|
|
85
|
-
),
|
|
86
|
-
)
|
|
58
|
+
async def test_singlestore_source(temp_dir: Path, source_database_setup: dict):
    """Run the SingleStore indexer and downloader against the seeded ``cars`` table.

    Args:
        temp_dir: Directory handed to the downloader as its ``download_dir``.
        source_database_setup: Connection parameters yielded by the fixture
            (``host``, ``port``, ``database``, ``user``, ``password``).
    """
    db = source_database_setup
    access_cfg = SingleStoreAccessConfig(password=db["password"])
    conn_cfg = SingleStoreConnectionConfig(
        host=db["host"],
        port=db["port"],
        database=db["database"],
        user=db["user"],
        access_config=access_cfg,
    )

    index_cfg = SingleStoreIndexerConfig(table_name="cars", id_column="car_id", batch_size=5)
    indexer = SingleStoreIndexer(connection_config=conn_cfg, index_config=index_cfg)

    download_cfg = SingleStoreDownloaderConfig(fields=["car_id", "brand"], download_dir=temp_dir)
    downloader = SingleStoreDownloader(connection_config=conn_cfg, download_config=download_cfg)

    # NOTE(review): 4 indexed entries presumably corresponds to SEED_DATA_ROWS (20)
    # split into batches of 5 -- confirm against the indexer implementation.
    validation_cfg = SourceValidationConfigs(
        test_id="singlestore",
        expected_num_files=SEED_DATA_ROWS,
        expected_number_indexed_file_data=4,
        validate_downloaded_files=True,
    )
    await source_connector_validation(
        indexer=indexer,
        downloader=downloader,
        configs=validation_cfg,
    )
|
|
87
87
|
|
|
88
88
|
|
|
89
89
|
def validate_destination(
|
|
@@ -102,59 +102,71 @@ def validate_destination(
|
|
|
102
102
|
|
|
103
103
|
@pytest.mark.asyncio
|
|
104
104
|
@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, "sql")
|
|
105
|
-
async def test_singlestore_destination(upload_file: Path):
|
|
105
|
+
async def test_singlestore_destination(upload_file: Path, temp_dir: Path):
    """Stage ``upload_file``, upload it to SingleStore twice, and validate the table each time.

    Args:
        upload_file: Elements file passed to the stager.
        temp_dir: Directory the stager writes its output into.
    """
    file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
    compose_dir = env_setup_path / "sql" / "singlestore" / "destination"
    with docker_compose_context(docker_compose_path=compose_dir):
        upload_stager = SingleStoreUploadStager()
        staged_file = upload_stager.run(
            elements_filepath=upload_file,
            file_data=file_data,
            output_dir=temp_dir,
            output_filename=upload_file.name,
        )

        # The staged file must carry the same suffix as the input file.
        assert staged_file.suffix == upload_file.suffix

        db_params = {
            "host": "localhost",
            "port": 3306,
            "database": "ingest_test",
            "user": "root",
            "password": "password",
        }

        conn_cfg = SingleStoreConnectionConfig(
            host=db_params["host"],
            port=db_params["port"],
            database=db_params["database"],
            user=db_params["user"],
            access_config=SingleStoreAccessConfig(password=db_params["password"]),
        )
        upload_cfg = SingleStoreUploaderConfig(table_name="elements")
        uploader = SingleStoreUploader(connection_config=conn_cfg, upload_config=upload_cfg)
        uploader.precheck()

        # First upload: the expected element count is the length of the staged JSON payload.
        uploader.run(path=staged_file, file_data=file_data)
        element_count = len(json.loads(staged_file.read_text()))
        validate_destination(connect_params=db_params, expected_num_elements=element_count)

        # Second upload of the same file: the expected element count is unchanged.
        uploader.run(path=staged_file, file_data=file_data)
        validate_destination(connect_params=db_params, expected_num_elements=element_count)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
def test_singlestore_stager(
    request: TopRequest,
    upload_file_str: str,
    tmp_path: Path,
):
    """Validate the SingleStore stager against each parametrized upload-file fixture.

    Args:
        request: Pytest request object used to resolve the fixture by name.
        upload_file_str: Name of the fixture that provides the input file.
        tmp_path: Directory passed to ``stager_validation`` as ``tmp_dir``.
    """
    # The fixture is looked up by name because it varies per parametrization.
    input_path: Path = request.getfixturevalue(upload_file_str)
    validation_cfg = StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22)
    stager_validation(
        configs=validation_cfg,
        input_file=input_path,
        stager=SingleStoreUploadStager(),
        tmp_dir=tmp_path,
    )
|
|
@@ -1,16 +1,19 @@
|
|
|
1
|
+
import json
|
|
1
2
|
import os
|
|
2
|
-
import tempfile
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
|
|
5
|
-
import docker
|
|
6
|
-
import pandas as pd
|
|
7
5
|
import pytest
|
|
8
6
|
import snowflake.connector as sf
|
|
7
|
+
from _pytest.fixtures import TopRequest
|
|
9
8
|
|
|
10
9
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, SOURCE_TAG, env_setup_path
|
|
11
10
|
from test.integration.connectors.utils.docker import container_context
|
|
12
|
-
from test.integration.connectors.utils.validation import (
|
|
13
|
-
|
|
11
|
+
from test.integration.connectors.utils.validation.destination import (
|
|
12
|
+
StagerValidationConfigs,
|
|
13
|
+
stager_validation,
|
|
14
|
+
)
|
|
15
|
+
from test.integration.connectors.utils.validation.source import (
|
|
16
|
+
SourceValidationConfigs,
|
|
14
17
|
source_connector_validation,
|
|
15
18
|
)
|
|
16
19
|
from test.integration.utils import requires_env
|
|
@@ -30,14 +33,15 @@ from unstructured_ingest.v2.processes.connectors.sql.snowflake import (
|
|
|
30
33
|
SEED_DATA_ROWS = 20
|
|
31
34
|
|
|
32
35
|
|
|
33
|
-
def seed_data():
|
|
34
|
-
|
|
35
|
-
user
|
|
36
|
-
password
|
|
37
|
-
account
|
|
38
|
-
database
|
|
39
|
-
host
|
|
40
|
-
|
|
36
|
+
def seed_data() -> dict:
|
|
37
|
+
connect_params = {
|
|
38
|
+
"user": "test",
|
|
39
|
+
"password": "test",
|
|
40
|
+
"account": "test",
|
|
41
|
+
"database": "test",
|
|
42
|
+
"host": "snowflake.localhost.localstack.cloud",
|
|
43
|
+
}
|
|
44
|
+
conn = sf.connect(**connect_params)
|
|
41
45
|
|
|
42
46
|
file = Path(env_setup_path / "sql" / "snowflake" / "source" / "snowflake-schema.sql")
|
|
43
47
|
|
|
@@ -52,16 +56,31 @@ def seed_data():
|
|
|
52
56
|
|
|
53
57
|
cur.close()
|
|
54
58
|
conn.close()
|
|
59
|
+
return connect_params
|
|
55
60
|
|
|
56
61
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
62
|
+
@pytest.fixture
def source_database_setup() -> dict:
    """Yield the connection parameters returned by ``seed_data()``.

    ``seed_data()`` is called inside a ``container_context`` for the
    ``localstack/snowflake`` image, and the yield happens while that context
    is still open.
    """
    container_env = {
        "LOCALSTACK_AUTH_TOKEN": os.getenv("LOCALSTACK_AUTH_TOKEN"),
        "EXTRA_CORS_ALLOWED_ORIGINS": "*",
    }
    port_map = {4566: 4566, 443: 443}
    with container_context(
        image="localstack/snowflake",
        environment=container_env,
        ports=port_map,
        healthcheck_retries=30,
    ):
        yield seed_data()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def init_db_destination() -> dict:
|
|
76
|
+
connect_params = {
|
|
77
|
+
"user": "test",
|
|
78
|
+
"password": "test",
|
|
79
|
+
"account": "test",
|
|
80
|
+
"database": "test",
|
|
81
|
+
"host": "snowflake.localhost.localstack.cloud",
|
|
82
|
+
}
|
|
83
|
+
conn = sf.connect(**connect_params)
|
|
65
84
|
|
|
66
85
|
file = Path(env_setup_path / "sql" / "snowflake" / "destination" / "snowflake-schema.sql")
|
|
67
86
|
|
|
@@ -73,52 +92,53 @@ def init_db_destination():
|
|
|
73
92
|
|
|
74
93
|
cur.close()
|
|
75
94
|
conn.close()
|
|
95
|
+
return connect_params
|
|
76
96
|
|
|
77
97
|
|
|
78
|
-
@pytest.
|
|
79
|
-
|
|
80
|
-
@requires_env("LOCALSTACK_AUTH_TOKEN")
|
|
81
|
-
async def test_snowflake_source():
|
|
82
|
-
docker_client = docker.from_env()
|
|
98
|
+
@pytest.fixture
def destination_database_setup() -> dict:
    """Yield the connection parameters returned by ``init_db_destination()``.

    ``init_db_destination()`` is called inside a ``container_context`` for the
    ``localstack/snowflake`` image, and the yield happens while that context
    is still open.
    """
    container_env = {
        "LOCALSTACK_AUTH_TOKEN": os.getenv("LOCALSTACK_AUTH_TOKEN"),
        "EXTRA_CORS_ALLOWED_ORIGINS": "*",
    }
    port_map = {4566: 4566, 443: 443}
    with container_context(
        image="localstack/snowflake",
        environment=container_env,
        ports=port_map,
        healthcheck_retries=30,
    ):
        yield init_db_destination()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@pytest.mark.asyncio
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, "sql")
@requires_env("LOCALSTACK_AUTH_TOKEN")
async def test_snowflake_source(temp_dir: Path, source_database_setup: dict):
    """Run the Snowflake indexer and downloader against the seeded ``cars`` table.

    Args:
        temp_dir: Directory handed to the downloader as its ``download_dir``.
        source_database_setup: Connection parameters yielded by the fixture
            (``user``, ``password``, ``account``, ``database``, ``host``).
    """
    # Take the connection settings from the fixture rather than repeating the
    # literals, so the test always targets the database the fixture seeded.
    connection_config = SnowflakeConnectionConfig(
        access_config=SnowflakeAccessConfig(password=source_database_setup["password"]),
        account=source_database_setup["account"],
        user=source_database_setup["user"],
        database=source_database_setup["database"],
        host=source_database_setup["host"],
    )
    indexer = SnowflakeIndexer(
        connection_config=connection_config,
        index_config=SnowflakeIndexerConfig(table_name="cars", id_column="CAR_ID", batch_size=5),
    )
    downloader = SnowflakeDownloader(
        connection_config=connection_config,
        download_config=SnowflakeDownloaderConfig(
            fields=["CAR_ID", "BRAND"], download_dir=temp_dir
        ),
    )
    await source_connector_validation(
        indexer=indexer,
        downloader=downloader,
        configs=SourceValidationConfigs(
            test_id="snowflake",
            expected_num_files=SEED_DATA_ROWS,
            expected_number_indexed_file_data=4,
            validate_downloaded_files=True,
        ),
    )
|
|
122
142
|
|
|
123
143
|
|
|
124
144
|
def validate_destination(
|
|
@@ -145,65 +165,70 @@ def validate_destination(
|
|
|
145
165
|
@pytest.mark.asyncio
|
|
146
166
|
@pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, "sql")
|
|
147
167
|
@requires_env("LOCALSTACK_AUTH_TOKEN")
|
|
148
|
-
async def test_snowflake_destination(
|
|
168
|
+
async def test_snowflake_destination(
    upload_file: Path, temp_dir: Path, destination_database_setup: dict
):
    """Stage ``upload_file``, upload it to Snowflake twice, and validate the table each time.

    Args:
        upload_file: Elements file passed to the stager.
        temp_dir: Directory the stager writes its output into.
        destination_database_setup: Connection parameters yielded by the fixture,
            which has already called ``init_db_destination()``.
    """
    # The snowflake destination connector doesn't leverage the file data but it is
    # required as an input, so mock it with arbitrary values to meet the base requirements:
    mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)

    # The fixture has already initialised the destination database, so it is not
    # re-initialised here and its connection parameters are reused as-is.
    connect_params = destination_database_setup

    stager = SnowflakeUploadStager()
    staged_path = stager.run(
        elements_filepath=upload_file,
        file_data=mock_file_data,
        output_dir=temp_dir,
        output_filename=upload_file.name,
    )

    # The staged file must carry the same suffix as the input file.
    assert staged_path.suffix == upload_file.suffix

    uploader = SnowflakeUploader(
        connection_config=SnowflakeConnectionConfig(
            access_config=SnowflakeAccessConfig(password=connect_params["password"]),
            account=connect_params["account"],
            user=connect_params["user"],
            database=connect_params["database"],
            host=connect_params["host"],
        )
    )
    uploader.precheck()
    uploader.run(path=staged_path, file_data=mock_file_data)

    with staged_path.open("r") as f:
        staged_data = json.load(f)
    expected_num_elements = len(staged_data)
    validate_destination(
        connect_params=connect_params,
        expected_num_elements=expected_num_elements,
    )

    # Uploading the same staged file a second time must leave the element count unchanged.
    uploader.run(path=staged_path, file_data=mock_file_data)
    validate_destination(
        connect_params=connect_params,
        expected_num_elements=expected_num_elements,
    )
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
def test_snowflake_stager(
    request: TopRequest,
    upload_file_str: str,
    tmp_path: Path,
):
    """Validate the Snowflake stager against each parametrized upload-file fixture.

    Args:
        request: Pytest request object used to resolve the fixture by name.
        upload_file_str: Name of the fixture that provides the input file.
        tmp_path: Directory passed to ``stager_validation`` as ``tmp_dir``.
    """
    # The fixture is looked up by name because it varies per parametrization.
    input_path: Path = request.getfixturevalue(upload_file_str)
    validation_cfg = StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22)
    stager_validation(
        configs=validation_cfg,
        input_file=input_path,
        stager=SnowflakeUploadStager(),
        tmp_dir=tmp_path,
    )
|