unstructured-ingest 0.3.7__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (64)
  1. test/integration/chunkers/test_chunkers.py +0 -11
  2. test/integration/connectors/conftest.py +11 -1
  3. test/integration/connectors/databricks_tests/test_volumes_native.py +4 -3
  4. test/integration/connectors/duckdb/conftest.py +14 -0
  5. test/integration/connectors/duckdb/test_duckdb.py +51 -44
  6. test/integration/connectors/duckdb/test_motherduck.py +37 -48
  7. test/integration/connectors/elasticsearch/test_elasticsearch.py +26 -4
  8. test/integration/connectors/elasticsearch/test_opensearch.py +26 -3
  9. test/integration/connectors/sql/test_postgres.py +102 -91
  10. test/integration/connectors/sql/test_singlestore.py +111 -99
  11. test/integration/connectors/sql/test_snowflake.py +142 -117
  12. test/integration/connectors/sql/test_sqlite.py +86 -75
  13. test/integration/connectors/test_astradb.py +22 -1
  14. test/integration/connectors/test_azure_ai_search.py +25 -3
  15. test/integration/connectors/test_chroma.py +120 -0
  16. test/integration/connectors/test_confluence.py +4 -4
  17. test/integration/connectors/test_delta_table.py +1 -0
  18. test/integration/connectors/test_kafka.py +4 -4
  19. test/integration/connectors/test_milvus.py +21 -0
  20. test/integration/connectors/test_mongodb.py +3 -3
  21. test/integration/connectors/test_neo4j.py +236 -0
  22. test/integration/connectors/test_pinecone.py +25 -1
  23. test/integration/connectors/test_qdrant.py +25 -2
  24. test/integration/connectors/test_s3.py +9 -6
  25. test/integration/connectors/utils/docker.py +6 -0
  26. test/integration/connectors/utils/validation/__init__.py +0 -0
  27. test/integration/connectors/utils/validation/destination.py +88 -0
  28. test/integration/connectors/utils/validation/equality.py +75 -0
  29. test/integration/connectors/utils/{validation.py → validation/source.py} +15 -91
  30. test/integration/connectors/utils/validation/utils.py +36 -0
  31. unstructured_ingest/__version__.py +1 -1
  32. unstructured_ingest/utils/chunking.py +11 -0
  33. unstructured_ingest/utils/data_prep.py +36 -0
  34. unstructured_ingest/v2/interfaces/upload_stager.py +70 -6
  35. unstructured_ingest/v2/interfaces/uploader.py +11 -2
  36. unstructured_ingest/v2/pipeline/steps/stage.py +3 -1
  37. unstructured_ingest/v2/processes/connectors/astradb.py +8 -30
  38. unstructured_ingest/v2/processes/connectors/azure_ai_search.py +16 -40
  39. unstructured_ingest/v2/processes/connectors/chroma.py +36 -59
  40. unstructured_ingest/v2/processes/connectors/couchbase.py +42 -52
  41. unstructured_ingest/v2/processes/connectors/delta_table.py +11 -33
  42. unstructured_ingest/v2/processes/connectors/duckdb/base.py +26 -26
  43. unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +29 -20
  44. unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +37 -44
  45. unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +5 -30
  46. unstructured_ingest/v2/processes/connectors/gitlab.py +32 -31
  47. unstructured_ingest/v2/processes/connectors/google_drive.py +32 -29
  48. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +2 -4
  49. unstructured_ingest/v2/processes/connectors/kdbai.py +44 -70
  50. unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +8 -10
  51. unstructured_ingest/v2/processes/connectors/local.py +13 -2
  52. unstructured_ingest/v2/processes/connectors/milvus.py +16 -57
  53. unstructured_ingest/v2/processes/connectors/mongodb.py +4 -8
  54. unstructured_ingest/v2/processes/connectors/neo4j.py +381 -0
  55. unstructured_ingest/v2/processes/connectors/pinecone.py +23 -65
  56. unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +32 -41
  57. unstructured_ingest/v2/processes/connectors/sql/sql.py +41 -40
  58. unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +9 -31
  59. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/METADATA +21 -17
  60. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/RECORD +64 -56
  61. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/LICENSE.md +0 -0
  62. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/WHEEL +0 -0
  63. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/entry_points.txt +0 -0
  64. {unstructured_ingest-0.3.7.dist-info → unstructured_ingest-0.3.9.dist-info}/top_level.txt +0 -0
@@ -1,15 +1,18 @@
1
- import tempfile
2
- from contextlib import contextmanager
1
+ import json
3
2
  from pathlib import Path
4
3
 
5
- import pandas as pd
6
4
  import pytest
7
5
  import singlestoredb as s2
6
+ from _pytest.fixtures import TopRequest
8
7
 
9
8
  from test.integration.connectors.utils.constants import DESTINATION_TAG, SOURCE_TAG, env_setup_path
10
9
  from test.integration.connectors.utils.docker_compose import docker_compose_context
11
- from test.integration.connectors.utils.validation import (
12
- ValidationConfigs,
10
+ from test.integration.connectors.utils.validation.destination import (
11
+ StagerValidationConfigs,
12
+ stager_validation,
13
+ )
14
+ from test.integration.connectors.utils.validation.source import (
15
+ SourceValidationConfigs,
13
16
  source_connector_validation,
14
17
  )
15
18
  from unstructured_ingest.v2.interfaces import FileData
@@ -29,8 +32,15 @@ from unstructured_ingest.v2.processes.connectors.sql.singlestore import (
29
32
  SEED_DATA_ROWS = 20
30
33
 
31
34
 
32
- @contextmanager
33
- def singlestore_download_setup(connect_params: dict) -> None:
35
+ @pytest.fixture
36
+ def source_database_setup() -> dict:
37
+ connect_params = {
38
+ "host": "localhost",
39
+ "port": 3306,
40
+ "database": "ingest_test",
41
+ "user": "root",
42
+ "password": "password",
43
+ }
34
44
  with docker_compose_context(
35
45
  docker_compose_path=env_setup_path / "sql" / "singlestore" / "source"
36
46
  ):
@@ -40,50 +50,40 @@ def singlestore_download_setup(connect_params: dict) -> None:
40
50
  sql_statment = f"INSERT INTO cars (brand, price) VALUES " f"('brand_{i}', {i})"
41
51
  cursor.execute(sql_statment)
42
52
  connection.commit()
43
- yield
53
+ yield connect_params
44
54
 
45
55
 
46
56
  @pytest.mark.asyncio
47
57
  @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, "sql")
48
- async def test_singlestore_source():
49
- connect_params = {
50
- "host": "localhost",
51
- "port": 3306,
52
- "database": "ingest_test",
53
- "user": "root",
54
- "password": "password",
55
- }
56
- with singlestore_download_setup(connect_params=connect_params):
57
- with tempfile.TemporaryDirectory() as tmpdir:
58
- connection_config = SingleStoreConnectionConfig(
59
- host=connect_params["host"],
60
- port=connect_params["port"],
61
- database=connect_params["database"],
62
- user=connect_params["user"],
63
- access_config=SingleStoreAccessConfig(password=connect_params["password"]),
64
- )
65
- indexer = SingleStoreIndexer(
66
- connection_config=connection_config,
67
- index_config=SingleStoreIndexerConfig(
68
- table_name="cars", id_column="car_id", batch_size=5
69
- ),
70
- )
71
- downloader = SingleStoreDownloader(
72
- connection_config=connection_config,
73
- download_config=SingleStoreDownloaderConfig(
74
- fields=["car_id", "brand"], download_dir=Path(tmpdir)
75
- ),
76
- )
77
- await source_connector_validation(
78
- indexer=indexer,
79
- downloader=downloader,
80
- configs=ValidationConfigs(
81
- test_id="singlestore",
82
- expected_num_files=SEED_DATA_ROWS,
83
- expected_number_indexed_file_data=4,
84
- validate_downloaded_files=True,
85
- ),
86
- )
58
+ async def test_singlestore_source(temp_dir: Path, source_database_setup: dict):
59
+
60
+ connection_config = SingleStoreConnectionConfig(
61
+ host=source_database_setup["host"],
62
+ port=source_database_setup["port"],
63
+ database=source_database_setup["database"],
64
+ user=source_database_setup["user"],
65
+ access_config=SingleStoreAccessConfig(password=source_database_setup["password"]),
66
+ )
67
+ indexer = SingleStoreIndexer(
68
+ connection_config=connection_config,
69
+ index_config=SingleStoreIndexerConfig(table_name="cars", id_column="car_id", batch_size=5),
70
+ )
71
+ downloader = SingleStoreDownloader(
72
+ connection_config=connection_config,
73
+ download_config=SingleStoreDownloaderConfig(
74
+ fields=["car_id", "brand"], download_dir=temp_dir
75
+ ),
76
+ )
77
+ await source_connector_validation(
78
+ indexer=indexer,
79
+ downloader=downloader,
80
+ configs=SourceValidationConfigs(
81
+ test_id="singlestore",
82
+ expected_num_files=SEED_DATA_ROWS,
83
+ expected_number_indexed_file_data=4,
84
+ validate_downloaded_files=True,
85
+ ),
86
+ )
87
87
 
88
88
 
89
89
  def validate_destination(
@@ -102,59 +102,71 @@ def validate_destination(
102
102
 
103
103
  @pytest.mark.asyncio
104
104
  @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, "sql")
105
- async def test_singlestore_destination(upload_file: Path):
105
+ async def test_singlestore_destination(upload_file: Path, temp_dir: Path):
106
106
  mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
107
107
  with docker_compose_context(
108
108
  docker_compose_path=env_setup_path / "sql" / "singlestore" / "destination"
109
109
  ):
110
- with tempfile.TemporaryDirectory() as tmpdir:
111
- stager = SingleStoreUploadStager()
112
- stager_params = {
113
- "elements_filepath": upload_file,
114
- "file_data": mock_file_data,
115
- "output_dir": Path(tmpdir),
116
- "output_filename": "test_db",
117
- }
118
- if stager.is_async():
119
- staged_path = await stager.run_async(**stager_params)
120
- else:
121
- staged_path = stager.run(**stager_params)
122
-
123
- # The stager should append the `.json` suffix to the output filename passed in.
124
- assert staged_path.name == "test_db.json"
125
-
126
- connect_params = {
127
- "host": "localhost",
128
- "port": 3306,
129
- "database": "ingest_test",
130
- "user": "root",
131
- "password": "password",
132
- }
133
-
134
- uploader = SingleStoreUploader(
135
- connection_config=SingleStoreConnectionConfig(
136
- host=connect_params["host"],
137
- port=connect_params["port"],
138
- database=connect_params["database"],
139
- user=connect_params["user"],
140
- access_config=SingleStoreAccessConfig(password=connect_params["password"]),
141
- ),
142
- upload_config=SingleStoreUploaderConfig(
143
- table_name="elements",
144
- ),
145
- )
146
-
147
- uploader.run(path=staged_path, file_data=mock_file_data)
148
-
149
- staged_df = pd.read_json(staged_path, orient="records", lines=True)
150
- expected_num_elements = len(staged_df)
151
- validate_destination(
152
- connect_params=connect_params,
153
- expected_num_elements=expected_num_elements,
154
- )
155
-
156
- uploader.run(path=staged_path, file_data=mock_file_data)
157
- validate_destination(
158
- connect_params=connect_params,
159
- expected_num_elements=expected_num_elements,
160
- )
110
+ stager = SingleStoreUploadStager()
111
+ staged_path = stager.run(
112
+ elements_filepath=upload_file,
113
+ file_data=mock_file_data,
114
+ output_dir=temp_dir,
115
+ output_filename=upload_file.name,
116
+ )
117
+
118
+ # The staged file should keep the same suffix as the upload file passed in.
119
+ assert staged_path.suffix == upload_file.suffix
120
+
121
+ connect_params = {
122
+ "host": "localhost",
123
+ "port": 3306,
124
+ "database": "ingest_test",
125
+ "user": "root",
126
+ "password": "password",
127
+ }
128
+
129
+ uploader = SingleStoreUploader(
130
+ connection_config=SingleStoreConnectionConfig(
131
+ host=connect_params["host"],
132
+ port=connect_params["port"],
133
+ database=connect_params["database"],
134
+ user=connect_params["user"],
135
+ access_config=SingleStoreAccessConfig(password=connect_params["password"]),
136
+ ),
137
+ upload_config=SingleStoreUploaderConfig(
138
+ table_name="elements",
139
+ ),
140
+ )
141
+ uploader.precheck()
142
+ uploader.run(path=staged_path, file_data=mock_file_data)
143
+
144
+ with staged_path.open("r") as f:
145
+ staged_data = json.load(f)
146
+ expected_num_elements = len(staged_data)
147
+ validate_destination(
148
+ connect_params=connect_params,
149
+ expected_num_elements=expected_num_elements,
150
+ )
151
+
152
+ uploader.run(path=staged_path, file_data=mock_file_data)
153
+ validate_destination(
154
+ connect_params=connect_params,
155
+ expected_num_elements=expected_num_elements,
156
+ )
157
+
158
+
159
+ @pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
160
+ def test_singlestore_stager(
161
+ request: TopRequest,
162
+ upload_file_str: str,
163
+ tmp_path: Path,
164
+ ):
165
+ upload_file: Path = request.getfixturevalue(upload_file_str)
166
+ stager = SingleStoreUploadStager()
167
+ stager_validation(
168
+ configs=StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22),
169
+ input_file=upload_file,
170
+ stager=stager,
171
+ tmp_dir=tmp_path,
172
+ )
@@ -1,16 +1,19 @@
1
+ import json
1
2
  import os
2
- import tempfile
3
3
  from pathlib import Path
4
4
 
5
- import docker
6
- import pandas as pd
7
5
  import pytest
8
6
  import snowflake.connector as sf
7
+ from _pytest.fixtures import TopRequest
9
8
 
10
9
  from test.integration.connectors.utils.constants import DESTINATION_TAG, SOURCE_TAG, env_setup_path
11
10
  from test.integration.connectors.utils.docker import container_context
12
- from test.integration.connectors.utils.validation import (
13
- ValidationConfigs,
11
+ from test.integration.connectors.utils.validation.destination import (
12
+ StagerValidationConfigs,
13
+ stager_validation,
14
+ )
15
+ from test.integration.connectors.utils.validation.source import (
16
+ SourceValidationConfigs,
14
17
  source_connector_validation,
15
18
  )
16
19
  from test.integration.utils import requires_env
@@ -30,14 +33,15 @@ from unstructured_ingest.v2.processes.connectors.sql.snowflake import (
30
33
  SEED_DATA_ROWS = 20
31
34
 
32
35
 
33
- def seed_data():
34
- conn = sf.connect(
35
- user="test",
36
- password="test",
37
- account="test",
38
- database="test",
39
- host="snowflake.localhost.localstack.cloud",
40
- )
36
+ def seed_data() -> dict:
37
+ connect_params = {
38
+ "user": "test",
39
+ "password": "test",
40
+ "account": "test",
41
+ "database": "test",
42
+ "host": "snowflake.localhost.localstack.cloud",
43
+ }
44
+ conn = sf.connect(**connect_params)
41
45
 
42
46
  file = Path(env_setup_path / "sql" / "snowflake" / "source" / "snowflake-schema.sql")
43
47
 
@@ -52,16 +56,31 @@ def seed_data():
52
56
 
53
57
  cur.close()
54
58
  conn.close()
59
+ return connect_params
55
60
 
56
61
 
57
- def init_db_destination():
58
- conn = sf.connect(
59
- user="test",
60
- password="test",
61
- account="test",
62
- database="test",
63
- host="snowflake.localhost.localstack.cloud",
64
- )
62
+ @pytest.fixture
63
+ def source_database_setup() -> dict:
64
+ token = os.getenv("LOCALSTACK_AUTH_TOKEN")
65
+ with container_context(
66
+ image="localstack/snowflake",
67
+ environment={"LOCALSTACK_AUTH_TOKEN": token, "EXTRA_CORS_ALLOWED_ORIGINS": "*"},
68
+ ports={4566: 4566, 443: 443},
69
+ healthcheck_retries=30,
70
+ ):
71
+ connect_params = seed_data()
72
+ yield connect_params
73
+
74
+
75
+ def init_db_destination() -> dict:
76
+ connect_params = {
77
+ "user": "test",
78
+ "password": "test",
79
+ "account": "test",
80
+ "database": "test",
81
+ "host": "snowflake.localhost.localstack.cloud",
82
+ }
83
+ conn = sf.connect(**connect_params)
65
84
 
66
85
  file = Path(env_setup_path / "sql" / "snowflake" / "destination" / "snowflake-schema.sql")
67
86
 
@@ -73,52 +92,53 @@ def init_db_destination():
73
92
 
74
93
  cur.close()
75
94
  conn.close()
95
+ return connect_params
76
96
 
77
97
 
78
- @pytest.mark.asyncio
79
- @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, "sql")
80
- @requires_env("LOCALSTACK_AUTH_TOKEN")
81
- async def test_snowflake_source():
82
- docker_client = docker.from_env()
98
+ @pytest.fixture
99
+ def destination_database_setup() -> dict:
83
100
  token = os.getenv("LOCALSTACK_AUTH_TOKEN")
84
101
  with container_context(
85
- docker_client=docker_client,
86
102
  image="localstack/snowflake",
87
103
  environment={"LOCALSTACK_AUTH_TOKEN": token, "EXTRA_CORS_ALLOWED_ORIGINS": "*"},
88
104
  ports={4566: 4566, 443: 443},
89
105
  healthcheck_retries=30,
90
106
  ):
91
- seed_data()
92
- with tempfile.TemporaryDirectory() as tmpdir:
93
- connection_config = SnowflakeConnectionConfig(
94
- access_config=SnowflakeAccessConfig(password="test"),
95
- account="test",
96
- user="test",
97
- database="test",
98
- host="snowflake.localhost.localstack.cloud",
99
- )
100
- indexer = SnowflakeIndexer(
101
- connection_config=connection_config,
102
- index_config=SnowflakeIndexerConfig(
103
- table_name="cars", id_column="CAR_ID", batch_size=5
104
- ),
105
- )
106
- downloader = SnowflakeDownloader(
107
- connection_config=connection_config,
108
- download_config=SnowflakeDownloaderConfig(
109
- fields=["CAR_ID", "BRAND"], download_dir=Path(tmpdir)
110
- ),
111
- )
112
- await source_connector_validation(
113
- indexer=indexer,
114
- downloader=downloader,
115
- configs=ValidationConfigs(
116
- test_id="snowflake",
117
- expected_num_files=SEED_DATA_ROWS,
118
- expected_number_indexed_file_data=4,
119
- validate_downloaded_files=True,
120
- ),
121
- )
107
+ connect_params = init_db_destination()
108
+ yield connect_params
109
+
110
+
111
+ @pytest.mark.asyncio
112
+ @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, "sql")
113
+ @requires_env("LOCALSTACK_AUTH_TOKEN")
114
+ async def test_snowflake_source(temp_dir: Path, source_database_setup: dict):
115
+ connection_config = SnowflakeConnectionConfig(
116
+ access_config=SnowflakeAccessConfig(password="test"),
117
+ account="test",
118
+ user="test",
119
+ database="test",
120
+ host="snowflake.localhost.localstack.cloud",
121
+ )
122
+ indexer = SnowflakeIndexer(
123
+ connection_config=connection_config,
124
+ index_config=SnowflakeIndexerConfig(table_name="cars", id_column="CAR_ID", batch_size=5),
125
+ )
126
+ downloader = SnowflakeDownloader(
127
+ connection_config=connection_config,
128
+ download_config=SnowflakeDownloaderConfig(
129
+ fields=["CAR_ID", "BRAND"], download_dir=temp_dir
130
+ ),
131
+ )
132
+ await source_connector_validation(
133
+ indexer=indexer,
134
+ downloader=downloader,
135
+ configs=SourceValidationConfigs(
136
+ test_id="snowflake",
137
+ expected_num_files=SEED_DATA_ROWS,
138
+ expected_number_indexed_file_data=4,
139
+ validate_downloaded_files=True,
140
+ ),
141
+ )
122
142
 
123
143
 
124
144
  def validate_destination(
@@ -145,65 +165,70 @@ def validate_destination(
145
165
  @pytest.mark.asyncio
146
166
  @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, "sql")
147
167
  @requires_env("LOCALSTACK_AUTH_TOKEN")
148
- async def test_snowflake_destination(upload_file: Path):
168
+ async def test_snowflake_destination(
169
+ upload_file: Path, temp_dir: Path, destination_database_setup: dict
170
+ ):
149
171
  # the snowflake destination connector doesn't leverage the file data but is required as an input,
150
172
  # mocking it with arbitrary values to meet the base requirements:
151
173
  mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
152
- docker_client = docker.from_env()
153
- token = os.getenv("LOCALSTACK_AUTH_TOKEN")
154
- with container_context(
155
- docker_client=docker_client,
156
- image="localstack/snowflake",
157
- environment={"LOCALSTACK_AUTH_TOKEN": token, "EXTRA_CORS_ALLOWED_ORIGINS": "*"},
158
- ports={4566: 4566, 443: 443},
159
- healthcheck_retries=30,
160
- ):
161
- init_db_destination()
162
- with tempfile.TemporaryDirectory() as tmpdir:
163
- stager = SnowflakeUploadStager()
164
- stager_params = {
165
- "elements_filepath": upload_file,
166
- "file_data": mock_file_data,
167
- "output_dir": Path(tmpdir),
168
- "output_filename": "test_db",
169
- }
170
- if stager.is_async():
171
- staged_path = await stager.run_async(**stager_params)
172
- else:
173
- staged_path = stager.run(**stager_params)
174
-
175
- # The stager should append the `.json` suffix to the output filename passed in.
176
- assert staged_path.name == "test_db.json"
177
-
178
- connect_params = {
179
- "user": "test",
180
- "password": "test",
181
- "account": "test",
182
- "database": "test",
183
- "host": "snowflake.localhost.localstack.cloud",
184
- }
185
-
186
- uploader = SnowflakeUploader(
187
- connection_config=SnowflakeConnectionConfig(
188
- access_config=SnowflakeAccessConfig(password=connect_params["password"]),
189
- account=connect_params["account"],
190
- user=connect_params["user"],
191
- database=connect_params["database"],
192
- host=connect_params["host"],
193
- )
194
- )
195
-
196
- uploader.run(path=staged_path, file_data=mock_file_data)
197
-
198
- staged_df = pd.read_json(staged_path, orient="records", lines=True)
199
- expected_num_elements = len(staged_df)
200
- validate_destination(
201
- connect_params=connect_params,
202
- expected_num_elements=expected_num_elements,
203
- )
204
-
205
- uploader.run(path=staged_path, file_data=mock_file_data)
206
- validate_destination(
207
- connect_params=connect_params,
208
- expected_num_elements=expected_num_elements,
209
- )
174
+ init_db_destination()
175
+ stager = SnowflakeUploadStager()
176
+ staged_path = stager.run(
177
+ elements_filepath=upload_file,
178
+ file_data=mock_file_data,
179
+ output_dir=temp_dir,
180
+ output_filename=upload_file.name,
181
+ )
182
+
183
+ # The staged file should keep the same suffix as the upload file passed in.
184
+ assert staged_path.suffix == upload_file.suffix
185
+
186
+ connect_params = {
187
+ "user": "test",
188
+ "password": "test",
189
+ "account": "test",
190
+ "database": "test",
191
+ "host": "snowflake.localhost.localstack.cloud",
192
+ }
193
+
194
+ uploader = SnowflakeUploader(
195
+ connection_config=SnowflakeConnectionConfig(
196
+ access_config=SnowflakeAccessConfig(password=connect_params["password"]),
197
+ account=connect_params["account"],
198
+ user=connect_params["user"],
199
+ database=connect_params["database"],
200
+ host=connect_params["host"],
201
+ )
202
+ )
203
+ uploader.precheck()
204
+ uploader.run(path=staged_path, file_data=mock_file_data)
205
+
206
+ with staged_path.open("r") as f:
207
+ staged_data = json.load(f)
208
+ expected_num_elements = len(staged_data)
209
+ validate_destination(
210
+ connect_params=connect_params,
211
+ expected_num_elements=expected_num_elements,
212
+ )
213
+
214
+ uploader.run(path=staged_path, file_data=mock_file_data)
215
+ validate_destination(
216
+ connect_params=connect_params,
217
+ expected_num_elements=expected_num_elements,
218
+ )
219
+
220
+
221
+ @pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
222
+ def test_snowflake_stager(
223
+ request: TopRequest,
224
+ upload_file_str: str,
225
+ tmp_path: Path,
226
+ ):
227
+ upload_file: Path = request.getfixturevalue(upload_file_str)
228
+ stager = SnowflakeUploadStager()
229
+ stager_validation(
230
+ configs=StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22),
231
+ input_file=upload_file,
232
+ stager=stager,
233
+ tmp_dir=tmp_path,
234
+ )