unstructured-ingest 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of unstructured-ingest might be problematic.
Files changed (87)
  1. test/integration/chunkers/test_chunkers.py +0 -11
  2. test/integration/connectors/conftest.py +11 -1
  3. test/integration/connectors/databricks_tests/test_volumes_native.py +4 -3
  4. test/integration/connectors/duckdb/conftest.py +14 -0
  5. test/integration/connectors/duckdb/test_duckdb.py +51 -44
  6. test/integration/connectors/duckdb/test_motherduck.py +37 -48
  7. test/integration/connectors/elasticsearch/test_elasticsearch.py +26 -4
  8. test/integration/connectors/elasticsearch/test_opensearch.py +26 -3
  9. test/integration/connectors/sql/test_postgres.py +103 -92
  10. test/integration/connectors/sql/test_singlestore.py +112 -100
  11. test/integration/connectors/sql/test_snowflake.py +142 -117
  12. test/integration/connectors/sql/test_sqlite.py +87 -76
  13. test/integration/connectors/test_astradb.py +62 -1
  14. test/integration/connectors/test_azure_ai_search.py +25 -3
  15. test/integration/connectors/test_chroma.py +120 -0
  16. test/integration/connectors/test_confluence.py +4 -4
  17. test/integration/connectors/test_delta_table.py +1 -0
  18. test/integration/connectors/test_kafka.py +6 -6
  19. test/integration/connectors/test_milvus.py +21 -0
  20. test/integration/connectors/test_mongodb.py +7 -4
  21. test/integration/connectors/test_neo4j.py +236 -0
  22. test/integration/connectors/test_pinecone.py +25 -1
  23. test/integration/connectors/test_qdrant.py +25 -2
  24. test/integration/connectors/test_s3.py +9 -6
  25. test/integration/connectors/utils/docker.py +6 -0
  26. test/integration/connectors/utils/validation/__init__.py +0 -0
  27. test/integration/connectors/utils/validation/destination.py +88 -0
  28. test/integration/connectors/utils/validation/equality.py +75 -0
  29. test/integration/connectors/utils/{validation.py → validation/source.py} +42 -98
  30. test/integration/connectors/utils/validation/utils.py +36 -0
  31. unstructured_ingest/__version__.py +1 -1
  32. unstructured_ingest/utils/chunking.py +11 -0
  33. unstructured_ingest/utils/data_prep.py +36 -0
  34. unstructured_ingest/v2/interfaces/__init__.py +3 -1
  35. unstructured_ingest/v2/interfaces/file_data.py +58 -14
  36. unstructured_ingest/v2/interfaces/upload_stager.py +70 -6
  37. unstructured_ingest/v2/interfaces/uploader.py +11 -2
  38. unstructured_ingest/v2/pipeline/steps/chunk.py +2 -1
  39. unstructured_ingest/v2/pipeline/steps/download.py +5 -4
  40. unstructured_ingest/v2/pipeline/steps/embed.py +2 -1
  41. unstructured_ingest/v2/pipeline/steps/filter.py +2 -2
  42. unstructured_ingest/v2/pipeline/steps/index.py +4 -4
  43. unstructured_ingest/v2/pipeline/steps/partition.py +3 -2
  44. unstructured_ingest/v2/pipeline/steps/stage.py +5 -3
  45. unstructured_ingest/v2/pipeline/steps/uncompress.py +2 -2
  46. unstructured_ingest/v2/pipeline/steps/upload.py +3 -3
  47. unstructured_ingest/v2/processes/connectors/__init__.py +3 -0
  48. unstructured_ingest/v2/processes/connectors/astradb.py +43 -63
  49. unstructured_ingest/v2/processes/connectors/azure_ai_search.py +16 -40
  50. unstructured_ingest/v2/processes/connectors/chroma.py +36 -59
  51. unstructured_ingest/v2/processes/connectors/couchbase.py +92 -93
  52. unstructured_ingest/v2/processes/connectors/delta_table.py +11 -33
  53. unstructured_ingest/v2/processes/connectors/duckdb/base.py +26 -26
  54. unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +29 -20
  55. unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +37 -44
  56. unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +46 -75
  57. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +12 -35
  58. unstructured_ingest/v2/processes/connectors/fsspec/box.py +12 -35
  59. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +15 -42
  60. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +33 -29
  61. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +12 -34
  62. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +13 -37
  63. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +19 -33
  64. unstructured_ingest/v2/processes/connectors/gitlab.py +32 -31
  65. unstructured_ingest/v2/processes/connectors/google_drive.py +32 -29
  66. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +2 -4
  67. unstructured_ingest/v2/processes/connectors/kdbai.py +44 -70
  68. unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +8 -10
  69. unstructured_ingest/v2/processes/connectors/local.py +13 -2
  70. unstructured_ingest/v2/processes/connectors/milvus.py +16 -57
  71. unstructured_ingest/v2/processes/connectors/mongodb.py +99 -108
  72. unstructured_ingest/v2/processes/connectors/neo4j.py +383 -0
  73. unstructured_ingest/v2/processes/connectors/onedrive.py +1 -1
  74. unstructured_ingest/v2/processes/connectors/pinecone.py +3 -33
  75. unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +32 -41
  76. unstructured_ingest/v2/processes/connectors/sql/postgres.py +5 -5
  77. unstructured_ingest/v2/processes/connectors/sql/singlestore.py +5 -5
  78. unstructured_ingest/v2/processes/connectors/sql/snowflake.py +5 -5
  79. unstructured_ingest/v2/processes/connectors/sql/sql.py +72 -66
  80. unstructured_ingest/v2/processes/connectors/sql/sqlite.py +5 -5
  81. unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +9 -31
  82. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/METADATA +20 -15
  83. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/RECORD +87 -79
  84. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/LICENSE.md +0 -0
  85. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/WHEEL +0 -0
  86. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/entry_points.txt +0 -0
  87. {unstructured_ingest-0.3.8.dist-info → unstructured_ingest-0.3.10.dist-info}/top_level.txt +0 -0
test/integration/connectors/sql/test_postgres.py

@@ -1,15 +1,18 @@
-import tempfile
-from contextlib import contextmanager
+import json
 from pathlib import Path

-import pandas as pd
 import pytest
+from _pytest.fixtures import TopRequest

 from psycopg2 import connect

 from test.integration.connectors.utils.constants import DESTINATION_TAG, SOURCE_TAG, env_setup_path
 from test.integration.connectors.utils.docker_compose import docker_compose_context
-from test.integration.connectors.utils.validation import (
-    ValidationConfigs,
+from test.integration.connectors.utils.validation.destination import (
+    StagerValidationConfigs,
+    stager_validation,
+)
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
     source_connector_validation,
 )
 from unstructured_ingest.v2.interfaces import FileData
@@ -25,16 +28,17 @@ from unstructured_ingest.v2.processes.connectors.sql.postgres import (
     PostgresUploadStager,
 )

-SEED_DATA_ROWS = 20
+SEED_DATA_ROWS = 10


-@contextmanager
-def postgres_download_setup() -> None:
+@pytest.fixture
+def source_database_setup() -> str:
+    db_name = "test_db"
     with docker_compose_context(docker_compose_path=env_setup_path / "sql" / "postgres" / "source"):
         connection = connect(
             user="unstructured",
             password="test",
-            dbname="test_db",
+            dbname=db_name,
             host="localhost",
             port=5433,
         )
@@ -43,12 +47,12 @@ def postgres_download_setup() -> None:
             sql_statment = f"INSERT INTO cars (brand, price) VALUES " f"('brand_{i}', {i})"
             cursor.execute(sql_statment)
         connection.commit()
-        yield
+        yield db_name


 @pytest.mark.asyncio
 @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, "sql")
-async def test_postgres_source():
+async def test_postgres_source(temp_dir: Path, source_database_setup: str):
     connect_params = {
         "host": "localhost",
         "port": 5433,
@@ -56,37 +60,31 @@ async def test_postgres_source():
         "user": "unstructured",
         "password": "test",
     }
-    with postgres_download_setup():
-        with tempfile.TemporaryDirectory() as tmpdir:
-            connection_config = PostgresConnectionConfig(
-                host=connect_params["host"],
-                port=connect_params["port"],
-                database=connect_params["database"],
-                username=connect_params["user"],
-                access_config=PostgresAccessConfig(password=connect_params["password"]),
-            )
-            indexer = PostgresIndexer(
-                connection_config=connection_config,
-                index_config=PostgresIndexerConfig(
-                    table_name="cars", id_column="car_id", batch_size=5
-                ),
-            )
-            downloader = PostgresDownloader(
-                connection_config=connection_config,
-                download_config=PostgresDownloaderConfig(
-                    fields=["car_id", "brand"], download_dir=Path(tmpdir)
-                ),
-            )
-            await source_connector_validation(
-                indexer=indexer,
-                downloader=downloader,
-                configs=ValidationConfigs(
-                    test_id="postgres",
-                    expected_num_files=SEED_DATA_ROWS,
-                    expected_number_indexed_file_data=4,
-                    validate_downloaded_files=True,
-                ),
-            )
+    connection_config = PostgresConnectionConfig(
+        host=connect_params["host"],
+        port=connect_params["port"],
+        database=connect_params["database"],
+        username=connect_params["user"],
+        access_config=PostgresAccessConfig(password=connect_params["password"]),
+    )
+    indexer = PostgresIndexer(
+        connection_config=connection_config,
+        index_config=PostgresIndexerConfig(table_name="cars", id_column="car_id", batch_size=6),
+    )
+    downloader = PostgresDownloader(
+        connection_config=connection_config,
+        download_config=PostgresDownloaderConfig(fields=["car_id", "brand"], download_dir=temp_dir),
+    )
+    await source_connector_validation(
+        indexer=indexer,
+        downloader=downloader,
+        configs=SourceValidationConfigs(
+            test_id="postgres",
+            expected_num_files=SEED_DATA_ROWS,
+            expected_number_indexed_file_data=2,
+            validate_downloaded_files=True,
+        ),
+    )


 def validate_destination(
@@ -118,63 +116,76 @@ def validate_destination(

 @pytest.mark.asyncio
 @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, "sql")
-async def test_postgres_destination(upload_file: Path):
+async def test_postgres_destination(upload_file: Path, temp_dir: Path):
     # the postgres destination connector doesn't leverage the file data but is required as an input,
     # mocking it with arbitrary values to meet the base requirements:
     mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
     with docker_compose_context(
         docker_compose_path=env_setup_path / "sql" / "postgres" / "destination"
     ):
-        with tempfile.TemporaryDirectory() as tmpdir:
-            stager = PostgresUploadStager()
-            stager_params = {
-                "elements_filepath": upload_file,
-                "file_data": mock_file_data,
-                "output_dir": Path(tmpdir),
-                "output_filename": "test_db",
-            }
-            if stager.is_async():
-                staged_path = await stager.run_async(**stager_params)
-            else:
-                staged_path = stager.run(**stager_params)
-
-            # The stager should append the `.json` suffix to the output filename passed in.
-            assert staged_path.name == "test_db.json"
-
-            connect_params = {
-                "host": "localhost",
-                "port": 5433,
-                "database": "elements",
-                "user": "unstructured",
-                "password": "test",
-            }
-
-            uploader = PostgresUploader(
-                connection_config=PostgresConnectionConfig(
-                    host=connect_params["host"],
-                    port=connect_params["port"],
-                    database=connect_params["database"],
-                    username=connect_params["user"],
-                    access_config=PostgresAccessConfig(password=connect_params["password"]),
-                )
-            )
+        stager = PostgresUploadStager()
+        staged_path = stager.run(
+            elements_filepath=upload_file,
+            file_data=mock_file_data,
+            output_dir=temp_dir,
+            output_filename=upload_file.name,
+        )

-            uploader.run(path=staged_path, file_data=mock_file_data)
+        # The stager should append the `.json` suffix to the output filename passed in.
+        assert staged_path.suffix == upload_file.suffix

-            staged_df = pd.read_json(staged_path, orient="records", lines=True)
-            sample_element = staged_df.iloc[0]
-            expected_num_elements = len(staged_df)
-            validate_destination(
-                connect_params=connect_params,
-                expected_num_elements=expected_num_elements,
-                expected_text=sample_element["text"],
-                test_embedding=sample_element["embeddings"],
-            )
+        connect_params = {
+            "host": "localhost",
+            "port": 5433,
+            "database": "elements",
+            "user": "unstructured",
+            "password": "test",
+        }

-            uploader.run(path=staged_path, file_data=mock_file_data)
-            validate_destination(
-                connect_params=connect_params,
-                expected_num_elements=expected_num_elements,
-                expected_text=sample_element["text"],
-                test_embedding=sample_element["embeddings"],
+        uploader = PostgresUploader(
+            connection_config=PostgresConnectionConfig(
+                host=connect_params["host"],
+                port=connect_params["port"],
+                database=connect_params["database"],
+                username=connect_params["user"],
+                access_config=PostgresAccessConfig(password=connect_params["password"]),
             )
+        )
+        uploader.precheck()
+        uploader.run(path=staged_path, file_data=mock_file_data)
+
+        with staged_path.open("r") as f:
+            staged_data = json.load(f)
+
+        sample_element = staged_data[0]
+        expected_num_elements = len(staged_data)
+        validate_destination(
+            connect_params=connect_params,
+            expected_num_elements=expected_num_elements,
+            expected_text=sample_element["text"],
+            test_embedding=sample_element["embeddings"],
+        )
+
+        uploader.run(path=staged_path, file_data=mock_file_data)
+        validate_destination(
+            connect_params=connect_params,
+            expected_num_elements=expected_num_elements,
+            expected_text=sample_element["text"],
+            test_embedding=sample_element["embeddings"],
+        )
+
+
+@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
+def test_postgres_stager(
+    request: TopRequest,
+    upload_file_str: str,
+    tmp_path: Path,
+):
+    upload_file: Path = request.getfixturevalue(upload_file_str)
+    stager = PostgresUploadStager()
+    stager_validation(
+        configs=StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22),
+        input_file=upload_file,
+        stager=stager,
+        tmp_dir=tmp_path,
+    )
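
A note on the pattern in the test_postgres.py changes above: the @contextmanager seed helper becomes a pytest fixture that yields a value, and tempfile.TemporaryDirectory() gives way to an injected temp_dir fixture (presumably defined in the updated conftest.py, per the file list). Below is a minimal, self-contained sketch of that conversion; the names seeded_database and test_reads_seeded_rows are illustrative only, and the container startup and seeding are elided.

import pytest


@pytest.fixture
def seeded_database() -> str:
    db_name = "test_db"
    # setup: start containers and insert seed rows here (elided)
    yield db_name  # the test body runs while the resource is live
    # teardown: runs after the test, replacing the old context manager's exit


def test_reads_seeded_rows(seeded_database: str, tmp_path):
    # pytest injects the yielded value directly; the built-in tmp_path
    # fixture plays the role temp_dir plays in the real tests
    assert seeded_database == "test_db"

The benefit is that setup, value, and teardown travel together, and the test signature documents its dependencies instead of nesting `with` blocks.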
test/integration/connectors/sql/test_singlestore.py

@@ -1,15 +1,18 @@
-import tempfile
-from contextlib import contextmanager
+import json
 from pathlib import Path

-import pandas as pd
 import pytest
 import singlestoredb as s2
+from _pytest.fixtures import TopRequest

 from test.integration.connectors.utils.constants import DESTINATION_TAG, SOURCE_TAG, env_setup_path
 from test.integration.connectors.utils.docker_compose import docker_compose_context
-from test.integration.connectors.utils.validation import (
-    ValidationConfigs,
+from test.integration.connectors.utils.validation.destination import (
+    StagerValidationConfigs,
+    stager_validation,
+)
+from test.integration.connectors.utils.validation.source import (
+    SourceValidationConfigs,
     source_connector_validation,
 )
 from unstructured_ingest.v2.interfaces import FileData
@@ -26,11 +29,18 @@ from unstructured_ingest.v2.processes.connectors.sql.singlestore import (
     SingleStoreUploadStager,
 )

-SEED_DATA_ROWS = 20
+SEED_DATA_ROWS = 10


-@contextmanager
-def singlestore_download_setup(connect_params: dict) -> None:
+@pytest.fixture
+def source_database_setup() -> dict:
+    connect_params = {
+        "host": "localhost",
+        "port": 3306,
+        "database": "ingest_test",
+        "user": "root",
+        "password": "password",
+    }
     with docker_compose_context(
         docker_compose_path=env_setup_path / "sql" / "singlestore" / "source"
     ):
@@ -40,50 +50,40 @@ def singlestore_download_setup(connect_params: dict) -> None:
             sql_statment = f"INSERT INTO cars (brand, price) VALUES " f"('brand_{i}', {i})"
             cursor.execute(sql_statment)
         connection.commit()
-        yield
+        yield connect_params


 @pytest.mark.asyncio
 @pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, "sql")
-async def test_singlestore_source():
-    connect_params = {
-        "host": "localhost",
-        "port": 3306,
-        "database": "ingest_test",
-        "user": "root",
-        "password": "password",
-    }
-    with singlestore_download_setup(connect_params=connect_params):
-        with tempfile.TemporaryDirectory() as tmpdir:
-            connection_config = SingleStoreConnectionConfig(
-                host=connect_params["host"],
-                port=connect_params["port"],
-                database=connect_params["database"],
-                user=connect_params["user"],
-                access_config=SingleStoreAccessConfig(password=connect_params["password"]),
-            )
-            indexer = SingleStoreIndexer(
-                connection_config=connection_config,
-                index_config=SingleStoreIndexerConfig(
-                    table_name="cars", id_column="car_id", batch_size=5
-                ),
-            )
-            downloader = SingleStoreDownloader(
-                connection_config=connection_config,
-                download_config=SingleStoreDownloaderConfig(
-                    fields=["car_id", "brand"], download_dir=Path(tmpdir)
-                ),
-            )
-            await source_connector_validation(
-                indexer=indexer,
-                downloader=downloader,
-                configs=ValidationConfigs(
-                    test_id="singlestore",
-                    expected_num_files=SEED_DATA_ROWS,
-                    expected_number_indexed_file_data=4,
-                    validate_downloaded_files=True,
-                ),
-            )
+async def test_singlestore_source(temp_dir: Path, source_database_setup: dict):
+
+    connection_config = SingleStoreConnectionConfig(
+        host=source_database_setup["host"],
+        port=source_database_setup["port"],
+        database=source_database_setup["database"],
+        user=source_database_setup["user"],
+        access_config=SingleStoreAccessConfig(password=source_database_setup["password"]),
+    )
+    indexer = SingleStoreIndexer(
+        connection_config=connection_config,
+        index_config=SingleStoreIndexerConfig(table_name="cars", id_column="car_id", batch_size=6),
+    )
+    downloader = SingleStoreDownloader(
+        connection_config=connection_config,
+        download_config=SingleStoreDownloaderConfig(
+            fields=["car_id", "brand"], download_dir=temp_dir
+        ),
+    )
+    await source_connector_validation(
+        indexer=indexer,
+        downloader=downloader,
+        configs=SourceValidationConfigs(
+            test_id="singlestore",
+            expected_num_files=SEED_DATA_ROWS,
+            expected_number_indexed_file_data=2,
+            validate_downloaded_files=True,
+        ),
+    )


 def validate_destination(
@@ -102,59 +102,71 @@ def validate_destination(

 @pytest.mark.asyncio
 @pytest.mark.tags(CONNECTOR_TYPE, DESTINATION_TAG, "sql")
-async def test_singlestore_destination(upload_file: Path):
+async def test_singlestore_destination(upload_file: Path, temp_dir: Path):
     mock_file_data = FileData(identifier="mock file data", connector_type=CONNECTOR_TYPE)
     with docker_compose_context(
         docker_compose_path=env_setup_path / "sql" / "singlestore" / "destination"
     ):
-        with tempfile.TemporaryDirectory() as tmpdir:
-            stager = SingleStoreUploadStager()
-            stager_params = {
-                "elements_filepath": upload_file,
-                "file_data": mock_file_data,
-                "output_dir": Path(tmpdir),
-                "output_filename": "test_db",
-            }
-            if stager.is_async():
-                staged_path = await stager.run_async(**stager_params)
-            else:
-                staged_path = stager.run(**stager_params)
-
-            # The stager should append the `.json` suffix to the output filename passed in.
-            assert staged_path.name == "test_db.json"
-
-            connect_params = {
-                "host": "localhost",
-                "port": 3306,
-                "database": "ingest_test",
-                "user": "root",
-                "password": "password",
-            }
-
-            uploader = SingleStoreUploader(
-                connection_config=SingleStoreConnectionConfig(
-                    host=connect_params["host"],
-                    port=connect_params["port"],
-                    database=connect_params["database"],
-                    user=connect_params["user"],
-                    access_config=SingleStoreAccessConfig(password=connect_params["password"]),
-                ),
-                upload_config=SingleStoreUploaderConfig(
-                    table_name="elements",
-                ),
-            )
-
-            uploader.run(path=staged_path, file_data=mock_file_data)
-
-            staged_df = pd.read_json(staged_path, orient="records", lines=True)
-            expected_num_elements = len(staged_df)
-            validate_destination(
-                connect_params=connect_params,
-                expected_num_elements=expected_num_elements,
-            )
-
-            uploader.run(path=staged_path, file_data=mock_file_data)
-            validate_destination(
-                connect_params=connect_params,
-                expected_num_elements=expected_num_elements,
-            )
+        stager = SingleStoreUploadStager()
+        staged_path = stager.run(
+            elements_filepath=upload_file,
+            file_data=mock_file_data,
+            output_dir=temp_dir,
+            output_filename=upload_file.name,
+        )
+
+        # The stager should append the `.json` suffix to the output filename passed in.
+        assert staged_path.suffix == upload_file.suffix
+
+        connect_params = {
+            "host": "localhost",
+            "port": 3306,
+            "database": "ingest_test",
+            "user": "root",
+            "password": "password",
+        }
+
+        uploader = SingleStoreUploader(
+            connection_config=SingleStoreConnectionConfig(
+                host=connect_params["host"],
+                port=connect_params["port"],
+                database=connect_params["database"],
+                user=connect_params["user"],
+                access_config=SingleStoreAccessConfig(password=connect_params["password"]),
+            ),
+            upload_config=SingleStoreUploaderConfig(
+                table_name="elements",
+            ),
+        )
+        uploader.precheck()
+        uploader.run(path=staged_path, file_data=mock_file_data)
+
+        with staged_path.open("r") as f:
+            staged_data = json.load(f)
+        expected_num_elements = len(staged_data)
+        validate_destination(
+            connect_params=connect_params,
+            expected_num_elements=expected_num_elements,
+        )
+
+        uploader.run(path=staged_path, file_data=mock_file_data)
+        validate_destination(
+            connect_params=connect_params,
+            expected_num_elements=expected_num_elements,
+        )
+
+
+@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
+def test_singlestore_stager(
+    request: TopRequest,
+    upload_file_str: str,
+    tmp_path: Path,
+):
+    upload_file: Path = request.getfixturevalue(upload_file_str)
+    stager = SingleStoreUploadStager()
+    stager_validation(
+        configs=StagerValidationConfigs(test_id=CONNECTOR_TYPE, expected_count=22),
+        input_file=upload_file,
+        stager=stager,
+        tmp_dir=tmp_path,
+    )
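
The new test_*_stager tests in both files share one mechanism worth calling out: they parametrize over fixture names as strings and resolve them at runtime with request.getfixturevalue, so a single test body covers both the .json and .ndjson upload files. Below is a runnable sketch of just that mechanism; the toy fixtures stand in for the real upload_file and upload_file_ndjson fixtures, which presumably live in the updated conftest.py.

from pathlib import Path

import pytest


@pytest.fixture
def upload_file(tmp_path: Path) -> Path:
    path = tmp_path / "elements.json"
    path.write_text('[{"text": "hello"}]')  # stand-in for a real staged JSON array
    return path


@pytest.fixture
def upload_file_ndjson(tmp_path: Path) -> Path:
    path = tmp_path / "elements.ndjson"
    path.write_text('{"text": "hello"}\n')  # one JSON object per line
    return path


@pytest.mark.parametrize("upload_file_str", ["upload_file_ndjson", "upload_file"])
def test_stager_handles_both_formats(request: pytest.FixtureRequest, upload_file_str: str):
    # resolve the fixture by name at runtime; this is why the parametrize
    # values are strings rather than the fixture objects themselves
    upload_file: Path = request.getfixturevalue(upload_file_str)
    assert upload_file.suffix in {".json", ".ndjson"}

Indirection through getfixturevalue is needed because parametrize arguments are evaluated at collection time, before fixtures exist; passing the names and resolving them inside the test defers fixture setup to run time.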