sql-testing-library 0.13.0__tar.gz → 0.15.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/CHANGELOG.md +26 -0
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/PKG-INFO +200 -38
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/README.md +196 -35
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/pyproject.toml +10 -3
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/__init__.py +3 -1
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_adapters/base.py +16 -0
- sql_testing_library-0.15.0/src/sql_testing_library/_adapters/bigquery.py +493 -0
- sql_testing_library-0.15.0/src/sql_testing_library/_adapters/duckdb.py +474 -0
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_adapters/presto.py +2 -5
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_adapters/redshift.py +18 -25
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_adapters/snowflake.py +58 -51
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_core.py +193 -9
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_pytest_plugin.py +24 -0
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_sql_utils.py +86 -4
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_types.py +35 -5
- sql_testing_library-0.13.0/src/sql_testing_library/_adapters/bigquery.py +0 -270
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/LICENSE +0 -0
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_adapters/__init__.py +0 -0
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_adapters/athena.py +0 -0
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_adapters/trino.py +0 -0
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_exceptions.py +0 -0
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_mock_table.py +0 -0
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/_sql_logger.py +0 -0
- {sql_testing_library-0.13.0 → sql_testing_library-0.15.0}/src/sql_testing_library/py.typed +0 -0
|
@@ -5,6 +5,32 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## 0.15.0 (2025-07-27)
|
|
9
|
+
|
|
10
|
+
### Feat
|
|
11
|
+
|
|
12
|
+
- implement duckdb integration (#117)
|
|
13
|
+
- integrate mocksmith for test data generation and simplify release workflow (#112)
|
|
14
|
+
- integrate mocksmith for test data generation and simplify release workflow
|
|
15
|
+
|
|
16
|
+
### Fix
|
|
17
|
+
|
|
18
|
+
- added explicit dependency of faker
|
|
19
|
+
- upgrade mocksmith library version
|
|
20
|
+
|
|
21
|
+
## 0.14.0 (2025-06-30)
|
|
22
|
+
|
|
23
|
+
### Feat
|
|
24
|
+
|
|
25
|
+
- **bigquery**: add struct support with list fields (#109)
|
|
26
|
+
- **bigquery**: add struct support for big query (#108)
|
|
27
|
+
- add parallel table cleanup for improved performance (#107)
|
|
28
|
+
- add parallel table creation for physical tables mode (#106)
|
|
29
|
+
|
|
30
|
+
### Fix
|
|
31
|
+
|
|
32
|
+
- **athena**: handle mixed format structs with lists and maps (#111)
|
|
33
|
+
|
|
8
34
|
## 0.13.0 (2025-06-27)
|
|
9
35
|
|
|
10
36
|
### Feat
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sql-testing-library
|
|
3
|
-
Version: 0.13.0
|
|
4
|
-
Summary: A powerful Python framework for unit testing SQL queries across BigQuery, Snowflake, Redshift, Athena, and Trino with mock data
|
|
3
|
+
Version: 0.15.0
|
|
4
|
+
Summary: A powerful Python framework for unit testing SQL queries across BigQuery, Snowflake, Redshift, Athena, Trino, and DuckDB with mock data
|
|
5
5
|
License: MIT
|
|
6
|
-
Keywords: sql,testing,unit-testing,mock-data,database-testing,bigquery,snowflake,redshift,athena,trino,data-engineering,etl-testing,sql-validation,query-testing
|
|
6
|
+
Keywords: sql,testing,unit-testing,mock-data,database-testing,bigquery,snowflake,redshift,athena,trino,duckdb,data-engineering,etl-testing,sql-validation,query-testing
|
|
7
7
|
Author: Gurmeet Saran
|
|
8
8
|
Author-email: gurmeetx@gmail.com
|
|
9
9
|
Maintainer: Gurmeet Saran
|
|
@@ -35,6 +35,7 @@ Classifier: Typing :: Typed
|
|
|
35
35
|
Provides-Extra: all
|
|
36
36
|
Provides-Extra: athena
|
|
37
37
|
Provides-Extra: bigquery
|
|
38
|
+
Provides-Extra: duckdb
|
|
38
39
|
Provides-Extra: redshift
|
|
39
40
|
Provides-Extra: snowflake
|
|
40
41
|
Provides-Extra: trino
|
|
@@ -57,7 +58,7 @@ Description-Content-Type: text/markdown
|
|
|
57
58
|
|
|
58
59
|
# SQL Testing Library
|
|
59
60
|
|
|
60
|
-
A powerful Python framework for unit testing SQL queries with mock data injection across BigQuery, Snowflake, Redshift, Athena, and Trino.
|
|
61
|
+
A powerful Python framework for unit testing SQL queries with mock data injection across BigQuery, Snowflake, Redshift, Athena, Trino, and DuckDB.
|
|
61
62
|
|
|
62
63
|
[![Unit Tests](https://github.com/gurmeetsaran/sqltesting/actions/workflows/tests.yaml/badge.svg)](https://github.com/gurmeetsaran/sqltesting/actions/workflows/tests.yaml)
|
|
63
64
|
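[![Athena Integration](https://github.com/gurmeetsaran/sqltesting/actions/workflows/athena-integration.yml/badge.svg)](https://github.com/gurmeetsaran/sqltesting/actions/workflows/athena-integration.yml)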
|
|
@@ -104,7 +105,7 @@ For more details on our journey and the engineering challenges we solved, read t
|
|
|
104
105
|
|
|
105
106
|
## Features
|
|
106
107
|
|
|
107
|
-
- **Multi-Database Support**: Test SQL across BigQuery, Athena, Redshift, Trino, and Snowflake
|
|
108
|
+
- **Multi-Database Support**: Test SQL across BigQuery, Athena, Redshift, Trino, Snowflake, and DuckDB
|
|
108
109
|
- **Mock Data Injection**: Use Python dataclasses for type-safe test data
|
|
109
110
|
- **CTE or Physical Tables**: Automatic fallback for query size limits
|
|
110
111
|
- **Type-Safe Results**: Deserialize results to Pydantic models
|
|
@@ -117,45 +118,46 @@ The library supports different data types across database engines. All checkmark
|
|
|
117
118
|
|
|
118
119
|
### Primitive Types
|
|
119
120
|
|
|
120
|
-
| Data Type | Python Type | BigQuery | Athena | Redshift | Trino | Snowflake |
|
|
121
|
-
|
|
122
|
-
| **String** | `str` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
123
|
-
| **Integer** | `int` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
124
|
-
| **Float** | `float` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
125
|
-
| **Boolean** | `bool` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
126
|
-
| **Date** | `date` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
127
|
-
| **Datetime** | `datetime` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
128
|
-
| **Decimal** | `Decimal` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
129
|
-
| **Optional** | `Optional[T]` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
121
|
+
| Data Type | Python Type | BigQuery | Athena | Redshift | Trino | Snowflake | DuckDB |
|
|
122
|
+
|-----------|-------------|----------|--------|----------|-------|-----------|--------|
|
|
123
|
+
| **String** | `str` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
124
|
+
| **Integer** | `int` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
125
|
+
| **Float** | `float` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
126
|
+
| **Boolean** | `bool` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
127
|
+
| **Date** | `date` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
128
|
+
| **Datetime** | `datetime` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
129
|
+
| **Decimal** | `Decimal` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
130
|
+
| **Optional** | `Optional[T]` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
130
131
|
|
|
131
132
|
### Complex Types
|
|
132
133
|
|
|
133
|
-
| Data Type | Python Type | BigQuery | Athena | Redshift | Trino | Snowflake |
|
|
134
|
-
|
|
135
|
-
| **String Array** | `List[str]` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
136
|
-
| **Integer Array** | `List[int]` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
137
|
-
| **Decimal Array** | `List[Decimal]` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
138
|
-
| **Optional Array** | `Optional[List[T]]` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
139
|
-
| **Map/Dict** | `Dict[K, V]` | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
140
|
-
| **Struct/Record** | `dataclass` | ❌ | ✅ | ❌ | ✅ | ❌ |
|
|
141
|
-
| **Nested Arrays** | `List[List[T]]` | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
134
|
+
| Data Type | Python Type | BigQuery | Athena | Redshift | Trino | Snowflake | DuckDB |
|
|
135
|
+
|-----------|-------------|----------|--------|----------|-------|-----------|--------|
|
|
136
|
+
| **String Array** | `List[str]` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
137
|
+
| **Integer Array** | `List[int]` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
138
|
+
| **Decimal Array** | `List[Decimal]` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
139
|
+
| **Optional Array** | `Optional[List[T]]` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
140
|
+
| **Map/Dict** | `Dict[K, V]` | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
141
|
+
| **Struct/Record** | `dataclass` | ✅ | ✅ | ❌ | ✅ | ❌ | ✅ |
|
|
142
|
+
| **Nested Arrays** | `List[List[T]]` | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
|
142
143
|
|
|
143
144
|
### Database-Specific Notes
|
|
144
145
|
|
|
145
|
-
- **BigQuery**: NULL arrays become empty arrays `[]`; uses scientific notation for large decimals; dict/map types stored as JSON strings; struct types not yet supported
|
|
146
|
+
- **BigQuery**: NULL arrays become empty arrays `[]`; uses scientific notation for large decimals; dict/map types stored as JSON strings; struct types supported using `STRUCT` syntax with named fields (dataclasses and Pydantic models)
|
|
146
147
|
- **Athena**: 256KB query size limit; supports arrays and maps using `ARRAY[]` and `MAP(ARRAY[], ARRAY[])` syntax; supports struct types using `ROW` with named fields (dataclasses and Pydantic models)
|
|
147
148
|
- **Redshift**: Arrays and maps implemented via SUPER type (JSON parsing); 16MB query size limit; struct types not yet supported
|
|
148
149
|
- **Trino**: Memory catalog for testing; excellent decimal precision; supports arrays, maps, and struct types using `ROW` with named fields (dataclasses and Pydantic models)
|
|
149
150
|
- **Snowflake**: Column names normalized to lowercase; 1MB query size limit; dict/map types implemented via VARIANT type (JSON parsing); struct types not yet supported
|
|
151
|
+
- **DuckDB**: Fast embedded analytics database; excellent SQL standards compliance; supports arrays, maps, and struct types using `STRUCT` syntax with named fields (dataclasses and Pydantic models)
|
|
150
152
|
|
|
151
153
|
## Execution Modes Support
|
|
152
154
|
|
|
153
155
|
The library supports two execution modes for mock data injection. **CTE Mode is the default** and is automatically used unless Physical Tables mode is explicitly requested or required due to query size limits.
|
|
154
156
|
|
|
155
|
-
| Execution Mode | Description | BigQuery | Athena | Redshift | Trino | Snowflake |
|
|
156
|
-
|
|
157
|
-
| **CTE Mode** | Mock data injected as Common Table Expressions | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
158
|
-
| **Physical Tables** | Mock data created as temporary tables | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
157
|
+
| Execution Mode | Description | BigQuery | Athena | Redshift | Trino | Snowflake | DuckDB |
|
|
158
|
+
|----------------|-------------|----------|--------|----------|-------|-----------|--------|
|
|
159
|
+
| **CTE Mode** | Mock data injected as Common Table Expressions | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
160
|
+
| **Physical Tables** | Mock data created as temporary tables | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
|
159
161
|
|
|
160
162
|
### Execution Mode Details
|
|
161
163
|
|
|
@@ -179,14 +181,16 @@ The library supports two execution modes for mock data injection. **CTE Mode is
|
|
|
179
181
|
| **Redshift** | Temporary tables | Session-specific temp schema | Database automatic | Session end |
|
|
180
182
|
| **Trino** | Memory tables | `memory.default` schema | Library executes `DROP TABLE` | After each test |
|
|
181
183
|
| **Snowflake** | Temporary tables | Session-specific temp schema | Database automatic | Session end |
|
|
184
|
+
| **DuckDB** | Temporary tables | Database-specific temp schema | Library executes `DROP TABLE` | After each test |
|
|
182
185
|
|
|
183
186
|
#### **Cleanup Behavior Explained**
|
|
184
187
|
|
|
185
|
-
**Library-Managed Cleanup (BigQuery, Athena, Trino):**
|
|
188
|
+
**Library-Managed Cleanup (BigQuery, Athena, Trino, DuckDB):**
|
|
186
189
|
- The SQL Testing Library explicitly calls cleanup methods after each test
|
|
187
190
|
- **BigQuery**: Creates standard tables in your dataset, then deletes them via `client.delete_table()`
|
|
188
191
|
- **Athena**: Creates external tables backed by S3 data, then drops table metadata via `DROP TABLE IF EXISTS` (⚠️ **S3 data files remain and require separate cleanup**)
|
|
189
192
|
- **Trino**: Creates tables in memory catalog, then drops them via `DROP TABLE IF EXISTS`
|
|
193
|
+
- **DuckDB**: Creates temporary tables in the database, then drops them via `DROP TABLE IF EXISTS`
|
|
190
194
|
|
|
191
195
|
**Database-Managed Cleanup (Redshift, Snowflake):**
|
|
192
196
|
- These databases have built-in temporary table mechanisms
|
|
@@ -211,7 +215,7 @@ A: Trino's memory catalog doesn't automatically clean up tables when sessions en
|
|
|
211
215
|
A: BigQuery tables created by the library are **standard tables without TTL** - they persist until explicitly deleted. The library immediately calls `client.delete_table()` after each test. If you want to set TTL as a safety net, you can configure it at the dataset level (e.g., 24 hours) to auto-delete any orphaned tables.
|
|
212
216
|
|
|
213
217
|
**Q: Which databases leave artifacts if tests crash?**
|
|
214
|
-
- **BigQuery, Athena, Trino**: May leave tables if library crashes before cleanup
|
|
218
|
+
- **BigQuery, Athena, Trino, DuckDB**: May leave tables if library crashes before cleanup
|
|
215
219
|
- **Redshift, Snowflake**: No artifacts - temporary tables are cleaned up automatically when the session ends
|
|
216
220
|
|
|
217
221
|
**Q: How to manually clean up orphaned tables?**
|
|
@@ -227,6 +231,10 @@ DROP TABLE temp_table_name;
|
|
|
227
231
|
-- Trino: List and drop tables with temp prefix
|
|
228
232
|
SHOW TABLES FROM memory.default LIKE 'temp_%';
|
|
229
233
|
DROP TABLE memory.default.temp_table_name;
|
|
234
|
+
|
|
235
|
+
-- DuckDB: List all tables, then drop the ones with the temp prefix
|
|
236
|
+
SHOW TABLES;
|
|
237
|
+
DROP TABLE temp_table_name;
|
|
230
238
|
```
|
|
231
239
|
|
|
232
240
|
**Q: How to handle S3 cleanup for Athena tables?**
|
|
@@ -277,6 +285,7 @@ aws s3api list-objects-v2 --bucket your-athena-results-bucket --prefix "temp_" \
|
|
|
277
285
|
| **Redshift** | 16MB | Automatically switches at 16MB |
|
|
278
286
|
| **Trino** | 16MB (estimated) | Large dataset or complex CTEs |
|
|
279
287
|
| **Snowflake** | 1MB | Automatically switches at 1MB |
|
|
288
|
+
| **DuckDB** | 32MB (estimated) | Large dataset or complex CTEs |
|
|
280
289
|
|
|
281
290
|
### How to Control Execution Mode
|
|
282
291
|
|
|
@@ -301,13 +310,85 @@ def test_physical_tables():
|
|
|
301
310
|
query="SELECT * FROM table",
|
|
302
311
|
use_physical_tables=True # Force physical tables
|
|
303
312
|
)
|
|
313
|
+
|
|
314
|
+
# Physical Tables with Custom Parallel Settings
|
|
315
|
+
@sql_test(
|
|
316
|
+
mock_tables=[...],
|
|
317
|
+
result_class=ResultClass,
|
|
318
|
+
use_physical_tables=True,
|
|
319
|
+
max_workers=4 # Customize parallel execution
|
|
320
|
+
)
|
|
321
|
+
def test_with_custom_parallelism():
|
|
322
|
+
return TestCase(query="SELECT * FROM table")
|
|
304
323
|
```
|
|
305
324
|
|
|
306
325
|
**Notes:**
|
|
307
326
|
- **CTE Mode**: Default mode, works with all database engines, suitable for most use cases
|
|
308
327
|
- **Physical Tables**: Used automatically when CTE queries exceed database size limits or when explicitly requested
|
|
328
|
+
- **Parallel Table Creation**: When using physical tables with multiple mock tables, they are created in parallel by default for better performance
|
|
309
329
|
- **Snowflake**: Full support for both CTE and physical table modes
|
|
310
330
|
|
|
331
|
+
### Performance Optimization: Parallel Table Operations
|
|
332
|
+
|
|
333
|
+
When using `use_physical_tables=True` with multiple mock tables, the library can create and clean up tables in parallel for better performance.
|
|
334
|
+
|
|
335
|
+
#### Parallel Table Creation
|
|
336
|
+
|
|
337
|
+
**Default Behavior:**
|
|
338
|
+
- Parallel creation is **enabled by default** when using physical tables
|
|
339
|
+
- Smart worker allocation based on table count:
|
|
340
|
+
- 1-2 tables: Same number of workers as tables
|
|
341
|
+
- 3-5 tables: 3 workers
|
|
342
|
+
- 6-10 tables: 5 workers
|
|
343
|
+
- 11+ tables: 8 workers (capped)
|
|
344
|
+
|
|
345
|
+
**Customization:**
|
|
346
|
+
```python
|
|
347
|
+
# Disable parallel creation
|
|
348
|
+
@sql_test(use_physical_tables=True, parallel_table_creation=False)
|
|
349
|
+
|
|
350
|
+
# Custom worker count
|
|
351
|
+
@sql_test(use_physical_tables=True, max_workers=2)
|
|
352
|
+
|
|
353
|
+
# In SQLTestCase directly
|
|
354
|
+
TestCase(
|
|
355
|
+
query="...",
|
|
356
|
+
use_physical_tables=True,
|
|
357
|
+
parallel_table_creation=True, # Default
|
|
358
|
+
max_workers=4 # Custom worker limit
|
|
359
|
+
)
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
#### Parallel Table Cleanup
|
|
363
|
+
|
|
364
|
+
**Default Behavior:**
|
|
365
|
+
- Parallel cleanup is **enabled by default** when using physical tables
|
|
366
|
+
- Uses the same smart worker allocation as table creation
|
|
367
|
+
- Cleanup errors are logged as warnings (best-effort cleanup)
|
|
368
|
+
|
|
369
|
+
**Customization:**
|
|
370
|
+
```python
|
|
371
|
+
# Disable parallel cleanup
|
|
372
|
+
@sql_test(use_physical_tables=True, parallel_table_cleanup=False)
|
|
373
|
+
|
|
374
|
+
# Custom worker count for both creation and cleanup
|
|
375
|
+
@sql_test(use_physical_tables=True, max_workers=2)
|
|
376
|
+
|
|
377
|
+
# In SQLTestCase directly
|
|
378
|
+
TestCase(
|
|
379
|
+
query="...",
|
|
380
|
+
use_physical_tables=True,
|
|
381
|
+
parallel_table_creation=True, # Default
|
|
382
|
+
parallel_table_cleanup=True, # Default
|
|
383
|
+
max_workers=4 # Custom worker limit for both operations
|
|
384
|
+
)
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
**Performance Benefits:**
|
|
388
|
+
- Both table creation and cleanup operations are parallelized when multiple tables are involved
|
|
389
|
+
- Significantly reduces test execution time for tests with many mock tables
|
|
390
|
+
- Particularly beneficial for cloud databases where network latency is a factor
|
|
391
|
+
|
|
311
392
|
## Installation
|
|
312
393
|
|
|
313
394
|
### For End Users (pip)
|
|
@@ -327,6 +408,9 @@ pip install sql-testing-library[trino]
|
|
|
327
408
|
# Install with Snowflake support
|
|
328
409
|
pip install sql-testing-library[snowflake]
|
|
329
410
|
|
|
411
|
+
# Install with DuckDB support
|
|
412
|
+
pip install sql-testing-library[duckdb]
|
|
413
|
+
|
|
330
414
|
# Or install with all database adapters
|
|
331
415
|
pip install sql-testing-library[all]
|
|
332
416
|
```
|
|
@@ -342,9 +426,10 @@ poetry install --with athena
|
|
|
342
426
|
poetry install --with redshift
|
|
343
427
|
poetry install --with trino
|
|
344
428
|
poetry install --with snowflake
|
|
429
|
+
poetry install --with duckdb
|
|
345
430
|
|
|
346
431
|
# Install with all database adapters and dev tools
|
|
347
|
-
poetry install --with bigquery,athena,redshift,trino,snowflake,dev
|
|
432
|
+
poetry install --with bigquery,athena,redshift,trino,snowflake,duckdb,dev
|
|
348
433
|
```
|
|
349
434
|
|
|
350
435
|
## Quick Start
|
|
@@ -353,7 +438,7 @@ poetry install --with bigquery,athena,redshift,trino,snowflake,dev
|
|
|
353
438
|
|
|
354
439
|
```ini
|
|
355
440
|
[sql_testing]
|
|
356
|
-
adapter = bigquery # Use 'bigquery', 'athena', 'redshift', 'trino', or 'snowflake'
|
|
441
|
+
adapter = bigquery # Use 'bigquery', 'athena', 'redshift', 'trino', 'snowflake', or 'duckdb'
|
|
357
442
|
|
|
358
443
|
# BigQuery configuration
|
|
359
444
|
[sql_testing.bigquery]
|
|
@@ -411,6 +496,10 @@ credentials_path = <path to credentials json>
|
|
|
411
496
|
#
|
|
412
497
|
# # Option 2: Password authentication (for accounts without MFA)
|
|
413
498
|
# password = <snowflake_password>
|
|
499
|
+
|
|
500
|
+
# DuckDB configuration
|
|
501
|
+
# [sql_testing.duckdb]
|
|
502
|
+
# database = <path/to/database.duckdb> # Optional: defaults to in-memory database
|
|
414
503
|
```
|
|
415
504
|
|
|
416
505
|
### Database Context Understanding
|
|
@@ -424,6 +513,7 @@ Each database adapter uses a different concept for organizing tables and queries
|
|
|
424
513
|
| **Redshift** | `{database}` | database only | `"test_db"` | `SELECT * FROM test_db.orders` |
|
|
425
514
|
| **Snowflake** | `{database}.{schema}` | database + schema | `"test_db.public"` | `SELECT * FROM test_db.public.products` |
|
|
426
515
|
| **Trino** | `{catalog}.{schema}` | catalog + schema | `"memory.default"` | `SELECT * FROM memory.default.inventory` |
|
|
516
|
+
| **DuckDB** | `{database}` | database only | `"test_db"` | `SELECT * FROM test_db.analytics` |
|
|
427
517
|
|
|
428
518
|
#### Key Points:
|
|
429
519
|
|
|
@@ -496,6 +586,14 @@ class ProductsMockTable(BaseMockTable):
|
|
|
496
586
|
|
|
497
587
|
def get_table_name(self) -> str:
|
|
498
588
|
return "products"
|
|
589
|
+
|
|
590
|
+
# DuckDB Mock Table
|
|
591
|
+
class AnalyticsMockTable(BaseMockTable):
|
|
592
|
+
def get_database_name(self) -> str:
|
|
593
|
+
return "test_db" # database only
|
|
594
|
+
|
|
595
|
+
def get_table_name(self) -> str:
|
|
596
|
+
return "analytics"
|
|
499
597
|
```
|
|
500
598
|
|
|
501
599
|
2. **Write a test** using one of the flexible patterns:
|
|
@@ -572,9 +670,9 @@ def test_pattern_3():
|
|
|
572
670
|
)
|
|
573
671
|
```
|
|
574
672
|
|
|
575
|
-
### Working with Struct Types (Athena and Trino)
|
|
673
|
+
### Working with Struct Types (Athena, Trino, BigQuery, and DuckDB)
|
|
576
674
|
|
|
577
|
-
The library supports struct/record types using Python dataclasses or Pydantic models for Athena and Trino:
|
|
675
|
+
The library supports struct/record types using Python dataclasses or Pydantic models for Athena, Trino, BigQuery, and DuckDB:
|
|
578
676
|
|
|
579
677
|
```python
|
|
580
678
|
from dataclasses import dataclass
|
|
@@ -621,7 +719,7 @@ class EmployeesMockTable(BaseMockTable):
|
|
|
621
719
|
|
|
622
720
|
# Test with struct types
|
|
623
721
|
@sql_test(
|
|
624
|
-
adapter_type="athena", # or "trino"
|
|
722
|
+
adapter_type="athena", # or "trino", "bigquery", or "duckdb"
|
|
625
723
|
mock_tables=[
|
|
626
724
|
EmployeesMockTable([
|
|
627
725
|
Employee(
|
|
@@ -667,7 +765,7 @@ def test_struct_with_dot_notation():
|
|
|
667
765
|
|
|
668
766
|
# You can also query entire structs
|
|
669
767
|
@sql_test(
|
|
670
|
-
adapter_type="trino",
|
|
768
|
+
adapter_type="trino", # or "athena", "bigquery", or "duckdb"
|
|
671
769
|
mock_tables=[EmployeesMockTable([...])],
|
|
672
770
|
result_class=dict # Returns full struct as dict
|
|
673
771
|
)
|
|
@@ -915,9 +1013,21 @@ def test_snowflake_query():
|
|
|
915
1013
|
query="SELECT user_id, name FROM users WHERE user_id = 1",
|
|
916
1014
|
default_namespace="test_db"
|
|
917
1015
|
)
|
|
1016
|
+
|
|
1017
|
+
# Use DuckDB adapter for this test
|
|
1018
|
+
@sql_test(
|
|
1019
|
+
adapter_type="duckdb",
|
|
1020
|
+
mock_tables=[...],
|
|
1021
|
+
result_class=UserResult
|
|
1022
|
+
)
|
|
1023
|
+
def test_duckdb_query():
|
|
1024
|
+
return TestCase(
|
|
1025
|
+
query="SELECT user_id, name FROM users WHERE user_id = 1",
|
|
1026
|
+
default_namespace="test_db"
|
|
1027
|
+
)
|
|
918
1028
|
```
|
|
919
1029
|
|
|
920
|
-
The adapter_type parameter will use the configuration from the corresponding section in pytest.ini, such as `[sql_testing.bigquery]`, `[sql_testing.athena]`, `[sql_testing.redshift]`, `[sql_testing.trino]`, or `[sql_testing.snowflake]`.
|
|
1030
|
+
The adapter_type parameter will use the configuration from the corresponding section in pytest.ini, such as `[sql_testing.bigquery]`, `[sql_testing.athena]`, `[sql_testing.redshift]`, `[sql_testing.trino]`, `[sql_testing.snowflake]`, or `[sql_testing.duckdb]`.
|
|
921
1031
|
|
|
922
1032
|
**Default Adapter Behavior:**
|
|
923
1033
|
- If `adapter_type` is not specified in the test, the library uses the adapter from `[sql_testing]` section's `adapter` setting
|
|
@@ -960,6 +1070,14 @@ The adapter_type parameter will use the configuration from the corresponding sec
|
|
|
960
1070
|
- Supports authentication via username and password
|
|
961
1071
|
- Optional support for warehouse, role, and schema specification
|
|
962
1072
|
|
|
1073
|
+
#### DuckDB Adapter
|
|
1074
|
+
- Supports DuckDB embedded analytical database
|
|
1075
|
+
- Uses CTAS (CREATE TABLE AS SELECT) for efficient temporary table creation
|
|
1076
|
+
- Fast local database with excellent SQL standards compliance
|
|
1077
|
+
- Supports both file-based and in-memory databases
|
|
1078
|
+
- No authentication required - perfect for local development and testing
|
|
1079
|
+
- Excellent performance for analytical workloads
|
|
1080
|
+
|
|
963
1081
|
**Default Behavior:**
|
|
964
1082
|
- If adapter_type is not specified in the TestCase or decorator, the library will use the adapter specified in the `[sql_testing]` section's `adapter` setting.
|
|
965
1083
|
- If no adapter is specified in the `[sql_testing]` section, it defaults to "bigquery".
|
|
@@ -1168,10 +1286,53 @@ The library automatically:
|
|
|
1168
1286
|
|
|
1169
1287
|
For detailed usage and configuration options, see the example files included.
|
|
1170
1288
|
|
|
1289
|
+
## Integration with Mocksmith
|
|
1290
|
+
|
|
1291
|
+
SQL Testing Library works seamlessly with [Mocksmith](https://github.com/gurmeetsaran/mocksmith) for automatic test data generation. Mocksmith can reduce your test setup code by ~70% while providing more realistic test data.
|
|
1292
|
+
|
|
1293
|
+
Install mocksmith with: `pip install mocksmith[mock,pydantic]`
|
|
1294
|
+
|
|
1295
|
+
### Quick Example
|
|
1296
|
+
|
|
1297
|
+
```python
|
|
1298
|
+
# Without Mocksmith - Manual data creation
|
|
1299
|
+
customers = []
|
|
1300
|
+
for i in range(100):
|
|
1301
|
+
customers.append(Customer(
|
|
1302
|
+
id=i + 1,
|
|
1303
|
+
name=f"Customer {i + 1}",
|
|
1304
|
+
email=f"customer{i + 1}@test.com",
|
|
1305
|
+
balance=Decimal(str(random.uniform(0, 10000)))
|
|
1306
|
+
))
|
|
1307
|
+
|
|
1308
|
+
# With Mocksmith - Automatic realistic data
|
|
1309
|
+
from mocksmith import mockable, Varchar, Integer, Money
|
|
1310
|
+
|
|
1311
|
+
@mockable
|
|
1312
|
+
@dataclass
|
|
1313
|
+
class Customer:
|
|
1314
|
+
id: Integer()
|
|
1315
|
+
name: Varchar(100)
|
|
1316
|
+
email: Varchar(255)
|
|
1317
|
+
balance: Money()
|
|
1318
|
+
|
|
1319
|
+
customers = [Customer.mock() for _ in range(100)]
|
|
1320
|
+
```
|
|
1321
|
+
|
|
1322
|
+
See the [Mocksmith Integration Guide](docs/mocksmith_integration.md) and [examples](examples/mocksmith_integration_example.py) for detailed usage patterns.
|
|
1323
|
+
|
|
1171
1324
|
## Known Limitations and TODOs
|
|
1172
1325
|
|
|
1173
1326
|
The library has a few known limitations that are planned to be addressed in future updates:
|
|
1174
1327
|
|
|
1328
|
+
### Struct Type Support
|
|
1329
|
+
- **Redshift**: Struct types are not supported due to lack of native struct/record types (uses SUPER type for JSON)
|
|
1330
|
+
- **Snowflake**: Struct types are not supported due to lack of native struct/record types (uses VARIANT type for JSON)
|
|
1331
|
+
|
|
1332
|
+
|
|
1333
|
+
### Database-Specific Limitations
|
|
1334
|
+
- **BigQuery**: Does not support nested arrays (arrays of arrays). This is a BigQuery database limitation, not a library limitation. (See TODO in `test_struct_types_integration.py:test_nested_lists`)
|
|
1335
|
+
|
|
1175
1336
|
### General Improvements
|
|
1176
1337
|
- Add support for more SQL dialects
|
|
1177
1338
|
- Improve error handling for malformed SQL
|
|
@@ -1188,4 +1349,5 @@ The library has a few known limitations that are planned to be addressed in futu
|
|
|
1188
1349
|
- psycopg2-binary for Redshift
|
|
1189
1350
|
- trino for Trino
|
|
1190
1351
|
- snowflake-connector-python for Snowflake
|
|
1352
|
+
- duckdb for DuckDB
|
|
1191
1353
|
|