sql-decomposer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 sql_decomposer contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,116 @@
1
+ Metadata-Version: 2.4
2
+ Name: sql-decomposer
3
+ Version: 0.1.0
4
+ Summary: Extract repeated SQL subqueries into temporary tables using sqlglot.
5
+ Author: sql_decomposer contributors
6
+ License-Expression: MIT
7
+ Keywords: sql,sqlglot,decomposer,query-optimization
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Database
16
+ Requires-Python: >=3.9
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: sqlglot>=23.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: build>=1.2.2; extra == "dev"
22
+ Requires-Dist: pytest>=8.0; extra == "dev"
23
+ Requires-Dist: twine>=5.0; extra == "dev"
24
+ Dynamic: license-file
25
+
26
+ # sql_decomposer
27
+
28
+ `sql_decomposer` extracts repeated SQL subqueries into temporary tables.
29
+ It can help reduce duplication in large analytical queries and produce a more readable SQL script.
30
+
31
+ ## Features
32
+
33
+ - Parses SQL safely with `sqlglot`.
34
+ - Detects repeated `SELECT` subqueries.
35
+ - Rewrites repeated blocks to `SELECT * FROM <temp_table>`.
36
+ - Provides a Python API and a CLI.
37
+
38
+ ## Installation
39
+
40
+ From source:
41
+
42
+ ```bash
43
+ pip install .
44
+ ```
45
+
46
+ From PyPI (after release):
47
+
48
+ ```bash
49
+ pip install sql-decomposer
50
+ ```
51
+
52
+ ## CLI usage
53
+
54
+ ```bash
55
+ python -m sql_decomposer input.sql output.sql --min-count 2 --temp-prefix __temp
56
+ ```
57
+
58
+ Or with console script:
59
+
60
+ ```bash
61
+ sql-decomposer input.sql output.sql
62
+ ```
63
+
64
+ Options:
65
+
66
+ - `--dialect`: optional sqlglot dialect (`postgres`, `mysql`, etc.)
67
+ - `--min-count`: minimum repetition count to extract (default: `2`)
68
+ - `--temp-prefix`: generated temp table prefix (default: `__temp`)
69
+
70
+ ## Python API
71
+
72
+ ```python
73
+ from sql_decomposer import decompose_sql
74
+
75
+ sql = "SELECT * FROM (SELECT id FROM users) t1 JOIN (SELECT id FROM users) t2 ON t1.id=t2.id"
76
+ result = decompose_sql(sql, min_count=2, temp_prefix="tmp")
77
+ print(result)
78
+ ```
79
+
80
+ ## Development
81
+
82
+ Install dev dependencies:
83
+
84
+ ```bash
85
+ pip install -e ".[dev]"
86
+ ```
87
+
88
+ Run tests:
89
+
90
+ ```bash
91
+ pytest
92
+ ```
93
+
94
+ Build artifacts:
95
+
96
+ ```bash
97
+ python -m build
98
+ ```
99
+
100
+ Validate package metadata:
101
+
102
+ ```bash
103
+ twine check dist/*
104
+ ```
105
+
106
+ ## GitHub and PyPI release checklist
107
+
108
+ 1. Create repository named `sql_decomposer` on GitHub.
109
+ 2. Push code and enable Actions.
110
+ 3. Create a PyPI project `sql-decomposer`.
111
+ 4. Add `PYPI_API_TOKEN` as a GitHub Actions secret.
112
+ 5. Tag a release (`v0.1.0`) to trigger publish workflow.
113
+
114
+ ## License
115
+
116
+ MIT License. See `LICENSE`.
@@ -0,0 +1,91 @@
1
+ # sql_decomposer
2
+
3
+ `sql_decomposer` extracts repeated SQL subqueries into temporary tables.
4
+ It can help reduce duplication in large analytical queries and produce a more readable SQL script.
5
+
6
+ ## Features
7
+
8
+ - Parses SQL safely with `sqlglot`.
9
+ - Detects repeated `SELECT` subqueries.
10
+ - Rewrites repeated blocks to `SELECT * FROM <temp_table>`.
11
+ - Provides a Python API and a CLI.
12
+
13
+ ## Installation
14
+
15
+ From source:
16
+
17
+ ```bash
18
+ pip install .
19
+ ```
20
+
21
+ From PyPI (after release):
22
+
23
+ ```bash
24
+ pip install sql-decomposer
25
+ ```
26
+
27
+ ## CLI usage
28
+
29
+ ```bash
30
+ python -m sql_decomposer input.sql output.sql --min-count 2 --temp-prefix __temp
31
+ ```
32
+
33
+ Or with console script:
34
+
35
+ ```bash
36
+ sql-decomposer input.sql output.sql
37
+ ```
38
+
39
+ Options:
40
+
41
+ - `--dialect`: optional sqlglot dialect (`postgres`, `mysql`, etc.)
42
+ - `--min-count`: minimum repetition count to extract (default: `2`)
43
+ - `--temp-prefix`: generated temp table prefix (default: `__temp`)
44
+
45
+ ## Python API
46
+
47
+ ```python
48
+ from sql_decomposer import decompose_sql
49
+
50
+ sql = "SELECT * FROM (SELECT id FROM users) t1 JOIN (SELECT id FROM users) t2 ON t1.id=t2.id"
51
+ result = decompose_sql(sql, min_count=2, temp_prefix="tmp")
52
+ print(result)
53
+ ```
54
+
55
+ ## Development
56
+
57
+ Install dev dependencies:
58
+
59
+ ```bash
60
+ pip install -e ".[dev]"
61
+ ```
62
+
63
+ Run tests:
64
+
65
+ ```bash
66
+ pytest
67
+ ```
68
+
69
+ Build artifacts:
70
+
71
+ ```bash
72
+ python -m build
73
+ ```
74
+
75
+ Validate package metadata:
76
+
77
+ ```bash
78
+ twine check dist/*
79
+ ```
80
+
81
+ ## GitHub and PyPI release checklist
82
+
83
+ 1. Create repository named `sql_decomposer` on GitHub.
84
+ 2. Push code and enable Actions.
85
+ 3. Create a PyPI project `sql-decomposer`.
86
+ 4. Add `PYPI_API_TOKEN` as a GitHub Actions secret.
87
+ 5. Tag a release (`v0.1.0`) to trigger publish workflow.
88
+
89
+ ## License
90
+
91
+ MIT License. See `LICENSE`.
@@ -0,0 +1,46 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "sql-decomposer"
7
+ version = "0.1.0"
8
+ description = "Extract repeated SQL subqueries into temporary tables using sqlglot."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = "MIT"
12
+ authors = [
13
+ { name = "sql_decomposer contributors" }
14
+ ]
15
+ keywords = ["sql", "sqlglot", "decomposer", "query-optimization"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "Intended Audience :: Developers",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.9",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Topic :: Database",
25
+ ]
26
+ dependencies = [
27
+ "sqlglot>=23.0",
28
+ ]
29
+
30
+ [project.optional-dependencies]
31
+ dev = [
32
+ "build>=1.2.2",
33
+ "pytest>=8.0",
34
+ "twine>=5.0",
35
+ ]
36
+
37
+ [project.scripts]
38
+ sql-decomposer = "sql_decomposer.__main__:main"
39
+
40
+ [tool.setuptools.packages.find]
41
+ where = ["."]
42
+ include = ["sql_decomposer*"]
43
+
44
+ [tool.pytest.ini_options]
45
+ testpaths = ["tests"]
46
+ python_files = ["test_package.py"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,4 @@
1
"""Public package interface for ``sql_decomposer``."""

from sql_decomposer.decomposer import decompose_sql

# NOTE: keep in sync with the version declared in pyproject.toml.
__version__ = "0.1.0"
__all__ = ["decompose_sql"]
@@ -0,0 +1,56 @@
1
import argparse
from pathlib import Path
from typing import Optional

from sql_decomposer.decomposer import decompose_sql
5
+
6
+
7
def build_parser() -> argparse.ArgumentParser:
    """Return the argparse parser used by the ``sql_decomposer`` CLI.

    Positional arguments are the input and output ``.sql`` paths; optional
    flags tune the sqlglot dialect, the repetition threshold, and the
    generated temp-table name prefix.
    """
    cli = argparse.ArgumentParser(
        prog="sql_decomposer",
        description="Decompose repeated SQL subqueries and save result to file.",
    )
    cli.add_argument("input", type=Path, help="Path to input .sql file")
    cli.add_argument("output", type=Path, help="Path to output .sql file")
    cli.add_argument(
        "--dialect",
        default=None,
        help='Optional sqlglot dialect (e.g. "postgres", "mysql")',
    )
    cli.add_argument(
        "--min-count",
        default=2,
        type=int,
        help="Minimum repetition count for extraction (default: 2)",
    )
    cli.add_argument(
        "--temp-prefix",
        default="__temp",
        help='Prefix for generated temp tables (default: "__temp")',
    )
    return cli
31
+
32
+
33
def main(argv: Optional[list[str]] = None) -> int:
    """Run the CLI: read the input SQL, decompose it, write the result.

    Args:
        argv: Optional argument list (useful for tests). When ``None`` —
            as in the console-script entry point — argparse falls back to
            ``sys.argv[1:]``, preserving the original behavior.

    Returns:
        Process exit code (0 on success). Invalid paths exit via
        ``parser.error`` with status 2, as before.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Fail fast with a CLI-style error message rather than a traceback.
    if not args.input.exists():
        parser.error(f"Input file does not exist: {args.input}")
    if not args.input.is_file():
        parser.error(f"Input path is not a file: {args.input}")

    sql_text = args.input.read_text(encoding="utf-8")
    decomposed = decompose_sql(
        sql=sql_text,
        dialect=args.dialect,
        min_count=args.min_count,
        temp_prefix=args.temp_prefix,
    )

    # Create the destination directory on demand so callers need not.
    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(decomposed, encoding="utf-8")
    return 0
53
+
54
+
55
if __name__ == "__main__":
    # Propagate main()'s return code as the process exit status.
    raise SystemExit(main())
@@ -0,0 +1,156 @@
1
+ """
2
+ SQL Decomposer — extracts repeated subqueries from a SQL statement
3
+ into temporary tables, reducing duplication.
4
+ """
5
+
6
+ import sqlglot
7
+ from sqlglot import exp
8
+ from typing import Optional
9
+
10
+
11
def _collect_sql_frequencies(
    ast: exp.Expression, dialect: Optional[str]
) -> tuple[dict[str, int], set[str]]:
    """Count how often each node's rendered SQL occurs; track SELECT texts.

    Args:
        ast: Parsed sqlglot expression tree to scan.
        dialect: Optional sqlglot dialect used when rendering nodes.

    Returns:
        A pair ``(frequencies, select_texts)``: ``frequencies`` maps each
        rendered SQL string to its occurrence count; ``select_texts`` holds
        the rendered SQL of every ``SELECT`` node.
    """
    frequencies: dict[str, int] = {}
    select_texts: set[str] = set()

    for item in ast.walk():
        # Older sqlglot versions yield (node, parent, key) tuples from walk().
        current = item[0] if isinstance(item, tuple) else item
        rendered = current.sql(dialect=dialect)
        if not rendered:
            continue

        frequencies[rendered] = frequencies.get(rendered, 0) + 1
        if isinstance(current, exp.Select):
            select_texts.add(rendered)

    return frequencies, select_texts
29
+
30
+
31
+ def _build_temp_table_map(
32
+ sql_counter: dict[str, int],
33
+ select_sqls: set[str],
34
+ root_sql: str,
35
+ min_count: int,
36
+ temp_prefix: str,
37
+ ) -> dict[str, str]:
38
+ """
39
+ Build stable mapping: repeated SELECT SQL -> temp table name.
40
+
41
+ Ordering is deterministic:
42
+ 1) higher repetition count first
43
+ 2) longer SQL first (helps nested/repeated patterns)
44
+ """
45
+ sorted_items = sorted(
46
+ sql_counter.items(),
47
+ key=lambda item: (item[1], len(item[0])),
48
+ reverse=True,
49
+ )
50
+
51
+ ordered_candidates = [
52
+ sql_key
53
+ for sql_key, count in sorted_items
54
+ if count >= min_count and sql_key != root_sql and sql_key in select_sqls
55
+ ]
56
+
57
+ temp_table_map: dict[str, str] = {}
58
+ working_sql = root_sql
59
+
60
+ # Keep legacy "longer-first replacement" semantics for candidate filtering:
61
+ # once a larger repeated SELECT is selected, nested candidates it contains
62
+ # may disappear and therefore should not be extracted separately.
63
+ for sql_key in ordered_candidates:
64
+ if sql_key not in working_sql:
65
+ continue
66
+
67
+ temp_name = f"{temp_prefix}_{len(temp_table_map) + 1}"
68
+ temp_table_map[sql_key] = temp_name
69
+ working_sql = working_sql.replace(sql_key, f"SELECT * FROM {temp_name}")
70
+
71
+ return temp_table_map
72
+
73
+
74
def _rewrite_selects_with_temp_tables(
    ast: exp.Expression, dialect: Optional[str], temp_table_map: dict[str, str]
) -> exp.Expression:
    """Swap every mapped SELECT node for ``SELECT * FROM <temp_table>``.

    Args:
        ast: Parsed expression tree to rewrite.
        dialect: Optional sqlglot dialect used when rendering nodes.
        temp_table_map: Rendered SELECT SQL -> temp table name.

    Returns:
        The rewritten tree (the input tree unchanged when the map is empty).
    """
    if not temp_table_map:
        return ast

    def _swap(node: exp.Expression) -> exp.Expression:
        if not isinstance(node, exp.Select):
            return node
        replacement = temp_table_map.get(node.sql(dialect=dialect))
        if not replacement:
            return node
        return exp.select("*").from_(replacement)

    return ast.transform(_swap)
90
+
91
+
92
def decompose_sql(
    sql: str,
    dialect: Optional[str] = None,
    min_count: int = 2,
    temp_prefix: str = "__temp",
) -> str:
    """Extract repeated sub-SELECTs into CREATE TEMPORARY TABLE statements.

    The input is parsed once with sqlglot; every node's rendered SQL is
    counted, and each SELECT that occurs at least *min_count* times (and is
    not the whole query) becomes a temp table. Candidates are applied
    count-desc then length-desc, so enclosing queries take precedence over
    fragments they contain.

    Args:
        sql: Source SQL string.
        dialect: sqlglot dialect name (e.g. "postgres", "mysql").
        min_count: Minimum number of occurrences for extraction (default 2).
        temp_prefix: Naming prefix for generated temp tables.

    Returns:
        SQL script as a single string: zero or more CREATE TEMPORARY TABLE
        statements followed by the final (rewritten) query.
    """
    tree = sqlglot.parse_one(sql, dialect=dialect)

    # Frequency of every rendered node plus the subset that are SELECTs.
    frequencies, select_texts = _collect_sql_frequencies(tree, dialect)

    # The root query itself is never a candidate for extraction.
    root_text = tree.sql(dialect=dialect)

    temp_tables = _build_temp_table_map(
        sql_counter=frequencies,
        select_sqls=select_texts,
        root_sql=root_text,
        min_count=min_count,
        temp_prefix=temp_prefix,
    )

    # One CREATE statement per extracted SELECT, in selection order.
    script = [
        f"CREATE TEMPORARY TABLE {name} AS {select_text}"
        for select_text, name in temp_tables.items()
    ]

    # Rewrite the tree so repeated SELECTs read from their temp tables,
    # then append the final query.
    final_tree = _rewrite_selects_with_temp_tables(tree, dialect, temp_tables)
    script.append(final_tree.sql(dialect=dialect))

    return "\n".join(script)
@@ -0,0 +1,116 @@
1
+ Metadata-Version: 2.4
2
+ Name: sql-decomposer
3
+ Version: 0.1.0
4
+ Summary: Extract repeated SQL subqueries into temporary tables using sqlglot.
5
+ Author: sql_decomposer contributors
6
+ License-Expression: MIT
7
+ Keywords: sql,sqlglot,decomposer,query-optimization
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.9
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Topic :: Database
16
+ Requires-Python: >=3.9
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: sqlglot>=23.0
20
+ Provides-Extra: dev
21
+ Requires-Dist: build>=1.2.2; extra == "dev"
22
+ Requires-Dist: pytest>=8.0; extra == "dev"
23
+ Requires-Dist: twine>=5.0; extra == "dev"
24
+ Dynamic: license-file
25
+
26
+ # sql_decomposer
27
+
28
+ `sql_decomposer` extracts repeated SQL subqueries into temporary tables.
29
+ It can help reduce duplication in large analytical queries and produce a more readable SQL script.
30
+
31
+ ## Features
32
+
33
+ - Parses SQL safely with `sqlglot`.
34
+ - Detects repeated `SELECT` subqueries.
35
+ - Rewrites repeated blocks to `SELECT * FROM <temp_table>`.
36
+ - Provides a Python API and a CLI.
37
+
38
+ ## Installation
39
+
40
+ From source:
41
+
42
+ ```bash
43
+ pip install .
44
+ ```
45
+
46
+ From PyPI (after release):
47
+
48
+ ```bash
49
+ pip install sql-decomposer
50
+ ```
51
+
52
+ ## CLI usage
53
+
54
+ ```bash
55
+ python -m sql_decomposer input.sql output.sql --min-count 2 --temp-prefix __temp
56
+ ```
57
+
58
+ Or with console script:
59
+
60
+ ```bash
61
+ sql-decomposer input.sql output.sql
62
+ ```
63
+
64
+ Options:
65
+
66
+ - `--dialect`: optional sqlglot dialect (`postgres`, `mysql`, etc.)
67
+ - `--min-count`: minimum repetition count to extract (default: `2`)
68
+ - `--temp-prefix`: generated temp table prefix (default: `__temp`)
69
+
70
+ ## Python API
71
+
72
+ ```python
73
+ from sql_decomposer import decompose_sql
74
+
75
+ sql = "SELECT * FROM (SELECT id FROM users) t1 JOIN (SELECT id FROM users) t2 ON t1.id=t2.id"
76
+ result = decompose_sql(sql, min_count=2, temp_prefix="tmp")
77
+ print(result)
78
+ ```
79
+
80
+ ## Development
81
+
82
+ Install dev dependencies:
83
+
84
+ ```bash
85
+ pip install -e ".[dev]"
86
+ ```
87
+
88
+ Run tests:
89
+
90
+ ```bash
91
+ pytest
92
+ ```
93
+
94
+ Build artifacts:
95
+
96
+ ```bash
97
+ python -m build
98
+ ```
99
+
100
+ Validate package metadata:
101
+
102
+ ```bash
103
+ twine check dist/*
104
+ ```
105
+
106
+ ## GitHub and PyPI release checklist
107
+
108
+ 1. Create repository named `sql_decomposer` on GitHub.
109
+ 2. Push code and enable Actions.
110
+ 3. Create a PyPI project `sql-decomposer`.
111
+ 4. Add `PYPI_API_TOKEN` as a GitHub Actions secret.
112
+ 5. Tag a release (`v0.1.0`) to trigger publish workflow.
113
+
114
+ ## License
115
+
116
+ MIT License. See `LICENSE`.
@@ -0,0 +1,13 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ sql_decomposer/__init__.py
5
+ sql_decomposer/__main__.py
6
+ sql_decomposer/decomposer.py
7
+ sql_decomposer.egg-info/PKG-INFO
8
+ sql_decomposer.egg-info/SOURCES.txt
9
+ sql_decomposer.egg-info/dependency_links.txt
10
+ sql_decomposer.egg-info/entry_points.txt
11
+ sql_decomposer.egg-info/requires.txt
12
+ sql_decomposer.egg-info/top_level.txt
13
+ tests/test_package.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ sql-decomposer = sql_decomposer.__main__:main
@@ -0,0 +1,6 @@
1
+ sqlglot>=23.0
2
+
3
+ [dev]
4
+ build>=1.2.2
5
+ pytest>=8.0
6
+ twine>=5.0
@@ -0,0 +1 @@
1
+ sql_decomposer
@@ -0,0 +1,82 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ from sql_decomposer import decompose_sql
6
+
7
+
8
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent
9
+
10
+
11
def _read(rel_path: str) -> str:
    """Return the stripped UTF-8 contents of *rel_path* under the project root."""
    fixture = PROJECT_ROOT / rel_path
    return fixture.read_text(encoding="utf-8").strip()
13
+
14
+
15
@pytest.mark.parametrize(
    ("input_fixture", "expected_fixture"),
    [
        (f"tests/{stem}.sql", f"decomposed_sql/{stem}.sql")
        for stem in (
            "sql_big_analytics_query",
            "sql_complex_hr_query",
            "sql_financial_report_four_repeats",
            "sql_flat_query_no_subqueries",
            "sql_min_count_one",
            "sql_nested_repeated_subqueries",
            "sql_no_repetition",
            "sql_simple_two_identical_subqueries",
            "sql_triple_repeated_subquery",
            "sql_two_different_repeated_subqueries",
        )
    ],
)
def test_decompose_sql_matches_expected_fixtures(
    input_fixture: str, expected_fixture: str
) -> None:
    """Golden-file check: decomposition output matches the stored fixture."""
    expected = _read(expected_fixture)
    assert decompose_sql(_read(input_fixture)) == expected
60
+
61
+
62
def test_decompose_sql_does_not_replace_string_literals() -> None:
    """A SQL string literal that looks like a subquery must stay untouched.

    Only real SELECT nodes are rewritten; identical text inside a quoted
    literal is data, not syntax, and must survive decomposition verbatim.
    """
    sql = """
    SELECT
        'SELECT id FROM users WHERE active = 1' AS query_text,
        a.id
    FROM (
        SELECT id FROM users WHERE active = 1
    ) AS a
    JOIN (
        SELECT id FROM users WHERE active = 1
    ) AS b ON a.id = b.id
    """
    result = decompose_sql(sql)

    # The repeated derived table is extracted exactly once...
    assert (
        "CREATE TEMPORARY TABLE __temp_1 AS SELECT id FROM users WHERE active = 1"
        in result
    )
    # ...the look-alike string literal survives unchanged...
    assert "'SELECT id FROM users WHERE active = 1' AS query_text" in result
    # ...and both derived tables now read from the temp table.
    assert "(SELECT * FROM __temp_1) AS a" in result
    assert "(SELECT * FROM __temp_1) AS b" in result