sql-assignment-generator 0.0.9__tar.gz → 0.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sql_assignment_generator-0.0.11/.gitattributes +1 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/Makefile +4 -1
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/PKG-INFO +6 -3
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/pyproject.toml +6 -3
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/requirements.txt +3 -2
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/__init__.py +155 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/dataset/__init__.py +1 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/dataset/dataset.py +145 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/dataset/strings.py +64 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/exercise/__init__.py +1 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/exercise/exercise.py +102 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/exercise/strings.py +72 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/constraints/__init__.py +2 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/base.py +12 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/costraintType.py +19 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/__init__.py +4 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/aggregation.py +83 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/base.py +19 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_from.py +122 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_group_by.py +57 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_having.py +65 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_order_by.py +160 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_select.py +91 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_where.py +798 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/rows.py +65 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/set_operations.py +109 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/subquery.py +98 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/schema/__init__.py +24 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/schema/base.py +28 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/schema/tables.py +205 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/schema/values.py +44 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/__init__.py +136 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/base.py +41 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_002.py +56 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_004.py +53 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_007.py +48 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_009.py +51 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_010.py +35 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_011.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_012.py +36 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_015.py +35 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_019.py +31 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_021.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_026.py +35 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_035.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_039.py +36 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_040.py +55 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_041.py +50 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_042.py +43 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_043.py +35 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_044.py +58 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_045.py +42 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_046.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_049.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_052.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_053.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_054.py +37 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_055.py +36 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_057.py +31 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_058.py +33 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_059.py +33 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_060.py +40 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_062.py +33 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_063.py +39 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_064.py +33 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_066.py +31 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_067.py +31 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_068.py +32 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_069.py +33 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_070.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_071.py +36 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_072.py +40 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_073.py +39 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_074.py +37 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_075.py +42 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_076.py +35 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_077.py +37 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_078.py +46 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_079.py +44 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_080.py +32 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_081.py +31 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_083.py +36 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_084.py +40 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_086.py +33 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_088.py +35 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_089.py +39 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_091.py +32 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_093.py +35 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_095.py +44 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_096.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_097.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_098.py +37 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_099.py +33 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_102.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_104.py +34 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_105.py +36 -0
- sql_assignment_generator-0.0.11/src/sql_assignment_generator/exceptions.py +28 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/test.py +7 -11
- sql_assignment_generator-0.0.11/tests/constraints/query/test_aggregation.py +68 -0
- sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_from.py +150 -0
- sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_group_by.py +74 -0
- sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_having.py +70 -0
- sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_order_by.py +141 -0
- sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_select.py +112 -0
- sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_where.py +543 -0
- sql_assignment_generator-0.0.11/tests/constraints/query/test_rows.py +102 -0
- sql_assignment_generator-0.0.11/tests/constraints/query/test_set_operation.py +108 -0
- sql_assignment_generator-0.0.11/tests/constraints/query/test_subquery.py +136 -0
- sql_assignment_generator-0.0.11/tests/constraints/schema/__init__.py +24 -0
- sql_assignment_generator-0.0.11/tests/constraints/schema/test_tables.py +154 -0
- sql_assignment_generator-0.0.11/tests/constraints/schema/test_values.py +78 -0
- sql_assignment_generator-0.0.9/src/sql_assignment_generator/__init__.py +0 -45
- sql_assignment_generator-0.0.9/src/sql_assignment_generator/assignments/dataset.py +0 -177
- sql_assignment_generator-0.0.9/src/sql_assignment_generator/assignments/exercise.py +0 -133
- sql_assignment_generator-0.0.9/src/sql_assignment_generator/constraints/base.py +0 -26
- sql_assignment_generator-0.0.9/src/sql_assignment_generator/constraints/query.py +0 -678
- sql_assignment_generator-0.0.9/src/sql_assignment_generator/constraints/schema.py +0 -148
- sql_assignment_generator-0.0.9/src/sql_assignment_generator/query_sintax.py +0 -572
- sql_assignment_generator-0.0.9/src/sql_assignment_generator/sql_errors_details.py +0 -1404
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/.env.template +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/.gitignore +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/.readthedocs.yaml +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/LICENSE +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/README.md +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/docs/Makefile +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/docs/conf.py +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/docs/index.rst +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/docs/make.bat +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/docs/requirements.txt +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/assignments/__init__.py +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/assignments/assignment.py +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/difficulty_level.py +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/domains.py +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/llm/__init__.py +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/llm/chatgpt.py +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/llm/message.py +0 -0
- {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/llm/models.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
* text eol=lf
|
|
@@ -36,10 +36,13 @@ build: $(VENV) .env
|
|
|
36
36
|
uninstall: $(VENV)
|
|
37
37
|
$(VENV_BIN)/python -m pip uninstall -y $(NAME)
|
|
38
38
|
|
|
39
|
+
test: install
|
|
40
|
+
$(VENV_BIN)/python -m pytest
|
|
41
|
+
|
|
39
42
|
documentation:
|
|
40
43
|
make html SPHINXBUILD="../$(VENV_BIN)/sphinx-build" -C docs/
|
|
41
44
|
|
|
42
|
-
upload:
|
|
45
|
+
upload: test documentation
|
|
43
46
|
$(VENV_BIN)/python -m pip install --upgrade twine
|
|
44
47
|
$(VENV_BIN)/python -m twine upload --verbose dist/*
|
|
45
48
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sql_assignment_generator
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.11
|
|
4
4
|
Summary: This project generates SQL assignments based on common mistakes made by learners.
|
|
5
5
|
Project-URL: Repository, https://github.com/DavidePonzini/sql_assignment_generator
|
|
6
6
|
Project-URL: Documentation, https://sql-assignment-generator.readthedocs.io/en/latest/index.html
|
|
@@ -11,9 +11,12 @@ Classifier: License :: OSI Approved :: MIT License
|
|
|
11
11
|
Classifier: Operating System :: OS Independent
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
13
|
Requires-Python: >=3.11
|
|
14
|
-
Requires-Dist:
|
|
14
|
+
Requires-Dist: openai
|
|
15
15
|
Requires-Dist: pydantic
|
|
16
|
-
Requires-Dist: sql-error-
|
|
16
|
+
Requires-Dist: sql-error-taxonomy>=1.0.2
|
|
17
|
+
Requires-Dist: sqlglot>=11.5.6
|
|
18
|
+
Requires-Dist: sqlscope
|
|
19
|
+
Requires-Dist: sqlscope>=0.3.5
|
|
17
20
|
Description-Content-Type: text/markdown
|
|
18
21
|
|
|
19
22
|
# sql-assignment-generation
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "sql_assignment_generator"
|
|
7
|
-
version = "0.0.
|
|
7
|
+
version = "0.0.11"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name="Davide Ponzini", email="davide.ponzini95@gmail.com" },
|
|
10
10
|
]
|
|
@@ -18,8 +18,11 @@ classifiers = [
|
|
|
18
18
|
]
|
|
19
19
|
dependencies = [
|
|
20
20
|
"pydantic",
|
|
21
|
-
"
|
|
22
|
-
"
|
|
21
|
+
"sqlglot>=11.5.6",
|
|
22
|
+
"sqlscope>=0.3.5",
|
|
23
|
+
"sql-error-taxonomy>=1.0.2",
|
|
24
|
+
"sqlscope",
|
|
25
|
+
"openai",
|
|
23
26
|
]
|
|
24
27
|
|
|
25
28
|
[project.urls]
|
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
dav_tools>=0.4.22
|
|
2
2
|
# progress>=1.6
|
|
3
3
|
pydantic>=2.10.4
|
|
4
|
-
sql-error-
|
|
4
|
+
sql-error-taxonomy>=1.0.2
|
|
5
|
+
sqlscope>=1.0.7
|
|
6
|
+
sqlglot
|
|
5
7
|
dotenv
|
|
6
8
|
openai
|
|
7
9
|
|
|
@@ -12,5 +14,4 @@ ipython
|
|
|
12
14
|
build
|
|
13
15
|
autoapi
|
|
14
16
|
pytest
|
|
15
|
-
pytest-cov
|
|
16
17
|
sphinx-autoapi
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
'''Generate SQL assignments based on specified SQL errors and difficulty levels.'''
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Callable
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
|
+
import threading
|
|
8
|
+
import random
|
|
9
|
+
|
|
10
|
+
from .difficulty_level import DifficultyLevel
|
|
11
|
+
from .domains import random_domain
|
|
12
|
+
from .assignments import Assignment, Dataset, Exercise
|
|
13
|
+
from .constraints import SchemaConstraint, QueryConstraint
|
|
14
|
+
from .error_requirements import SqlErrorRequirements, ERROR_REQUIREMENTS_MAP
|
|
15
|
+
from .exceptions import ExerciseGenerationError
|
|
16
|
+
|
|
17
|
+
import dav_tools
|
|
18
|
+
from sql_error_taxonomy import SqlErrors
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def generate_assignment(
|
|
22
|
+
errors: list[tuple[SqlErrors, DifficultyLevel]],
|
|
23
|
+
domain: str | None = None,
|
|
24
|
+
*,
|
|
25
|
+
shuffle_exercises: bool = False,
|
|
26
|
+
naming_func: Callable[[SqlErrors, DifficultyLevel], str] = lambda error, difficulty: f'{error.name} - {difficulty.name}',
|
|
27
|
+
max_unique_attempts: int = 3,
|
|
28
|
+
max_workers: int | None = None
|
|
29
|
+
) -> Assignment:
|
|
30
|
+
'''
|
|
31
|
+
Generate SQL assignments based on the given SQL errors and their corresponding difficulty levels.
|
|
32
|
+
|
|
33
|
+
- Exercises are returned in the same order as the input `errors`.
|
|
34
|
+
- Logging happens as soon as possible (during generation), and each message uses the exercise title as its id.
|
|
35
|
+
- Deduplication is global across all generated exercises (thread-safe).
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
errors (list[tuple[SqlErrors, DifficultyLevel]]): A list of (error, difficulty) pairs.
|
|
39
|
+
domain (str | None): The domain for the assignments. If None, a random domain will be selected.
|
|
40
|
+
shuffle_exercises (bool): Whether to shuffle exercises to prevent ordering bias (shuffles input order).
|
|
41
|
+
naming_func (Callable[[SqlErrors, DifficultyLevel], str]): Generates exercise titles.
|
|
42
|
+
max_unique_attempts (int): Maximum retries to avoid duplicate solutions per (error, difficulty).
|
|
43
|
+
max_workers (int | None): Thread pool size. If None, uses ThreadPoolExecutor default.
|
|
44
|
+
|
|
45
|
+
Returns:
|
|
46
|
+
Assignment: The generated assignment (stable order).
|
|
47
|
+
'''
|
|
48
|
+
|
|
49
|
+
dav_tools.messages.info(f'Starting assignment generation for {len(errors)} exercises...')
|
|
50
|
+
|
|
51
|
+
if domain is None:
|
|
52
|
+
domain = random_domain()
|
|
53
|
+
|
|
54
|
+
if shuffle_exercises:
|
|
55
|
+
random.shuffle(errors)
|
|
56
|
+
|
|
57
|
+
# convert SqlErrors -> SqlErrorRequirements, keeping difficulty levels
|
|
58
|
+
requirements: list[tuple[SqlErrors, SqlErrorRequirements, DifficultyLevel]] = []
|
|
59
|
+
for error, difficulty in errors:
|
|
60
|
+
if error not in ERROR_REQUIREMENTS_MAP:
|
|
61
|
+
raise NotImplementedError(f'Error requirements not implemented for error: {error.name}')
|
|
62
|
+
|
|
63
|
+
requirements.append((error, ERROR_REQUIREMENTS_MAP[error], difficulty))
|
|
64
|
+
|
|
65
|
+
# initialize requirements and extra details
|
|
66
|
+
dataset_requirements: list[SchemaConstraint] = []
|
|
67
|
+
for _, req, difficulty in requirements:
|
|
68
|
+
dataset_requirements.extend(req.dataset_constraints(difficulty))
|
|
69
|
+
|
|
70
|
+
dataset_extra_details: list[str] = [
|
|
71
|
+
req.dataset_extra_details()
|
|
72
|
+
for _, req, _ in requirements
|
|
73
|
+
]
|
|
74
|
+
dataset_extra_details = [detail for detail in dataset_extra_details if detail.strip()] # filter out empty details
|
|
75
|
+
dataset_extra_details = list(set(dataset_extra_details)) # deduplicate details
|
|
76
|
+
|
|
77
|
+
dav_tools.messages.info(f'Generating dataset for domain: {domain}')
|
|
78
|
+
dataset = Dataset.generate(domain, dataset_requirements, dataset_extra_details)
|
|
79
|
+
|
|
80
|
+
generated_solutions_hashes: set[str] = set()
|
|
81
|
+
hashes_lock = threading.Lock()
|
|
82
|
+
|
|
83
|
+
# Serialize log output to avoid interleaving (and to keep dav_tools usage thread-safe).
|
|
84
|
+
log_lock = threading.Lock()
|
|
85
|
+
|
|
86
|
+
def _worker(
|
|
87
|
+
idx: int,
|
|
88
|
+
error: SqlErrors,
|
|
89
|
+
difficulty: DifficultyLevel,
|
|
90
|
+
constraints: list[QueryConstraint],
|
|
91
|
+
extra_details: str
|
|
92
|
+
) -> tuple[int, Exercise | None]:
|
|
93
|
+
title = naming_func(error, difficulty)
|
|
94
|
+
|
|
95
|
+
dav_tools.messages.info(f'Starting generation for exercise: {title}')
|
|
96
|
+
|
|
97
|
+
last_generated_exercise: Exercise | None = None
|
|
98
|
+
|
|
99
|
+
for attempt in range(max_unique_attempts):
|
|
100
|
+
try:
|
|
101
|
+
generated_exercise = Exercise.generate(error, difficulty, constraints, extra_details, dataset=dataset, title=title)
|
|
102
|
+
except ExerciseGenerationError:
|
|
103
|
+
with log_lock:
|
|
104
|
+
dav_tools.messages.warning(f'{title}: Skipping exercise generation for {error.name} due to validation failures.')
|
|
105
|
+
return (idx, None)
|
|
106
|
+
|
|
107
|
+
last_generated_exercise = generated_exercise
|
|
108
|
+
raw_solution = generated_exercise.solutions[0]
|
|
109
|
+
normalized_solution = raw_solution.sql.lower().strip()
|
|
110
|
+
|
|
111
|
+
with hashes_lock:
|
|
112
|
+
is_duplicate = normalized_solution in generated_solutions_hashes
|
|
113
|
+
if not is_duplicate:
|
|
114
|
+
generated_solutions_hashes.add(normalized_solution)
|
|
115
|
+
|
|
116
|
+
if is_duplicate:
|
|
117
|
+
with log_lock:
|
|
118
|
+
dav_tools.messages.warning(f'{title}: Duplicate solution detected for {error.name} (Attempt {attempt + 1}/{max_unique_attempts}). Regenerating...')
|
|
119
|
+
continue
|
|
120
|
+
|
|
121
|
+
return (idx, generated_exercise)
|
|
122
|
+
|
|
123
|
+
if last_generated_exercise is not None:
|
|
124
|
+
with log_lock:
|
|
125
|
+
dav_tools.messages.error(f'{title}: Could not generate a UNIQUE exercise for {error.name} after {max_unique_attempts} retries. Skipping.')
|
|
126
|
+
return (idx, None)
|
|
127
|
+
|
|
128
|
+
# Pre-allocate so we can preserve ordering no matter completion order.
|
|
129
|
+
ordered_results: list[Exercise | None] = [None] * len(errors)
|
|
130
|
+
|
|
131
|
+
if max_workers == 1:
|
|
132
|
+
for idx, (error, requirement, difficulty) in enumerate(requirements):
|
|
133
|
+
i, ex = _worker(idx, error, difficulty, requirement.exercise_constraints(difficulty), requirement.exercise_extra_details())
|
|
134
|
+
ordered_results[i] = ex
|
|
135
|
+
else:
|
|
136
|
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
137
|
+
futures = [
|
|
138
|
+
executor.submit(_worker, idx, error, difficulty, requirement.exercise_constraints(difficulty), requirement.exercise_extra_details())
|
|
139
|
+
for idx, (error, requirement, difficulty) in enumerate(requirements)
|
|
140
|
+
]
|
|
141
|
+
for fut in as_completed(futures):
|
|
142
|
+
idx, ex = fut.result()
|
|
143
|
+
ordered_results[idx] = ex
|
|
144
|
+
|
|
145
|
+
exercises: list[Exercise] = [ex for ex in ordered_results if ex is not None]
|
|
146
|
+
|
|
147
|
+
if len(exercises) < len(errors):
|
|
148
|
+
dav_tools.messages.warning(f'Finished generating exercises with some failures. Generated {len(exercises)} out of {len(errors)} requested.')
|
|
149
|
+
else:
|
|
150
|
+
dav_tools.messages.success(f'Successfully generated all {len(exercises)} exercises.')
|
|
151
|
+
|
|
152
|
+
return Assignment(
|
|
153
|
+
dataset=dataset,
|
|
154
|
+
exercises=exercises
|
|
155
|
+
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .dataset import Dataset
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
from collections.abc import Sequence
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
import dav_tools
|
|
4
|
+
import sqlglot
|
|
5
|
+
from sqlscope import Catalog, build_catalog_from_sql
|
|
6
|
+
|
|
7
|
+
from . import strings
|
|
8
|
+
from ...constraints.schema import SchemaConstraint
|
|
9
|
+
from ... import llm
|
|
10
|
+
from ...constraints import SchemaConstraint, schema as schema_constraints
|
|
11
|
+
from ...exceptions import SQLParsingError, ConstraintValidationError, DatasetGenerationError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
|
|
15
|
+
class Dataset:
|
|
16
|
+
'''A SQL dataset related to a specific domain, including schema creation and data insertion commands.'''
|
|
17
|
+
|
|
18
|
+
create_commands: list[str]
|
|
19
|
+
'''SQL commands to create the database schema.'''
|
|
20
|
+
|
|
21
|
+
insert_commands: list[str]
|
|
22
|
+
'''SQL commands to insert data into the database.'''
|
|
23
|
+
|
|
24
|
+
domain: str
|
|
25
|
+
'''The domain associated with the dataset.'''
|
|
26
|
+
|
|
27
|
+
_catalog_cache: Catalog | None = None
|
|
28
|
+
'''Cached SQLScope Catalog for the dataset.'''
|
|
29
|
+
|
|
30
|
+
_catalog_cache_commands_hash: int | None = None
|
|
31
|
+
'''Hash of the CREATE TABLE commands used to build the cached Catalog.'''
|
|
32
|
+
|
|
33
|
+
@property
|
|
34
|
+
def catalog(self) -> Catalog:
|
|
35
|
+
'''
|
|
36
|
+
Build and return a SQLScope Catalog from the dataset's SQL commands.
|
|
37
|
+
The result is cached for handling multiple accesses efficiently.
|
|
38
|
+
Cache is properly invalidated if the CREATE TABLE commands change.
|
|
39
|
+
'''
|
|
40
|
+
if self._catalog_cache is None or self._catalog_cache_commands_hash != hash(tuple(self.create_commands)):
|
|
41
|
+
full_sql = '\n'.join(self.create_commands)
|
|
42
|
+
self._catalog_cache = build_catalog_from_sql(full_sql)
|
|
43
|
+
self._catalog_cache_commands_hash = hash(tuple(self.create_commands))
|
|
44
|
+
|
|
45
|
+
return self._catalog_cache
|
|
46
|
+
|
|
47
|
+
def to_sql_no_context(self) -> str:
|
|
48
|
+
'''Generate the SQL commands to create and populate the dataset without schema context.'''
|
|
49
|
+
|
|
50
|
+
create_cmds = '\n'.join(self.create_commands)
|
|
51
|
+
insert_cmds = '\n'.join(self.insert_commands)
|
|
52
|
+
|
|
53
|
+
return f'''{create_cmds}\n\n{insert_cmds}'''
|
|
54
|
+
|
|
55
|
+
def to_sql(self, schema: str) -> str:
|
|
56
|
+
'''Generate the SQL commands to create and populate the dataset within the specified schema.'''
|
|
57
|
+
|
|
58
|
+
# Normalize schema name
|
|
59
|
+
schema = schema.lower().replace(' ', '_')
|
|
60
|
+
|
|
61
|
+
create_cmds = '\n\n'.join(self.create_commands)
|
|
62
|
+
insert_cmds = '\n\n'.join(self.insert_commands)
|
|
63
|
+
|
|
64
|
+
return strings.to_sql_format(schema=schema, create_cmds=create_cmds, insert_cmds=insert_cmds)
|
|
65
|
+
|
|
66
|
+
@staticmethod
|
|
67
|
+
def generate(domain: str,
|
|
68
|
+
constraints: Sequence[SchemaConstraint],
|
|
69
|
+
extra_details: list[str] = [],
|
|
70
|
+
*,
|
|
71
|
+
max_attempts: int = 5
|
|
72
|
+
) -> 'Dataset':
|
|
73
|
+
'''Generate a SQL dataset based on the specified parameters.'''
|
|
74
|
+
|
|
75
|
+
# merge similar constraints
|
|
76
|
+
constraints = schema_constraints.merge_constraints(constraints)
|
|
77
|
+
|
|
78
|
+
prompt_text = strings.prompt_generate(
|
|
79
|
+
domain=domain,
|
|
80
|
+
extra_details=extra_details,
|
|
81
|
+
constraints=constraints
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
# query LLM to generate dataset
|
|
85
|
+
messages = llm.Message()
|
|
86
|
+
messages.add_message_user(prompt_text)
|
|
87
|
+
|
|
88
|
+
for attempt in range(max_attempts):
|
|
89
|
+
try:
|
|
90
|
+
answer = llm.generate_answer(messages, json_format=llm.models.Schema)
|
|
91
|
+
assert isinstance(answer, llm.models.Schema), "The response is not in the expected JSON format."
|
|
92
|
+
|
|
93
|
+
# parse CREATE TABLEs
|
|
94
|
+
parsed_tables = []
|
|
95
|
+
for create_table in answer.schema_tables:
|
|
96
|
+
try:
|
|
97
|
+
parsed = sqlglot.parse_one(create_table, read="postgres")
|
|
98
|
+
parsed_tables.append(parsed)
|
|
99
|
+
except Exception as e:
|
|
100
|
+
raise SQLParsingError(f"Syntax error in CREATE TABLE generated: {e}", create_table)
|
|
101
|
+
create_commands = [f'{cmd.sql(pretty=True, dialect="postgres")};' for cmd in parsed_tables]
|
|
102
|
+
|
|
103
|
+
# parse INSERT INTOs
|
|
104
|
+
parsed_inserts = []
|
|
105
|
+
for create_table in answer.insert_commands:
|
|
106
|
+
try:
|
|
107
|
+
parsed = sqlglot.parse_one(create_table, read="postgres")
|
|
108
|
+
parsed_inserts.append(parsed)
|
|
109
|
+
except Exception as e:
|
|
110
|
+
raise SQLParsingError(f"Syntax error in INSERT COMMANDS generated: {e}", create_table)
|
|
111
|
+
insert_commands = [f'{cmd.sql(pretty=True, dialect="postgres")};' for cmd in parsed_inserts]
|
|
112
|
+
|
|
113
|
+
catalog = build_catalog_from_sql('; '.join(cmd.sql() for cmd in parsed_tables))
|
|
114
|
+
|
|
115
|
+
# check if constraints are satisfied
|
|
116
|
+
errors = []
|
|
117
|
+
for constraint in constraints:
|
|
118
|
+
try:
|
|
119
|
+
constraint.validate(catalog, parsed_tables, parsed_inserts)
|
|
120
|
+
except ConstraintValidationError as e:
|
|
121
|
+
errors.append(str(e))
|
|
122
|
+
continue
|
|
123
|
+
|
|
124
|
+
# no errors, return dataset
|
|
125
|
+
if not errors:
|
|
126
|
+
result = Dataset(
|
|
127
|
+
create_commands=create_commands,
|
|
128
|
+
insert_commands=insert_commands,
|
|
129
|
+
domain=domain
|
|
130
|
+
)
|
|
131
|
+
# fill cache, since we already have the catalog
|
|
132
|
+
result._catalog_cache = catalog
|
|
133
|
+
result._catalog_cache_commands_hash = hash(tuple(create_commands))
|
|
134
|
+
|
|
135
|
+
return result
|
|
136
|
+
|
|
137
|
+
dav_tools.messages.error(f'Validation failed for attempt {attempt + 1}. Missing requirements: {", ".join(errors)}')
|
|
138
|
+
|
|
139
|
+
messages.add_message_user(strings.feedback_constraint_violations(errors))
|
|
140
|
+
|
|
141
|
+
except SQLParsingError as e:
|
|
142
|
+
dav_tools.messages.error(f"Error during generation (Attempt {attempt + 1}): {e}")
|
|
143
|
+
messages.add_message_user(f"SQL code is not syntactically valid: {str(e)}. Please regenerate valid SQL.")
|
|
144
|
+
|
|
145
|
+
raise DatasetGenerationError(f'Failed to generate a valid dataset after {max_attempts} attempts.')
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from typing import Sequence
|
|
2
|
+
from ...constraints import SchemaConstraint
|
|
3
|
+
|
|
4
|
+
def to_sql_format(schema: str, create_cmds: str, insert_cmds: str) -> str:
|
|
5
|
+
return f'''BEGIN;
|
|
6
|
+
|
|
7
|
+
DROP SCHEMA IF EXISTS {schema} CASCADE;
|
|
8
|
+
CREATE SCHEMA {schema};
|
|
9
|
+
SET search_path TO {schema};
|
|
10
|
+
|
|
11
|
+
{create_cmds}
|
|
12
|
+
|
|
13
|
+
{insert_cmds}
|
|
14
|
+
|
|
15
|
+
COMMIT;'''
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def prompt_generate(domain: str, extra_details: list[str], constraints: Sequence[SchemaConstraint]) -> str:
    '''Build the LLM prompt that requests a SQL dataset for *domain*.

    Args:
        domain: Topic the generated schema and data should be about.
        extra_details: Optional free-text characteristics; blank entries are ignored.
        constraints: Schema constraints whose descriptions become mandatory bullet points.

    Returns:
        The full prompt text, including the required JSON output format.
    '''
    formatted_constraints = '\n'.join(f'- {c.description}' for c in constraints)

    # Drop blank entries so they don't render as empty bullet points.
    # (Kept in a new name instead of reassigning the parameter.)
    details = [detail for detail in extra_details if detail.strip() != '']
    if details:
        # Build the bullet list with join instead of repeated += concatenation.
        extra_details_str = (
            "The dataset must have the following characteristics:\n"
            + ''.join(f"- {detail}\n" for detail in details)
        )
    else:
        extra_details_str = ''

    return f'''
Generate a SQL dataset about the following domain: "{domain}".
{extra_details_str}

MANDATORY CONSTRAINTS:
- FOREIGN KEY attributes should have the REFERENCES keyword inline (e.g. "col TYPE REFERENCES table_name(column_name)").
{formatted_constraints}

MANDATORY OUTPUT (JSON) - each line in both lists must correspond to a single table:
{{
"schema_tables": [
"CREATE TABLE t1(...);",
"CREATE TABLE t2(...);"
],
"insert_commands": [
"INSERT INTO t1(...) VALUES(val_1, val_2, ...), (...), (val_n, val_n+1, ...);",
"INSERT INTO t2(...) VALUES(val_1, val_2, ...), (...), (val_n, val_n+1, ...);"
]
}}

INSERT INTO statements must have following format (Multi-row insert):
INSERT INTO tableName(<all columns except SERIAL/AUTO_INCREMENT>) VALUES
(val_1, val_2, ...),
(val_n, val_n+1, ...);

For each table, insert at least 5 rows of data.
Skip any SERIAL/AUTO_INCREMENT columns in the INSERT statements.
'''
|
|
59
|
+
|
|
60
|
+
def feedback_constraint_violations(errors: list[str]) -> str:
    '''Compose the retry feedback sent to the LLM listing the schema constraints
    that the previously generated SQL violated.'''
    violated = ', '.join(errors)
    rejection = f"The previous JSON output was rejected because the SQL violated these constraints: {violated}\n"
    instruction = "Regenerate the JSON correcting the SQL to satisfy all mandatory constraints."
    return rejection + instruction
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .exercise import Exercise
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from sql_error_taxonomy import SqlErrors
|
|
3
|
+
from sqlscope import Query
|
|
4
|
+
import dav_tools
|
|
5
|
+
|
|
6
|
+
from . import strings
|
|
7
|
+
from ..dataset import Dataset
|
|
8
|
+
from ...constraints import QueryConstraint
|
|
9
|
+
from ...difficulty_level import DifficultyLevel
|
|
10
|
+
from ... import llm
|
|
11
|
+
from ...exceptions import ExerciseGenerationError, SQLParsingError, ConstraintValidationError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class Exercise:
    '''A SQL exercise consisting of a title, request, and solutions.'''

    title: str
    '''The title of the exercise.'''

    request: str
    '''The natural language request or question for the exercise.'''

    solutions: list[Query]
    '''The list of SQL query solutions for the exercise.'''

    difficulty: DifficultyLevel
    '''The difficulty level of the exercise.'''

    error: SqlErrors
    '''The SQL error type associated with the exercise.'''

    @staticmethod
    def generate(
        error: SqlErrors,
        difficulty: DifficultyLevel,
        constraints: list[QueryConstraint],
        extra_details: str,
        dataset: Dataset,
        title: str,
        *,
        max_attempts: int = 3,
    ) -> 'Exercise':
        '''Generate a SQL exercise based on the specified parameters.

        Asks the LLM for a request/solution pair over ``dataset``, checks that
        the solution parses against the dataset catalog and satisfies every
        query constraint, then runs a second LLM pass that rewords the request
        to strip SQL-level hints before returning the finished exercise.

        Args:
            error: SQL error type stored on the exercise; also used in log and failure messages.
            difficulty: Difficulty level stored on the resulting exercise.
            constraints: Query constraints the generated solution must satisfy.
            extra_details: Free-text exercise characteristics forwarded to the prompt.
            dataset: Dataset providing the SQL context and the parsing catalog.
            title: Title stored on the resulting exercise.
            max_attempts: Maximum number of LLM generation attempts before giving up.

        Raises:
            ExerciseGenerationError: If no valid exercise is produced within ``max_attempts``.
        '''

        # One conversation is reused across attempts so that each failure's
        # feedback message accumulates as context for the next retry.
        messages = llm.Message()
        messages.add_message_user(strings.prompt_generate(
            dataset_str=dataset.to_sql_no_context(),
            extra_details=extra_details,
            constraints=constraints
        ))

        for attempt in range(max_attempts):
            try:
                answer = llm.generate_answer(messages, json_format=llm.models.Assignment)
                assert isinstance(answer, llm.models.Assignment)

                # check syntax correctness of solution
                # (parsing against the catalog validates it w.r.t. the dataset schema)
                try:
                    query = Query(answer.solution, catalog=dataset.catalog)
                except Exception as e:
                    raise SQLParsingError(f"Generated SQL solution contains syntax errors: {e}", answer.solution)

                # constraint validation: collect every violated constraint's
                # description so the retry feedback can list them all at once
                constraint_errors: list[str] = []

                for constraint in constraints:
                    try:
                        constraint.validate(query)
                    except ConstraintValidationError:
                        constraint_errors.append(constraint.description)

                if constraint_errors:
                    dav_tools.messages.error(f'Validation failed for attempt {attempt + 1} (error: {error.name}). Missing requirements: {", ".join(constraint_errors)}')
                    messages.add_message_user(strings.feedback_validation_errors(constraint_errors))
                    continue

                # refine natural language request to remove hints
                # (fresh conversation: the refinement pass must not see the generation history)
                messages_refinement = llm.Message()
                messages_refinement.add_message_user(strings.prompt_refine_request(answer.request, query))
                answer_refinement = llm.generate_answer(
                    messages_refinement,
                    json_format=llm.models.RemoveHints
                )

                assert isinstance(answer_refinement, llm.models.RemoveHints)
                # dav_tools.messages.debug(f"Old Request: {answer.request}")
                # dav_tools.messages.debug(f"Refined Request: {answer_refinement.request_without_hints}")
                answer.request = answer_refinement.request_without_hints

                return Exercise(
                    title=title,
                    request=answer.request,
                    solutions=[query],
                    difficulty=difficulty,
                    error=error
                )
            except Exception as e:
                # Broad catch is deliberate: any failure in an attempt (invalid
                # JSON, parse error, failed assertion, ...) is logged and fed
                # back to the LLM so the next attempt can correct it.
                dav_tools.messages.error(f"Error during exercise generation (Attempt {attempt + 1}): {e}")
                messages.add_message_user(f"An error occurred: {str(e)}. Please regenerate valid JSON/SQL.")

        raise ExerciseGenerationError(f'Failed to generate a valid exercise for {error.name} after {max_attempts} attempts.')
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
from ...constraints import QueryConstraint
|
|
2
|
+
from sqlscope import Query
|
|
3
|
+
|
|
4
|
+
def prompt_generate(dataset_str: str, extra_details: str, constraints: list[QueryConstraint]) -> str:
    '''Build the LLM prompt that asks for a SQL exercise over the given dataset,
    embedding the mandatory query-constraint descriptions and the required JSON
    output format.'''
    requirements = '\n'.join(f'- {c.description}' for c in constraints)

    details_block = (
        f"The exercise must have the following characteristics:\n{extra_details}"
        if extra_details.strip()
        else ""
    )

    return f'''
### CONTEXT (DATABASE SCHEMA AND DATA) ###
{dataset_str}

### GUIDELINES ###
Generate a SQL exercise based on the dataset above.
{details_block}

### MANDATORY REQUIREMENTS FOR THE EXERCISE ###
{requirements}

#### JSON REQUIRED OUTPUT FORMAT ####
{{
"request": "Extract and return ONLY the natural language query request, following the specified constraints. Never ask to include mistakes. Be concise and clear. Do not provide hints or explanations.",
"solution": "Only a single syntactically and semantically correct (i.e. executable with minimum 1 returned row) SQL query that solves the exercise."
}}
'''
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def feedback_validation_errors(errors: list[str]) -> str:
    '''Build the retry feedback listing the query constraints the previous
    answer violated, asking the LLM to regenerate compliant JSON.'''
    joined = ', '.join(errors)
    return (
        "The previous JSON output was rejected because it violated these constraints: "
        f"{joined}\n"
        "Regenerate the JSON to satisfy all constraints."
    )
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def prompt_refine_request(request: str, query: Query) -> str:
    '''Build the prompt asking the LLM to reword *request* without SQL-level hints.

    The prompt embeds the SQL solution and, when the solution's output columns
    are aliased, explicitly asks that the reworded request mention those aliases
    so students know which column names to produce.

    Args:
        request: The natural language request to be reworded.
        query: The parsed SQL solution whose output columns are inspected for aliases.

    Returns:
        The full refinement prompt text.
    '''
    result = f'''For the following query solution:
--- SOLUTION START ---
{query.sql}
--- SOLUTION END ---

Reword the natural language request to remove any kind of hints on how to write it.
Keep the condition purely at the problem level, not the SQL level.
Keep it realistic, simple and straightforward. It doesn't have to sound like a school exercise, but like a real-world request.
Do not use generic phrases like "a certain amount"; instead specify exact terms.
Avoid mentioning tables explicitly. Remove any reference to joins or join keys.
Do not use any formatting on the answer.
'''

    # Aliases: output columns whose exposed name differs from the underlying one.
    # (Only the alias itself is needed; the real column name is irrelevant here.)
    result += 'Make it clear which columns should be selected. If any columns are aliased in the solution, make sure to reflect that in the request, otherwise students might be confused.'
    aliases: list[str] = [
        col.name
        for col in query.main_query.output.columns
        if col.name != col.real_name
    ]

    if aliases:
        aliases_str = ', '.join(f'"{alias}"' for alias in aliases)
        result += f"\nIn particular, you must specify the need to use the following aliases: {aliases_str}."

    result += f'''

Natural Language Request:
--- REQUEST START ---
{request}
--- REQUEST END ---
'''

    return result
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from sqlscope import Query
|
|
3
|
+
from sqlglot import exp
|
|
4
|
+
|
|
5
|
+
class BaseConstraint(ABC):
    '''Common abstract ancestor for SQL constraint types.

    Concrete subclasses must expose a human-readable ``description`` that is
    embedded into LLM prompts and validation feedback.
    '''

    @property
    @abstractmethod
    def description(self) -> str:
        '''Human-readable text describing this constraint, suitable for prompts.'''
        ...
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
class WhereConstraintType(Enum):
    '''Kinds of WHERE-clause requirements an exercise constraint can impose.

    Only two variants are currently enabled; the commented-out candidates
    below are kept for reference as possible future constraint types.
    '''

    NESTED = "NESTED WHERE conditions"
    ANY_ALL_IN = "ANY or ALL or IN in WHERE conditions"

    # Disabled candidate members:
    # CLASSIC = "WHERE conditions"
    # STRING = "WHERE STRING conditions"
    # EMPTY = "WHERE EMPTY conditions"
    # NULL = "WHERE NULL conditions"
    # NOT_NULL = "WHERE NOT NULL conditions"
    # MULTIPLE = "MULTIPLE WHERE conditions"
    # WILDCARD = "WHERE conditions with WILDCARD that must have minimum 4 letters"
    # NO_WILDCARD = "WHERE conditions without WILDCARD"
    # EXIST = "EXIST in WHERE conditions"
    # NOT_EXIST = "NOT EXIST in WHERE conditions"
    # EXIST_OR_IN = "EXIST and NOT EXIST or IN and NOT IN into WHERE conditions"
    # NOT = "NOT in WHERE conditions"
    # COMPARISON_OPERATORS = "COMPARISON OPERATORS in WHERE conditions"
    # HAVING = "WHERE or HAVING conditions"
|