sql-assignment-generator 0.0.9__tar.gz → 0.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. sql_assignment_generator-0.0.11/.gitattributes +1 -0
  2. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/Makefile +4 -1
  3. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/PKG-INFO +6 -3
  4. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/pyproject.toml +6 -3
  5. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/requirements.txt +3 -2
  6. sql_assignment_generator-0.0.11/src/sql_assignment_generator/__init__.py +155 -0
  7. sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/dataset/__init__.py +1 -0
  8. sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/dataset/dataset.py +145 -0
  9. sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/dataset/strings.py +64 -0
  10. sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/exercise/__init__.py +1 -0
  11. sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/exercise/exercise.py +102 -0
  12. sql_assignment_generator-0.0.11/src/sql_assignment_generator/assignments/exercise/strings.py +72 -0
  13. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/constraints/__init__.py +2 -0
  14. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/base.py +12 -0
  15. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/costraintType.py +19 -0
  16. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/__init__.py +4 -0
  17. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/aggregation.py +83 -0
  18. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/base.py +19 -0
  19. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_from.py +122 -0
  20. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_group_by.py +57 -0
  21. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_having.py +65 -0
  22. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_order_by.py +160 -0
  23. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_select.py +91 -0
  24. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/clause_where.py +798 -0
  25. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/rows.py +65 -0
  26. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/set_operations.py +109 -0
  27. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/query/subquery.py +98 -0
  28. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/schema/__init__.py +24 -0
  29. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/schema/base.py +28 -0
  30. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/schema/tables.py +205 -0
  31. sql_assignment_generator-0.0.11/src/sql_assignment_generator/constraints/schema/values.py +44 -0
  32. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/__init__.py +136 -0
  33. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/base.py +41 -0
  34. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_002.py +56 -0
  35. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_004.py +53 -0
  36. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_007.py +48 -0
  37. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_009.py +51 -0
  38. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_010.py +35 -0
  39. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_011.py +34 -0
  40. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_012.py +36 -0
  41. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_015.py +35 -0
  42. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_019.py +31 -0
  43. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_021.py +34 -0
  44. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_026.py +35 -0
  45. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_035.py +34 -0
  46. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_039.py +36 -0
  47. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_040.py +55 -0
  48. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_041.py +50 -0
  49. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_042.py +43 -0
  50. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_043.py +35 -0
  51. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_044.py +58 -0
  52. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_045.py +42 -0
  53. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_046.py +34 -0
  54. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_049.py +34 -0
  55. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_052.py +34 -0
  56. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_053.py +34 -0
  57. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_054.py +37 -0
  58. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_055.py +36 -0
  59. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_057.py +31 -0
  60. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_058.py +33 -0
  61. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_059.py +33 -0
  62. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_060.py +40 -0
  63. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_062.py +33 -0
  64. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_063.py +39 -0
  65. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_064.py +33 -0
  66. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_066.py +31 -0
  67. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_067.py +31 -0
  68. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_068.py +32 -0
  69. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_069.py +33 -0
  70. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_070.py +34 -0
  71. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_071.py +36 -0
  72. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_072.py +40 -0
  73. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_073.py +39 -0
  74. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_074.py +37 -0
  75. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_075.py +42 -0
  76. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_076.py +35 -0
  77. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_077.py +37 -0
  78. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_078.py +46 -0
  79. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_079.py +44 -0
  80. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_080.py +32 -0
  81. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_081.py +31 -0
  82. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_083.py +36 -0
  83. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_084.py +40 -0
  84. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_086.py +33 -0
  85. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_088.py +35 -0
  86. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_089.py +39 -0
  87. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_091.py +32 -0
  88. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_093.py +35 -0
  89. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_095.py +44 -0
  90. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_096.py +34 -0
  91. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_097.py +34 -0
  92. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_098.py +37 -0
  93. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_099.py +33 -0
  94. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_102.py +34 -0
  95. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_104.py +34 -0
  96. sql_assignment_generator-0.0.11/src/sql_assignment_generator/error_requirements/err_105.py +36 -0
  97. sql_assignment_generator-0.0.11/src/sql_assignment_generator/exceptions.py +28 -0
  98. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/test.py +7 -11
  99. sql_assignment_generator-0.0.11/tests/constraints/query/test_aggregation.py +68 -0
  100. sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_from.py +150 -0
  101. sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_group_by.py +74 -0
  102. sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_having.py +70 -0
  103. sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_order_by.py +141 -0
  104. sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_select.py +112 -0
  105. sql_assignment_generator-0.0.11/tests/constraints/query/test_clause_where.py +543 -0
  106. sql_assignment_generator-0.0.11/tests/constraints/query/test_rows.py +102 -0
  107. sql_assignment_generator-0.0.11/tests/constraints/query/test_set_operation.py +108 -0
  108. sql_assignment_generator-0.0.11/tests/constraints/query/test_subquery.py +136 -0
  109. sql_assignment_generator-0.0.11/tests/constraints/schema/__init__.py +24 -0
  110. sql_assignment_generator-0.0.11/tests/constraints/schema/test_tables.py +154 -0
  111. sql_assignment_generator-0.0.11/tests/constraints/schema/test_values.py +78 -0
  112. sql_assignment_generator-0.0.9/src/sql_assignment_generator/__init__.py +0 -45
  113. sql_assignment_generator-0.0.9/src/sql_assignment_generator/assignments/dataset.py +0 -177
  114. sql_assignment_generator-0.0.9/src/sql_assignment_generator/assignments/exercise.py +0 -133
  115. sql_assignment_generator-0.0.9/src/sql_assignment_generator/constraints/base.py +0 -26
  116. sql_assignment_generator-0.0.9/src/sql_assignment_generator/constraints/query.py +0 -678
  117. sql_assignment_generator-0.0.9/src/sql_assignment_generator/constraints/schema.py +0 -148
  118. sql_assignment_generator-0.0.9/src/sql_assignment_generator/query_sintax.py +0 -572
  119. sql_assignment_generator-0.0.9/src/sql_assignment_generator/sql_errors_details.py +0 -1404
  120. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/.env.template +0 -0
  121. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/.gitignore +0 -0
  122. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/.readthedocs.yaml +0 -0
  123. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/LICENSE +0 -0
  124. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/README.md +0 -0
  125. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/docs/Makefile +0 -0
  126. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/docs/conf.py +0 -0
  127. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/docs/index.rst +0 -0
  128. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/docs/make.bat +0 -0
  129. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/docs/requirements.txt +0 -0
  130. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/assignments/__init__.py +0 -0
  131. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/assignments/assignment.py +0 -0
  132. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/difficulty_level.py +0 -0
  133. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/domains.py +0 -0
  134. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/llm/__init__.py +0 -0
  135. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/llm/chatgpt.py +0 -0
  136. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/llm/message.py +0 -0
  137. {sql_assignment_generator-0.0.9 → sql_assignment_generator-0.0.11}/src/sql_assignment_generator/llm/models.py +0 -0
@@ -0,0 +1 @@
1
+ * text eol=lf
@@ -36,10 +36,13 @@ build: $(VENV) .env
36
36
  uninstall: $(VENV)
37
37
  $(VENV_BIN)/python -m pip uninstall -y $(NAME)
38
38
 
39
+ test: install
40
+ $(VENV_BIN)/python -m pytest
41
+
39
42
  documentation:
40
43
  make html SPHINXBUILD="../$(VENV_BIN)/sphinx-build" -C docs/
41
44
 
42
- upload: build documentation
45
+ upload: test documentation
43
46
  $(VENV_BIN)/python -m pip install --upgrade twine
44
47
  $(VENV_BIN)/python -m twine upload --verbose dist/*
45
48
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql_assignment_generator
3
- Version: 0.0.9
3
+ Version: 0.0.11
4
4
  Summary: This project generates SQL assignments based on common mistakes made by learners.
5
5
  Project-URL: Repository, https://github.com/DavidePonzini/sql_assignment_generator
6
6
  Project-URL: Documentation, https://sql-assignment-generator.readthedocs.io/en/latest/index.html
@@ -11,9 +11,12 @@ Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Operating System :: OS Independent
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Requires-Python: >=3.11
14
- Requires-Dist: dotenv
14
+ Requires-Dist: openai
15
15
  Requires-Dist: pydantic
16
- Requires-Dist: sql-error-categorizer>=0.1.10
16
+ Requires-Dist: sql-error-taxonomy>=1.0.2
17
+ Requires-Dist: sqlglot>=11.5.6
18
+ Requires-Dist: sqlscope
19
+ Requires-Dist: sqlscope>=0.3.5
17
20
  Description-Content-Type: text/markdown
18
21
 
19
22
  # sql-assignment-generation
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sql_assignment_generator"
7
- version = "0.0.9"
7
+ version = "0.0.11"
8
8
  authors = [
9
9
  { name="Davide Ponzini", email="davide.ponzini95@gmail.com" },
10
10
  ]
@@ -18,8 +18,11 @@ classifiers = [
18
18
  ]
19
19
  dependencies = [
20
20
  "pydantic",
21
- "sql_error_categorizer>=0.1.10",
22
- "dotenv",
21
+ "sqlglot>=11.5.6",
22
+ "sqlscope>=0.3.5",
23
+ "sql-error-taxonomy>=1.0.2",
24
+ "sqlscope",
25
+ "openai",
23
26
  ]
24
27
 
25
28
  [project.urls]
@@ -1,7 +1,9 @@
1
1
  dav_tools>=0.4.22
2
2
  # progress>=1.6
3
3
  pydantic>=2.10.4
4
- sql-error-categorizer>=0.1.10
4
+ sql-error-taxonomy>=1.0.2
5
+ sqlscope>=1.0.7
6
+ sqlglot
5
7
  dotenv
6
8
  openai
7
9
 
@@ -12,5 +14,4 @@ ipython
12
14
  build
13
15
  autoapi
14
16
  pytest
15
- pytest-cov
16
17
  sphinx-autoapi
@@ -0,0 +1,155 @@
1
+ '''Generate SQL assignments based on specified SQL errors and difficulty levels.'''
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Callable
6
+ from concurrent.futures import ThreadPoolExecutor, as_completed
7
+ import threading
8
+ import random
9
+
10
+ from .difficulty_level import DifficultyLevel
11
+ from .domains import random_domain
12
+ from .assignments import Assignment, Dataset, Exercise
13
+ from .constraints import SchemaConstraint, QueryConstraint
14
+ from .error_requirements import SqlErrorRequirements, ERROR_REQUIREMENTS_MAP
15
+ from .exceptions import ExerciseGenerationError
16
+
17
+ import dav_tools
18
+ from sql_error_taxonomy import SqlErrors
19
+
20
+
21
+ def generate_assignment(
22
+ errors: list[tuple[SqlErrors, DifficultyLevel]],
23
+ domain: str | None = None,
24
+ *,
25
+ shuffle_exercises: bool = False,
26
+ naming_func: Callable[[SqlErrors, DifficultyLevel], str] = lambda error, difficulty: f'{error.name} - {difficulty.name}',
27
+ max_unique_attempts: int = 3,
28
+ max_workers: int | None = None
29
+ ) -> Assignment:
30
+ '''
31
+ Generate SQL assignments based on the given SQL errors and their corresponding difficulty levels.
32
+
33
+ - Exercises are returned in the same order as the input `errors`.
34
+ - Logging happens as soon as possible (during generation), and each message uses the exercise title as its id.
35
+ - Deduplication is global across all generated exercises (thread-safe).
36
+
37
+ Args:
38
+ errors (list[tuple[SqlErrors, DifficultyLevel]]): A list of (error, difficulty) pairs.
39
+ domain (str | None): The domain for the assignments. If None, a random domain will be selected.
40
+ shuffle_exercises (bool): Whether to shuffle exercises to prevent ordering bias (shuffles input order).
41
+ naming_func (Callable[[SqlErrors, DifficultyLevel], str]): Generates exercise titles.
42
+ max_unique_attempts (int): Maximum retries to avoid duplicate solutions per (error, difficulty).
43
+ max_workers (int | None): Thread pool size. If None, uses ThreadPoolExecutor default.
44
+
45
+ Returns:
46
+ Assignment: The generated assignment (stable order).
47
+ '''
48
+
49
+ dav_tools.messages.info(f'Starting assignment generation for {len(errors)} exercises...')
50
+
51
+ if domain is None:
52
+ domain = random_domain()
53
+
54
+ if shuffle_exercises:
55
+ random.shuffle(errors)
56
+
57
+ # convert SqlErrors -> SqlErrorRequirements, keeping difficulty levels
58
+ requirements: list[tuple[SqlErrors, SqlErrorRequirements, DifficultyLevel]] = []
59
+ for error, difficulty in errors:
60
+ if error not in ERROR_REQUIREMENTS_MAP:
61
+ raise NotImplementedError(f'Error requirements not implemented for error: {error.name}')
62
+
63
+ requirements.append((error, ERROR_REQUIREMENTS_MAP[error], difficulty))
64
+
65
+ # initialize requirements and extra details
66
+ dataset_requirements: list[SchemaConstraint] = []
67
+ for _, req, difficulty in requirements:
68
+ dataset_requirements.extend(req.dataset_constraints(difficulty))
69
+
70
+ dataset_extra_details: list[str] = [
71
+ req.dataset_extra_details()
72
+ for _, req, _ in requirements
73
+ ]
74
+ dataset_extra_details = [detail for detail in dataset_extra_details if detail.strip()] # filter out empty details
75
+ dataset_extra_details = list(set(dataset_extra_details)) # deduplicate details
76
+
77
+ dav_tools.messages.info(f'Generating dataset for domain: {domain}')
78
+ dataset = Dataset.generate(domain, dataset_requirements, dataset_extra_details)
79
+
80
+ generated_solutions_hashes: set[str] = set()
81
+ hashes_lock = threading.Lock()
82
+
83
+ # Serialize log output to avoid interleaving (and to keep dav_tools usage thread-safe).
84
+ log_lock = threading.Lock()
85
+
86
+ def _worker(
87
+ idx: int,
88
+ error: SqlErrors,
89
+ difficulty: DifficultyLevel,
90
+ constraints: list[QueryConstraint],
91
+ extra_details: str
92
+ ) -> tuple[int, Exercise | None]:
93
+ title = naming_func(error, difficulty)
94
+
95
+ dav_tools.messages.info(f'Starting generation for exercise: {title}')
96
+
97
+ last_generated_exercise: Exercise | None = None
98
+
99
+ for attempt in range(max_unique_attempts):
100
+ try:
101
+ generated_exercise = Exercise.generate(error, difficulty, constraints, extra_details, dataset=dataset, title=title)
102
+ except ExerciseGenerationError:
103
+ with log_lock:
104
+ dav_tools.messages.warning(f'{title}: Skipping exercise generation for {error.name} due to validation failures.')
105
+ return (idx, None)
106
+
107
+ last_generated_exercise = generated_exercise
108
+ raw_solution = generated_exercise.solutions[0]
109
+ normalized_solution = raw_solution.sql.lower().strip()
110
+
111
+ with hashes_lock:
112
+ is_duplicate = normalized_solution in generated_solutions_hashes
113
+ if not is_duplicate:
114
+ generated_solutions_hashes.add(normalized_solution)
115
+
116
+ if is_duplicate:
117
+ with log_lock:
118
+ dav_tools.messages.warning(f'{title}: Duplicate solution detected for {error.name} (Attempt {attempt + 1}/{max_unique_attempts}). Regenerating...')
119
+ continue
120
+
121
+ return (idx, generated_exercise)
122
+
123
+ if last_generated_exercise is not None:
124
+ with log_lock:
125
+ dav_tools.messages.error(f'{title}: Could not generate a UNIQUE exercise for {error.name} after {max_unique_attempts} retries. Skipping.')
126
+ return (idx, None)
127
+
128
+ # Pre-allocate so we can preserve ordering no matter completion order.
129
+ ordered_results: list[Exercise | None] = [None] * len(errors)
130
+
131
+ if max_workers == 1:
132
+ for idx, (error, requirement, difficulty) in enumerate(requirements):
133
+ i, ex = _worker(idx, error, difficulty, requirement.exercise_constraints(difficulty), requirement.exercise_extra_details())
134
+ ordered_results[i] = ex
135
+ else:
136
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
137
+ futures = [
138
+ executor.submit(_worker, idx, error, difficulty, requirement.exercise_constraints(difficulty), requirement.exercise_extra_details())
139
+ for idx, (error, requirement, difficulty) in enumerate(requirements)
140
+ ]
141
+ for fut in as_completed(futures):
142
+ idx, ex = fut.result()
143
+ ordered_results[idx] = ex
144
+
145
+ exercises: list[Exercise] = [ex for ex in ordered_results if ex is not None]
146
+
147
+ if len(exercises) < len(errors):
148
+ dav_tools.messages.warning(f'Finished generating exercises with some failures. Generated {len(exercises)} out of {len(errors)} requested.')
149
+ else:
150
+ dav_tools.messages.success(f'Successfully generated all {len(exercises)} exercises.')
151
+
152
+ return Assignment(
153
+ dataset=dataset,
154
+ exercises=exercises
155
+ )
@@ -0,0 +1,145 @@
1
+ from collections.abc import Sequence
2
+ from dataclasses import dataclass
3
+ import dav_tools
4
+ import sqlglot
5
+ from sqlscope import Catalog, build_catalog_from_sql
6
+
7
+ from . import strings
8
+ from ...constraints.schema import SchemaConstraint
9
+ from ... import llm
10
+ from ...constraints import SchemaConstraint, schema as schema_constraints
11
+ from ...exceptions import SQLParsingError, ConstraintValidationError, DatasetGenerationError
12
+
13
+
14
+ @dataclass
15
+ class Dataset:
16
+ '''A SQL dataset related to a specific domain, including schema creation and data insertion commands.'''
17
+
18
+ create_commands: list[str]
19
+ '''SQL commands to create the database schema.'''
20
+
21
+ insert_commands: list[str]
22
+ '''SQL commands to insert data into the database.'''
23
+
24
+ domain: str
25
+ '''The domain associated with the dataset.'''
26
+
27
+ _catalog_cache: Catalog | None = None
28
+ '''Cached SQLScope Catalog for the dataset.'''
29
+
30
+ _catalog_cache_commands_hash: int | None = None
31
+ '''Hash of the CREATE TABLE commands used to build the cached Catalog.'''
32
+
33
+ @property
34
+ def catalog(self) -> Catalog:
35
+ '''
36
+ Build and return a SQLScope Catalog from the dataset's SQL commands.
37
+ The result is cached for handling multiple accesses efficiently.
38
+ Cache is properly invalidated if the CREATE TABLE commands change.
39
+ '''
40
+ if self._catalog_cache is None or self._catalog_cache_commands_hash != hash(tuple(self.create_commands)):
41
+ full_sql = '\n'.join(self.create_commands)
42
+ self._catalog_cache = build_catalog_from_sql(full_sql)
43
+ self._catalog_cache_commands_hash = hash(tuple(self.create_commands))
44
+
45
+ return self._catalog_cache
46
+
47
+ def to_sql_no_context(self) -> str:
48
+ '''Generate the SQL commands to create and populate the dataset without schema context.'''
49
+
50
+ create_cmds = '\n'.join(self.create_commands)
51
+ insert_cmds = '\n'.join(self.insert_commands)
52
+
53
+ return f'''{create_cmds}\n\n{insert_cmds}'''
54
+
55
+ def to_sql(self, schema: str) -> str:
56
+ '''Generate the SQL commands to create and populate the dataset within the specified schema.'''
57
+
58
+ # Normalize schema name
59
+ schema = schema.lower().replace(' ', '_')
60
+
61
+ create_cmds = '\n\n'.join(self.create_commands)
62
+ insert_cmds = '\n\n'.join(self.insert_commands)
63
+
64
+ return strings.to_sql_format(schema=schema, create_cmds=create_cmds, insert_cmds=insert_cmds)
65
+
66
+ @staticmethod
67
+ def generate(domain: str,
68
+ constraints: Sequence[SchemaConstraint],
69
+ extra_details: list[str] = [],
70
+ *,
71
+ max_attempts: int = 5
72
+ ) -> 'Dataset':
73
+ '''Generate a SQL dataset based on the specified parameters.'''
74
+
75
+ # merge similar constraints
76
+ constraints = schema_constraints.merge_constraints(constraints)
77
+
78
+ prompt_text = strings.prompt_generate(
79
+ domain=domain,
80
+ extra_details=extra_details,
81
+ constraints=constraints
82
+ )
83
+
84
+ # query LLM to generate dataset
85
+ messages = llm.Message()
86
+ messages.add_message_user(prompt_text)
87
+
88
+ for attempt in range(max_attempts):
89
+ try:
90
+ answer = llm.generate_answer(messages, json_format=llm.models.Schema)
91
+ assert isinstance(answer, llm.models.Schema), "The response is not in the expected JSON format."
92
+
93
+ # parse CREATE TABLEs
94
+ parsed_tables = []
95
+ for create_table in answer.schema_tables:
96
+ try:
97
+ parsed = sqlglot.parse_one(create_table, read="postgres")
98
+ parsed_tables.append(parsed)
99
+ except Exception as e:
100
+ raise SQLParsingError(f"Syntax error in CREATE TABLE generated: {e}", create_table)
101
+ create_commands = [f'{cmd.sql(pretty=True, dialect="postgres")};' for cmd in parsed_tables]
102
+
103
+ # parse INSERT INTOs
104
+ parsed_inserts = []
105
+ for create_table in answer.insert_commands:
106
+ try:
107
+ parsed = sqlglot.parse_one(create_table, read="postgres")
108
+ parsed_inserts.append(parsed)
109
+ except Exception as e:
110
+ raise SQLParsingError(f"Syntax error in INSERT COMMANDS generated: {e}", create_table)
111
+ insert_commands = [f'{cmd.sql(pretty=True, dialect="postgres")};' for cmd in parsed_inserts]
112
+
113
+ catalog = build_catalog_from_sql('; '.join(cmd.sql() for cmd in parsed_tables))
114
+
115
+ # check if constraints are satisfied
116
+ errors = []
117
+ for constraint in constraints:
118
+ try:
119
+ constraint.validate(catalog, parsed_tables, parsed_inserts)
120
+ except ConstraintValidationError as e:
121
+ errors.append(str(e))
122
+ continue
123
+
124
+ # no errors, return dataset
125
+ if not errors:
126
+ result = Dataset(
127
+ create_commands=create_commands,
128
+ insert_commands=insert_commands,
129
+ domain=domain
130
+ )
131
+ # fill cache, since we already have the catalog
132
+ result._catalog_cache = catalog
133
+ result._catalog_cache_commands_hash = hash(tuple(create_commands))
134
+
135
+ return result
136
+
137
+ dav_tools.messages.error(f'Validation failed for attempt {attempt + 1}. Missing requirements: {", ".join(errors)}')
138
+
139
+ messages.add_message_user(strings.feedback_constraint_violations(errors))
140
+
141
+ except SQLParsingError as e:
142
+ dav_tools.messages.error(f"Error during generation (Attempt {attempt + 1}): {e}")
143
+ messages.add_message_user(f"SQL code is not syntactically valid: {str(e)}. Please regenerate valid SQL.")
144
+
145
+ raise DatasetGenerationError(f'Failed to generate a valid dataset after {max_attempts} attempts.')
@@ -0,0 +1,64 @@
1
+ from typing import Sequence
2
+ from ...constraints import SchemaConstraint
3
+
4
+ def to_sql_format(schema: str, create_cmds: str, insert_cmds: str) -> str:
5
+ return f'''BEGIN;
6
+
7
+ DROP SCHEMA IF EXISTS {schema} CASCADE;
8
+ CREATE SCHEMA {schema};
9
+ SET search_path TO {schema};
10
+
11
+ {create_cmds}
12
+
13
+ {insert_cmds}
14
+
15
+ COMMIT;'''
16
+
17
+
18
+ def prompt_generate(domain: str, extra_details: list[str], constraints: Sequence[SchemaConstraint]) -> str:
19
+ formatted_constraints = '\n'.join(f'- {c.description}' for c in constraints)
20
+
21
+ # remove empty extra details
22
+ extra_details = [detail for detail in extra_details if detail.strip() != '']
23
+ # dataset characteristics str
24
+ if len(extra_details) > 0:
25
+ extra_details_str = "The dataset must have the following characteristics:\n"
26
+ for detail in extra_details:
27
+ extra_details_str += f"- {detail}\n"
28
+ else:
29
+ extra_details_str = ''
30
+
31
+ return f'''
32
+ Generate a SQL dataset about the following domain: "{domain}".
33
+ {extra_details_str}
34
+
35
+ MANDATORY CONSTRAINTS:
36
+ - FOREIGN KEY attributes should have the REFERENCES keyword inline (e.g. "col TYPE REFERENCES table_name(column_name)").
37
+ {formatted_constraints}
38
+
39
+ MANDATORY OUTPUT (JSON) - each line in both lists must correspond to a single table:
40
+ {{
41
+ "schema_tables": [
42
+ "CREATE TABLE t1(...);",
43
+ "CREATE TABLE t2(...);"
44
+ ],
45
+ "insert_commands": [
46
+ "INSERT INTO t1(...) VALUES(val_1, val_2, ...), (...), (val_n, val_n+1, ...);",
47
+ "INSERT INTO t2(...) VALUES(val_1, val_2, ...), (...), (val_n, val_n+1, ...);"
48
+ ]
49
+ }}
50
+
51
+ INSERT INTO statements must have following format (Multi-row insert):
52
+ INSERT INTO tableName(<all columns except SERIAL/AUTO_INCREMENT>) VALUES
53
+ (val_1, val_2, ...),
54
+ (val_n, val_n+1, ...);
55
+
56
+ For each table, insert at least 5 rows of data.
57
+ Skip any SERIAL/AUTO_INCREMENT columns in the INSERT statements.
58
+ '''
59
+
60
+ def feedback_constraint_violations(errors: list[str]) -> str:
61
+ return (
62
+ f"The previous JSON output was rejected because the SQL violated these constraints: {', '.join(errors)}\n"
63
+ "Regenerate the JSON correcting the SQL to satisfy all mandatory constraints."
64
+ )
@@ -0,0 +1,102 @@
1
+ from dataclasses import dataclass
2
+ from sql_error_taxonomy import SqlErrors
3
+ from sqlscope import Query
4
+ import dav_tools
5
+
6
+ from . import strings
7
+ from ..dataset import Dataset
8
+ from ...constraints import QueryConstraint
9
+ from ...difficulty_level import DifficultyLevel
10
+ from ... import llm
11
+ from ...exceptions import ExerciseGenerationError, SQLParsingError, ConstraintValidationError
12
+
13
+
14
@dataclass
class Exercise:
    '''A SQL exercise consisting of a title, request, and solutions.'''

    title: str
    '''The title of the exercise.'''

    request: str
    '''The natural language request or question for the exercise.'''

    solutions: list[Query]
    '''The list of SQL query solutions for the exercise.'''

    difficulty: DifficultyLevel
    '''The difficulty level of the exercise.'''

    error: SqlErrors
    '''The SQL error type associated with the exercise.'''

    @staticmethod
    def generate(
        error: SqlErrors,
        difficulty: DifficultyLevel,
        constraints: list[QueryConstraint],
        extra_details: str,
        dataset: Dataset,
        title: str,
        *,
        max_attempts: int = 3,
    ) -> 'Exercise':
        '''
        Generate a SQL exercise based on the specified parameters.

        Each attempt asks the LLM for a request/solution pair, parses the solution
        against the dataset catalog, validates it against every constraint and,
        when everything passes, asks the LLM to reword the request without hints.
        Failed attempts append feedback to the conversation before retrying.

        Args:
            error: SQL error type the exercise is associated with.
            difficulty: Difficulty level stored on the resulting exercise.
            constraints: Constraints the generated solution query must satisfy.
            extra_details: Additional free-text characteristics for the prompt.
            dataset: Dataset providing the SQL context and the parsing catalog.
            title: Title stored on the resulting exercise.
            max_attempts: Maximum number of generation attempts.

        Returns:
            The generated exercise, with the parsed query as its only solution.

        Raises:
            ExerciseGenerationError: If no attempt produced a valid exercise. When the
                final attempt failed with an exception, it is attached as the cause.
        '''

        messages = llm.Message()
        messages.add_message_user(strings.prompt_generate(
            dataset_str=dataset.to_sql_no_context(),
            extra_details=extra_details,
            constraints=constraints
        ))

        # Exception raised by the most recent attempt (None if it failed on constraints only).
        last_error: Exception | None = None

        for attempt in range(max_attempts):
            last_error = None
            try:
                answer = llm.generate_answer(messages, json_format=llm.models.Assignment)
                assert isinstance(answer, llm.models.Assignment)

                # check syntax correctness of solution
                try:
                    query = Query(answer.solution, catalog=dataset.catalog)
                except Exception as e:
                    # Chain the parser failure so the original traceback is not lost.
                    raise SQLParsingError(f"Generated SQL solution contains syntax errors: {e}", answer.solution) from e

                # constraint validation
                constraint_errors = []

                for constraint in constraints:
                    try:
                        constraint.validate(query)
                    except ConstraintValidationError:
                        constraint_errors.append(constraint.description)

                if constraint_errors:
                    dav_tools.messages.error(f'Validation failed for attempt {attempt + 1} (error: {error.name}). Missing requirements: {", ".join(constraint_errors)}')
                    messages.add_message_user(strings.feedback_validation_errors(constraint_errors))
                    continue

                # refine natural language request to remove hints
                messages_refinement = llm.Message()
                messages_refinement.add_message_user(strings.prompt_refine_request(answer.request, query))
                answer_refinement = llm.generate_answer(
                    messages_refinement,
                    json_format=llm.models.RemoveHints
                )

                assert isinstance(answer_refinement, llm.models.RemoveHints)
                answer.request = answer_refinement.request_without_hints

                return Exercise(
                    title=title,
                    request=answer.request,
                    solutions=[query],
                    difficulty=difficulty,
                    error=error
                )
            except Exception as e:
                # Retry boundary: report the failure, feed it back to the LLM and try again.
                last_error = e
                dav_tools.messages.error(f"Error during exercise generation (Attempt {attempt + 1}): {e}")
                messages.add_message_user(f"An error occurred: {str(e)}. Please regenerate valid JSON/SQL.")

        raise ExerciseGenerationError(
            f'Failed to generate a valid exercise for {error.name} after {max_attempts} attempts.'
        ) from last_error
@@ -0,0 +1,72 @@
1
+ from ...constraints import QueryConstraint
2
+ from sqlscope import Query
3
+
4
def prompt_generate(dataset_str: str, extra_details: str, constraints: list[QueryConstraint]) -> str:
    '''
    Build the user prompt asking the LLM to generate a SQL exercise.

    Args:
        dataset_str: SQL text of the dataset, placed in the context section.
        extra_details: Optional free-text characteristics; ignored when blank.
        constraints: Query constraints; each one's `description` becomes a bullet
            under the mandatory requirements section.

    Returns:
        The complete prompt text, including the required JSON output format.
    '''

    requirement_bullets = '\n'.join(f'- {item.description}' for item in constraints)

    characteristics = (
        f"The exercise must have the following characteristics:\n{extra_details}"
        if extra_details.strip()
        else ""
    )

    return f'''
### CONTEXT (DATABASE SCHEMA AND DATA) ###
{dataset_str}

### GUIDELINES ###
Generate a SQL exercise based on the dataset above.
{characteristics}

### MANDATORY REQUIREMENTS FOR THE EXERCISE ###
{requirement_bullets}

#### JSON REQUIRED OUTPUT FORMAT ####
{{
"request": "Extract and return ONLY the natural language query request, following the specified constraints. Never ask to include mistakes. Be concise and clear. Do not provide hints or explanations.",
"solution": "Only a single syntactically and semantically correct (i.e. executable with minimum 1 returned row) SQL query that solves the exercise."
}}
'''
30
+
31
+
32
def feedback_validation_errors(errors: list[str]) -> str:
    '''
    Build the follow-up message sent after an exercise failed constraint validation.

    Args:
        errors: Descriptions of the violated constraints.

    Returns:
        A two-line message listing the violations and asking for a new JSON.
    '''

    violated = ', '.join(errors)
    message_lines = (
        f"The previous JSON output was rejected because it violated these constraints: {violated}",
        "Regenerate the JSON to satisfy all constraints.",
    )
    return '\n'.join(message_lines)
37
+
38
+
39
def prompt_refine_request(request: str, query: Query) -> str:
    '''
    Build the prompt asking the LLM to reword a request so it contains no hints.

    Args:
        request: Natural language request produced for the exercise.
        query: Parsed solution query; its SQL text is shown in the prompt and its
            main-query output columns are inspected for aliases.

    Returns:
        The prompt text: solution, rewording rules, alias guidance and the request.
    '''

    sections = [f'''For the following query solution:
--- SOLUTION START ---
{query.sql}
--- SOLUTION END ---

Reword the natural language request to remove any kind of hints on how to write it.
Keep the condition purely at the problem level, not the SQL level.
Keep it realistic, simple and straightforward. It doesn't have to sound like a school exercise, but like a real-world request.
Do not use generic phrases like "a certain amount"; instead specify exact terms.
Avoid mentioning tables explicitly. Remove any reference to joins or join keys.
Do not use any formatting on the answer.
''']

    # Aliases
    sections.append('Make it clear which columns should be selected. If any columns are aliased in the solution, make sure to reflect that in the request, otherwise students might be confused.')

    # An output column counts as aliased when its exposed name differs from its real name.
    alias_names = [
        column.name
        for column in query.main_query.output.columns
        if column.name != column.real_name
    ]

    if alias_names:
        quoted_aliases = ', '.join(f'"{alias}"' for alias in alias_names)
        sections.append(f"\nIn particular, you must specify the need to use the following aliases: {quoted_aliases}.")

    sections.append(f'''

Natural Language Request:
--- REQUEST START ---
{request}
--- REQUEST END ---
''')

    return ''.join(sections)
@@ -1,5 +1,7 @@
1
1
  '''Constraints for assignment generation.'''
2
2
 
3
3
  from .base import BaseConstraint
4
+ from .query import QueryConstraint
5
+ from .schema import SchemaConstraint
4
6
  from . import query
5
7
  from . import schema
@@ -0,0 +1,12 @@
1
+ from abc import ABC, abstractmethod
2
+ from sqlscope import Query
3
+ from sqlglot import exp
4
+
5
class BaseConstraint(ABC):
    '''Abstract root of the constraint hierarchy; subclasses must provide `description`.'''

    @property
    @abstractmethod
    def description(self) -> str:
        '''Human-readable wording of the constraint, meant to be embedded in prompts.'''
        ...
@@ -0,0 +1,19 @@
1
+ from enum import Enum
2
+
3
class WhereConstraintType(Enum):
    '''
    Kinds of WHERE-clause requirements, identified by their textual description.

    Only `NESTED` and `ANY_ALL_IN` are active members. The remaining entries are
    kept below as comments, in their original positions, and are not part of the enum.
    '''

    # CLASSIC = "WHERE conditions"
    # STRING = "WHERE STRING conditions"
    # EMPTY = "WHERE EMPTY conditions"
    # NULL = "WHERE NULL conditions"
    # NOT_NULL = "WHERE NOT NULL conditions"
    # MULTIPLE = "MULTIPLE WHERE conditions"

    NESTED = "NESTED WHERE conditions"

    # WILDCARD = "WHERE conditions with WILDCARD that must have minimum 4 letters"
    # NO_WILDCARD = "WHERE conditions without WILDCARD"
    # EXIST = "EXIST in WHERE conditions"
    # NOT_EXIST = "NOT EXIST in WHERE conditions"
    # EXIST_OR_IN = "EXIST and NOT EXIST or IN and NOT IN into WHERE conditions"
    # NOT = "NOT in WHERE conditions"
    # COMPARISON_OPERATORS = "COMPARISON OPERATORS in WHERE conditions"

    ANY_ALL_IN = "ANY or ALL or IN in WHERE conditions"

    # HAVING = "WHERE or HAVING conditions"
@@ -0,0 +1,4 @@
1
+ '''Constraints related to SQL queries.'''
2
+
3
+ from .base import QueryConstraint
4
+ from . import aggregation, clause_select, clause_from, clause_where, clause_group_by, clause_having, clause_order_by, rows, set_operations, subquery