sql-assignment-generator 0.0.13__tar.gz → 0.0.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/PKG-INFO +1 -1
  2. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/pyproject.toml +1 -1
  3. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/requirements.txt +1 -1
  4. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/__init__.py +74 -21
  5. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/assignments/dataset/dataset.py +197 -144
  6. sql_assignment_generator-0.0.15/src/sql_assignment_generator/assignments/dataset/strings.py +105 -0
  7. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/assignments/exercise/exercise.py +116 -101
  8. sql_assignment_generator-0.0.15/src/sql_assignment_generator/assignments/exercise/strings.py +132 -0
  9. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/constraints/base.py +2 -3
  10. sql_assignment_generator-0.0.15/src/sql_assignment_generator/constraints/query/aggregation.py +126 -0
  11. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/constraints/query/base.py +1 -1
  12. sql_assignment_generator-0.0.15/src/sql_assignment_generator/constraints/query/clause_from.py +190 -0
  13. sql_assignment_generator-0.0.15/src/sql_assignment_generator/constraints/query/clause_group_by.py +79 -0
  14. sql_assignment_generator-0.0.15/src/sql_assignment_generator/constraints/query/clause_having.py +87 -0
  15. sql_assignment_generator-0.0.15/src/sql_assignment_generator/constraints/query/clause_order_by.py +224 -0
  16. sql_assignment_generator-0.0.15/src/sql_assignment_generator/constraints/query/clause_select.py +127 -0
  17. sql_assignment_generator-0.0.15/src/sql_assignment_generator/constraints/query/clause_where.py +1020 -0
  18. sql_assignment_generator-0.0.15/src/sql_assignment_generator/constraints/query/rows.py +89 -0
  19. sql_assignment_generator-0.0.15/src/sql_assignment_generator/constraints/query/set_operations.py +150 -0
  20. sql_assignment_generator-0.0.15/src/sql_assignment_generator/constraints/query/subquery.py +153 -0
  21. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/constraints/schema/base.py +1 -0
  22. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/constraints/schema/tables.py +72 -22
  23. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/constraints/schema/values.py +18 -4
  24. sql_assignment_generator-0.0.15/src/sql_assignment_generator/domains.py +21 -0
  25. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/__init__.py +64 -64
  26. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/base.py +8 -4
  27. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_002.py +6 -2
  28. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/exceptions.py +9 -1
  29. sql_assignment_generator-0.0.15/src/sql_assignment_generator/translatable_text.py +53 -0
  30. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/test.py +60 -56
  31. sql_assignment_generator-0.0.13/src/sql_assignment_generator/assignments/dataset/strings.py +0 -64
  32. sql_assignment_generator-0.0.13/src/sql_assignment_generator/assignments/exercise/strings.py +0 -72
  33. sql_assignment_generator-0.0.13/src/sql_assignment_generator/constraints/costraintType.py +0 -19
  34. sql_assignment_generator-0.0.13/src/sql_assignment_generator/constraints/query/aggregation.py +0 -83
  35. sql_assignment_generator-0.0.13/src/sql_assignment_generator/constraints/query/clause_from.py +0 -122
  36. sql_assignment_generator-0.0.13/src/sql_assignment_generator/constraints/query/clause_group_by.py +0 -57
  37. sql_assignment_generator-0.0.13/src/sql_assignment_generator/constraints/query/clause_having.py +0 -65
  38. sql_assignment_generator-0.0.13/src/sql_assignment_generator/constraints/query/clause_order_by.py +0 -160
  39. sql_assignment_generator-0.0.13/src/sql_assignment_generator/constraints/query/clause_select.py +0 -91
  40. sql_assignment_generator-0.0.13/src/sql_assignment_generator/constraints/query/clause_where.py +0 -798
  41. sql_assignment_generator-0.0.13/src/sql_assignment_generator/constraints/query/rows.py +0 -65
  42. sql_assignment_generator-0.0.13/src/sql_assignment_generator/constraints/query/set_operations.py +0 -109
  43. sql_assignment_generator-0.0.13/src/sql_assignment_generator/constraints/query/subquery.py +0 -98
  44. sql_assignment_generator-0.0.13/src/sql_assignment_generator/domains.py +0 -20
  45. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/.env.template +0 -0
  46. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/.gitattributes +0 -0
  47. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/.gitignore +0 -0
  48. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/.readthedocs.yaml +0 -0
  49. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/LICENSE +0 -0
  50. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/Makefile +0 -0
  51. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/README.md +0 -0
  52. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/docs/Makefile +0 -0
  53. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/docs/conf.py +0 -0
  54. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/docs/index.rst +0 -0
  55. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/docs/make.bat +0 -0
  56. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/docs/requirements.txt +0 -0
  57. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/assignments/__init__.py +0 -0
  58. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/assignments/assignment.py +0 -0
  59. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/assignments/dataset/__init__.py +0 -0
  60. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/assignments/exercise/__init__.py +0 -0
  61. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/constraints/__init__.py +0 -0
  62. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/constraints/query/__init__.py +0 -0
  63. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/constraints/schema/__init__.py +0 -0
  64. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/difficulty_level.py +0 -0
  65. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_004.py +0 -0
  66. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_007.py +0 -0
  67. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_009.py +0 -0
  68. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_010.py +0 -0
  69. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_011.py +0 -0
  70. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_012.py +0 -0
  71. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_015.py +0 -0
  72. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_019.py +0 -0
  73. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_021.py +0 -0
  74. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_026.py +0 -0
  75. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_035.py +0 -0
  76. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_039.py +0 -0
  77. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_040.py +0 -0
  78. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_041.py +0 -0
  79. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_042.py +0 -0
  80. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_043.py +0 -0
  81. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_044.py +0 -0
  82. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_045.py +0 -0
  83. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_046.py +0 -0
  84. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_049.py +0 -0
  85. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_052.py +0 -0
  86. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_053.py +0 -0
  87. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_054.py +0 -0
  88. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_055.py +0 -0
  89. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_057.py +0 -0
  90. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_058.py +0 -0
  91. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_059.py +0 -0
  92. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_060.py +0 -0
  93. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_062.py +0 -0
  94. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_063.py +0 -0
  95. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_064.py +0 -0
  96. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_066.py +0 -0
  97. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_067.py +0 -0
  98. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_068.py +0 -0
  99. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_069.py +0 -0
  100. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_070.py +0 -0
  101. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_071.py +0 -0
  102. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_072.py +0 -0
  103. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_073.py +0 -0
  104. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_074.py +0 -0
  105. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_075.py +0 -0
  106. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_076.py +0 -0
  107. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_077.py +0 -0
  108. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_078.py +0 -0
  109. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_079.py +0 -0
  110. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_080.py +0 -0
  111. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_081.py +0 -0
  112. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_083.py +0 -0
  113. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_084.py +0 -0
  114. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_086.py +0 -0
  115. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_088.py +0 -0
  116. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_089.py +0 -0
  117. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_091.py +0 -0
  118. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_093.py +0 -0
  119. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_095.py +0 -0
  120. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_096.py +0 -0
  121. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_097.py +0 -0
  122. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_098.py +0 -0
  123. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_099.py +0 -0
  124. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_102.py +0 -0
  125. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_104.py +0 -0
  126. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/error_requirements/err_105.py +0 -0
  127. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/llm/__init__.py +0 -0
  128. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/llm/chatgpt.py +0 -0
  129. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/llm/message.py +0 -0
  130. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/src/sql_assignment_generator/llm/models.py +0 -0
  131. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/query/test_aggregation.py +0 -0
  132. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/query/test_clause_from.py +0 -0
  133. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/query/test_clause_group_by.py +0 -0
  134. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/query/test_clause_having.py +0 -0
  135. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/query/test_clause_order_by.py +0 -0
  136. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/query/test_clause_select.py +0 -0
  137. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/query/test_clause_where.py +0 -0
  138. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/query/test_rows.py +0 -0
  139. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/query/test_set_operation.py +0 -0
  140. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/query/test_subquery.py +0 -0
  141. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/schema/__init__.py +0 -0
  142. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/schema/test_tables.py +0 -0
  143. {sql_assignment_generator-0.0.13 → sql_assignment_generator-0.0.15}/tests/constraints/schema/test_values.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sql_assignment_generator
3
- Version: 0.0.13
3
+ Version: 0.0.15
4
4
  Summary: This project generates SQL assignments based on common mistakes made by learners.
5
5
  Project-URL: Repository, https://github.com/DavidePonzini/sql_assignment_generator
6
6
  Project-URL: Documentation, https://sql-assignment-generator.readthedocs.io/en/latest/index.html
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sql_assignment_generator"
7
- version = "0.0.13"
7
+ version = "0.0.15"
8
8
  authors = [
9
9
  { name="Davide Ponzini", email="davide.ponzini95@gmail.com" },
10
10
  ]
@@ -2,7 +2,7 @@ dav_tools>=0.4.22
2
2
  # progress>=1.6
3
3
  pydantic>=2.10.4
4
4
  sql-error-taxonomy>=1.0.2
5
- sqlscope>=1.0.7
5
+ sqlscope>=1.0.8
6
6
  sqlglot
7
7
  dotenv
8
8
  openai
@@ -20,10 +20,15 @@ from sql_error_taxonomy import SqlErrors
20
20
 
21
21
  def generate_assignment(
22
22
  errors: list[tuple[SqlErrors, DifficultyLevel]],
23
- domain: str | None = None,
23
+ sql_dialect: str = 'postgres',
24
24
  *,
25
+ language: str = 'en',
26
+ domain: str | None = None,
27
+ dataset_str: str | None = None,
25
28
  shuffle_exercises: bool = False,
26
29
  naming_func: Callable[[SqlErrors, DifficultyLevel], str] = lambda error, difficulty: f'{error.name} - {difficulty.name}',
30
+ max_dataset_attempts: int = 3,
31
+ max_exercise_attempts: int = 3,
27
32
  max_unique_attempts: int = 3,
28
33
  max_workers: int | None = None
29
34
  ) -> Assignment:
@@ -36,9 +41,14 @@ def generate_assignment(
36
41
 
37
42
  Args:
38
43
  errors (list[tuple[SqlErrors, DifficultyLevel]]): A list of (error, difficulty) pairs.
44
+ sql_dialect (str): The SQL dialect to use for generating the dataset and exercises (e.g., 'postgres', 'mysql').
39
45
  domain (str | None): The domain for the assignments. If None, a random domain will be selected.
46
+ language (str): The language for the assignment generation (e.g., 'en' for English).
47
+ dataset_str (str | None): Optional SQL string to use as the dataset. If provided, it will be used instead of generating a new dataset.
40
48
  shuffle_exercises (bool): Whether to shuffle exercises to prevent ordering bias (shuffles input order).
41
49
  naming_func (Callable[[SqlErrors, DifficultyLevel], str]): Generates exercise titles.
50
+ max_dataset_attempts (int): Maximum retries for generating a valid dataset before skipping.
51
+ max_exercise_attempts (int): Maximum retries for generating a valid exercise before skipping.
42
52
  max_unique_attempts (int): Maximum retries to avoid duplicate solutions per (error, difficulty).
43
53
  max_workers (int | None): Thread pool size. If None, uses ThreadPoolExecutor default.
44
54
 
@@ -57,9 +67,6 @@ def generate_assignment(
57
67
  if not supported_errors:
58
68
  raise ValueError('No supported errors provided for assignment generation.')
59
69
 
60
- if domain is None:
61
- domain = random_domain()
62
-
63
70
  if shuffle_exercises:
64
71
  random.shuffle(errors)
65
72
 
@@ -67,22 +74,45 @@ def generate_assignment(
67
74
  dav_tools.messages.info(f'Starting assignment generation for {len(supported_errors)} exercises (out of {len(errors)} requested)')
68
75
 
69
76
  # convert SqlErrors -> SqlErrorRequirements, keeping difficulty levels
70
- requirements: list[tuple[SqlErrors, SqlErrorRequirements, DifficultyLevel]] = [(error, ERROR_REQUIREMENTS_MAP[error], difficulty) for error, difficulty in supported_errors]
71
-
72
- # initialize requirements and extra details
73
- dataset_requirements: list[SchemaConstraint] = []
74
- for _, req, difficulty in requirements:
75
- dataset_requirements.extend(req.dataset_constraints(difficulty))
76
-
77
- dataset_extra_details: list[str] = [
78
- req.dataset_extra_details()
79
- for _, req, _ in requirements
77
+ requirements: list[tuple[SqlErrors, SqlErrorRequirements, DifficultyLevel]] = [
78
+ (
79
+ error,
80
+ ERROR_REQUIREMENTS_MAP[error](language=language),
81
+ difficulty
82
+ )
83
+ for error, difficulty in supported_errors
80
84
  ]
81
- dataset_extra_details = [detail for detail in dataset_extra_details if detail.strip()] # filter out empty details
82
- dataset_extra_details = list(set(dataset_extra_details)) # deduplicate details
83
85
 
84
- dav_tools.messages.info(f'Generating dataset for domain: {domain}')
85
- dataset = Dataset.generate(domain, dataset_requirements, dataset_extra_details)
86
+ if not dataset_str:
87
+ # No dataset string provided, so we need to generate a dataset based on the requirements of the exercises.
88
+ if domain is None:
89
+ domain = random_domain(language=language)
90
+
91
+ dataset_requirements: list[SchemaConstraint] = []
92
+ for _, req, difficulty in requirements:
93
+ dataset_requirements.extend(req.dataset_constraints(difficulty))
94
+
95
+ dataset_extra_details: list[str] = [
96
+ req.dataset_extra_details().get(language=language)
97
+ for _, req, _ in requirements
98
+ ]
99
+ dataset_extra_details = [detail for detail in dataset_extra_details if detail.strip()] # filter out empty details
100
+ dataset_extra_details = list(set(dataset_extra_details)) # deduplicate details
101
+
102
+ dav_tools.messages.info(f'Generating dataset for domain: {domain}')
103
+ dataset = Dataset.generate(
104
+ domain=domain,
105
+ sql_dialect=sql_dialect,
106
+ constraints=dataset_requirements,
107
+ extra_details=dataset_extra_details,
108
+ language=language,
109
+ max_attempts=max_dataset_attempts,
110
+ )
111
+ else:
112
+ dataset = Dataset.from_sql(
113
+ sql_str=dataset_str,
114
+ sql_dialect=sql_dialect
115
+ )
86
116
 
87
117
  generated_solutions_hashes: set[str] = set()
88
118
  hashes_lock = threading.Lock()
@@ -105,7 +135,17 @@ def generate_assignment(
105
135
 
106
136
  for attempt in range(max_unique_attempts):
107
137
  try:
108
- generated_exercise = Exercise.generate(error, difficulty, constraints, extra_details, dataset=dataset, title=title)
138
+ generated_exercise = Exercise.generate(
139
+ error=error,
140
+ difficulty=difficulty,
141
+ constraints=constraints,
142
+ extra_details=extra_details,
143
+ sql_dialect=sql_dialect,
144
+ dataset=dataset,
145
+ title=title,
146
+ max_attempts=max_exercise_attempts,
147
+ language=language
148
+ )
109
149
  except ExerciseGenerationError:
110
150
  with log_lock:
111
151
  dav_tools.messages.warning(f'{title}: Skipping exercise generation for {error.name} due to validation failures.')
@@ -137,12 +177,25 @@ def generate_assignment(
137
177
 
138
178
  if max_workers == 1:
139
179
  for idx, (error, requirement, difficulty) in enumerate(requirements):
140
- i, ex = _worker(idx, error, difficulty, requirement.exercise_constraints(difficulty), requirement.exercise_extra_details())
180
+ i, ex = _worker(
181
+ idx=idx,
182
+ error=error,
183
+ difficulty=difficulty,
184
+ constraints=requirement.exercise_constraints(difficulty),
185
+ extra_details=requirement.exercise_extra_details().get(language=language)
186
+ )
141
187
  ordered_results[i] = ex
142
188
  else:
143
189
  with ThreadPoolExecutor(max_workers=max_workers) as executor:
144
190
  futures = [
145
- executor.submit(_worker, idx, error, difficulty, requirement.exercise_constraints(difficulty), requirement.exercise_extra_details())
191
+ executor.submit(
192
+ _worker,
193
+ idx=idx,
194
+ error=error,
195
+ difficulty=difficulty,
196
+ constraints=requirement.exercise_constraints(difficulty),
197
+ extra_details=requirement.exercise_extra_details().get(language=language)
198
+ )
146
199
  for idx, (error, requirement, difficulty) in enumerate(requirements)
147
200
  ]
148
201
  for fut in as_completed(futures):
@@ -1,145 +1,198 @@
1
- from collections.abc import Sequence
2
- from dataclasses import dataclass
3
- import dav_tools
4
- import sqlglot
5
- from sqlscope import Catalog, build_catalog_from_sql
6
-
7
- from . import strings
8
- from ...constraints.schema import SchemaConstraint
9
- from ... import llm
10
- from ...constraints import SchemaConstraint, schema as schema_constraints
11
- from ...exceptions import SQLParsingError, ConstraintValidationError, DatasetGenerationError
12
-
13
-
14
- @dataclass
15
- class Dataset:
16
- '''A SQL dataset related to a specific domain, including schema creation and data insertion commands.'''
17
-
18
- create_commands: list[str]
19
- '''SQL commands to create the database schema.'''
20
-
21
- insert_commands: list[str]
22
- '''SQL commands to insert data into the database.'''
23
-
24
- domain: str
25
- '''The domain associated with the dataset.'''
26
-
27
- _catalog_cache: Catalog | None = None
28
- '''Cached SQLScope Catalog for the dataset.'''
29
-
30
- _catalog_cache_commands_hash: int | None = None
31
- '''Hash of the CREATE TABLE commands used to build the cached Catalog.'''
32
-
33
- @property
34
- def catalog(self) -> Catalog:
35
- '''
36
- Build and return a SQLScope Catalog from the dataset's SQL commands.
37
- The result is cached for handling multiple accesses efficiently.
38
- Cache is properly invalidated if the CREATE TABLE commands change.
39
- '''
40
- if self._catalog_cache is None or self._catalog_cache_commands_hash != hash(tuple(self.create_commands)):
41
- full_sql = '\n'.join(self.create_commands)
42
- self._catalog_cache = build_catalog_from_sql(full_sql)
43
- self._catalog_cache_commands_hash = hash(tuple(self.create_commands))
44
-
45
- return self._catalog_cache
46
-
47
- def to_sql_no_context(self) -> str:
48
- '''Generate the SQL commands to create and populate the dataset without schema context.'''
49
-
50
- create_cmds = '\n'.join(self.create_commands)
51
- insert_cmds = '\n'.join(self.insert_commands)
52
-
53
- return f'''{create_cmds}\n\n{insert_cmds}'''
54
-
55
- def to_sql(self, schema: str) -> str:
56
- '''Generate the SQL commands to create and populate the dataset within the specified schema.'''
57
-
58
- # Normalize schema name
59
- schema = schema.lower().replace(' ', '_')
60
-
61
- create_cmds = '\n\n'.join(self.create_commands)
62
- insert_cmds = '\n\n'.join(self.insert_commands)
63
-
64
- return strings.to_sql_format(schema=schema, create_cmds=create_cmds, insert_cmds=insert_cmds)
65
-
66
- @staticmethod
67
- def generate(domain: str,
68
- constraints: Sequence[SchemaConstraint],
69
- extra_details: list[str] = [],
70
- *,
71
- max_attempts: int = 5
72
- ) -> 'Dataset':
73
- '''Generate a SQL dataset based on the specified parameters.'''
74
-
75
- # merge similar constraints
76
- constraints = schema_constraints.merge_constraints(constraints)
77
-
78
- prompt_text = strings.prompt_generate(
79
- domain=domain,
80
- extra_details=extra_details,
81
- constraints=constraints
82
- )
83
-
84
- # query LLM to generate dataset
85
- messages = llm.Message()
86
- messages.add_message_user(prompt_text)
87
-
88
- for attempt in range(max_attempts):
89
- try:
90
- answer = llm.generate_answer(messages, json_format=llm.models.Schema)
91
- assert isinstance(answer, llm.models.Schema), "The response is not in the expected JSON format."
92
-
93
- # parse CREATE TABLEs
94
- parsed_tables = []
95
- for create_table in answer.schema_tables:
96
- try:
97
- parsed = sqlglot.parse_one(create_table, read="postgres")
98
- parsed_tables.append(parsed)
99
- except Exception as e:
100
- raise SQLParsingError(f"Syntax error in CREATE TABLE generated: {e}", create_table)
101
- create_commands = [f'{cmd.sql(pretty=True, dialect="postgres")};' for cmd in parsed_tables]
102
-
103
- # parse INSERT INTOs
104
- parsed_inserts = []
105
- for create_table in answer.insert_commands:
106
- try:
107
- parsed = sqlglot.parse_one(create_table, read="postgres")
108
- parsed_inserts.append(parsed)
109
- except Exception as e:
110
- raise SQLParsingError(f"Syntax error in INSERT COMMANDS generated: {e}", create_table)
111
- insert_commands = [f'{cmd.sql(pretty=True, dialect="postgres")};' for cmd in parsed_inserts]
112
-
113
- catalog = build_catalog_from_sql('; '.join(cmd.sql() for cmd in parsed_tables))
114
-
115
- # check if constraints are satisfied
116
- errors = []
117
- for constraint in constraints:
118
- try:
119
- constraint.validate(catalog, parsed_tables, parsed_inserts)
120
- except ConstraintValidationError as e:
121
- errors.append(str(e))
122
- continue
123
-
124
- # no errors, return dataset
125
- if not errors:
126
- result = Dataset(
127
- create_commands=create_commands,
128
- insert_commands=insert_commands,
129
- domain=domain
130
- )
131
- # fill cache, since we already have the catalog
132
- result._catalog_cache = catalog
133
- result._catalog_cache_commands_hash = hash(tuple(create_commands))
134
-
135
- return result
136
-
137
- dav_tools.messages.error(f'Validation failed for attempt {attempt + 1}. Missing requirements: {", ".join(errors)}')
138
-
139
- messages.add_message_user(strings.feedback_constraint_violations(errors))
140
-
141
- except SQLParsingError as e:
142
- dav_tools.messages.error(f"Error during generation (Attempt {attempt + 1}): {e}")
143
- messages.add_message_user(f"SQL code is not syntactically valid: {str(e)}. Please regenerate valid SQL.")
144
-
1
+ from collections.abc import Sequence
2
+ from dataclasses import dataclass
3
+ import dav_tools
4
+ import sqlglot
5
+ from sqlglot import exp
6
+ from sqlscope import Catalog, build_catalog_from_sql
7
+
8
+ from . import strings
9
+ from ...constraints.schema import SchemaConstraint
10
+ from ... import llm
11
+ from ...constraints import SchemaConstraint, schema as schema_constraints
12
+ from ...exceptions import SQLParsingError, ConstraintValidationError, DatasetGenerationError
13
+ from ...translatable_text import TranslatableText
14
+
15
+
16
@dataclass
class Dataset:
    '''A SQL dataset related to a specific domain, including schema creation and data insertion commands.'''

    create_commands: list[str]
    '''SQL commands to create the database schema.'''

    insert_commands: list[str]
    '''SQL commands to insert data into the database.'''

    domain: str
    '''The domain associated with the dataset.'''

    _catalog_cache: Catalog | None = None
    '''Cached SQLScope Catalog for the dataset.'''

    _catalog_cache_commands_hash: int | None = None
    '''Hash of the CREATE TABLE commands used to build the cached Catalog.'''

    @property
    def catalog(self) -> Catalog:
        '''
        Build and return a SQLScope Catalog from the dataset's CREATE TABLE commands.

        The catalog is cached together with a hash of `create_commands`; it is rebuilt
        whenever no catalog is cached yet or the hash of the current commands differs
        from the stored one.
        '''

        # Hash once: the same value is used for the staleness check and for the refresh.
        commands_hash = hash(tuple(self.create_commands))

        if self._catalog_cache is None or self._catalog_cache_commands_hash != commands_hash:
            full_sql = '\n'.join(self.create_commands)
            self._catalog_cache = build_catalog_from_sql(full_sql)
            self._catalog_cache_commands_hash = commands_hash

        return self._catalog_cache

    def to_sql_no_context(self) -> str:
        '''Return the CREATE commands followed by the INSERT commands, without any schema wrapper.'''

        create_cmds = '\n'.join(self.create_commands)
        insert_cmds = '\n'.join(self.insert_commands)

        return f'''{create_cmds}\n\n{insert_cmds}'''

    def to_sql(self, schema: str) -> str:
        '''
        Return the SQL script that creates and populates the dataset inside `schema`.

        The schema name is lower-cased and its spaces are replaced with underscores
        before being handed to `strings.to_sql_format`.
        '''

        # Normalize schema name
        schema = schema.lower().replace(' ', '_')

        create_cmds = '\n\n'.join(self.create_commands)
        insert_cmds = '\n\n'.join(self.insert_commands)

        return strings.to_sql_format(schema=schema, create_cmds=create_cmds, insert_cmds=insert_cmds)

    @staticmethod
    def from_sql(sql_str: str, sql_dialect: str) -> 'Dataset':
        '''
        Create a Dataset from a raw SQL string containing CREATE TABLE and INSERT INTO commands.

        Statements other than CREATE TABLE and INSERT (e.g. CREATE SCHEMA) are ignored.
        The resulting dataset has its domain set to "CUSTOM_DATASET".

        Raises:
            SQLParsingError: if the string cannot be parsed, or if it contains
                no CREATE TABLE command.
        '''

        create_commands: list[str] = []
        insert_commands: list[str] = []

        try:
            for statement in sqlglot.parse(sql_str, read=sql_dialect):
                if isinstance(statement, exp.Create):
                    if statement.kind is not None and statement.kind.upper() != 'TABLE':
                        continue    # skip non-table creation statements, e.g. CREATE SCHEMA
                    # Serialise in the same dialect the text was read with (as `generate`
                    # does), so dialect-specific syntax is not rewritten to the generic dialect.
                    create_commands.append(f'{statement.sql(dialect=sql_dialect)};')
                elif isinstance(statement, exp.Insert):
                    insert_commands.append(f'{statement.sql(dialect=sql_dialect)};')

            if not create_commands:
                raise ValueError("No CREATE TABLE commands found in the provided SQL string.")
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise SQLParsingError(f"Error parsing SQL string: {e}", sql_str) from e

        return Dataset(
            create_commands=create_commands,
            insert_commands=insert_commands,
            domain="CUSTOM_DATASET"
        )

    @staticmethod
    def generate(
        domain: str,
        sql_dialect: str,
        constraints: Sequence[SchemaConstraint],
        extra_details: list[str] | None = None,
        *,
        language: str,
        max_attempts: int = 5
    ) -> 'Dataset':
        '''
        Generate a SQL dataset by querying the LLM, retrying until all constraints are satisfied.

        Args:
            domain: Domain the dataset must be about.
            sql_dialect: SQL dialect used both to parse and to serialise the generated commands.
            constraints: Schema constraints the dataset must satisfy; they are merged before use.
            extra_details: Optional additional characteristics to include in the prompt.
            language: Language used for the prompt and for the feedback messages.
            max_attempts: Maximum number of LLM calls before giving up.

        Returns:
            The first generated Dataset whose commands parse and satisfy every constraint.

        Raises:
            DatasetGenerationError: if the LLM answer is not in the expected format, or if
                no valid dataset is produced within `max_attempts` attempts.
        '''

        # A fresh list per call instead of a shared mutable default.
        extra_details = [] if extra_details is None else extra_details

        # merge similar constraints
        constraints = schema_constraints.merge_constraints(constraints)

        prompt_text = strings.prompt_generate(
            domain=domain,
            extra_details=extra_details,
            constraints=constraints,
            sql_dialect=sql_dialect,
            language=language,
        )

        # query LLM to generate dataset
        messages = llm.Message()
        messages.add_message_user(prompt_text)

        for attempt in range(max_attempts):
            messages.print_chat()
            try:
                answer = llm.generate_answer(messages, json_format=llm.models.Schema)
                # Explicit check instead of `assert`, which is stripped under `python -O`.
                if not isinstance(answer, llm.models.Schema):
                    raise DatasetGenerationError("The response is not in the expected JSON format.")

                # parse CREATE TABLEs
                parsed_tables = []
                for create_table in answer.schema_tables:
                    try:
                        parsed_tables.append(sqlglot.parse_one(create_table, read=sql_dialect))
                    except Exception as e:
                        raise SQLParsingError(
                            TranslatableText(
                                f"Syntax error in CREATE TABLE generated: {e}",
                                it=f"Errore di sintassi nella CREATE TABLE generata: {e}"
                            ).get(language),
                            create_table
                        ) from e
                create_commands = [f'{cmd.sql(pretty=True, dialect=sql_dialect)};' for cmd in parsed_tables]

                # parse INSERT INTOs
                parsed_inserts = []
                for insert_command in answer.insert_commands:
                    try:
                        parsed_inserts.append(sqlglot.parse_one(insert_command, read=sql_dialect))
                    except Exception as e:
                        raise SQLParsingError(
                            TranslatableText(
                                f"Syntax error in INSERT COMMANDS generated: {e}",
                                it=f"Errore di sintassi nei comandi INSERT generati: {e}"
                            ).get(language),
                            insert_command
                        ) from e
                insert_commands = [f'{cmd.sql(pretty=True, dialect=sql_dialect)};' for cmd in parsed_inserts]

                catalog = build_catalog_from_sql('; '.join(cmd.sql() for cmd in parsed_tables))

                # check if constraints are satisfied, collecting every violation
                errors = []
                for constraint in constraints:
                    try:
                        constraint.validate(catalog, parsed_tables, parsed_inserts)
                    except ConstraintValidationError as e:
                        errors.append(e.get(language=language))

                # no errors, return dataset
                if not errors:
                    result = Dataset(
                        create_commands=create_commands,
                        insert_commands=insert_commands,
                        domain=domain
                    )
                    # fill cache, since we already have the catalog
                    result._catalog_cache = catalog
                    result._catalog_cache_commands_hash = hash(tuple(create_commands))

                    return result

                dav_tools.messages.error(f'Validation failed for attempt {attempt + 1}. Missing requirements: {", ".join(errors)}')

                # tell the LLM which constraints were violated before the next attempt
                messages.add_message_user(strings.feedback_constraint_violations(errors, language=language))

            except SQLParsingError as e:
                dav_tools.messages.error(f"Error during generation (Attempt {attempt + 1}): {e}")
                messages.add_message_user(
                    TranslatableText(
                        f"Generated SQL code is not syntactically valid: {str(e)}. Please regenerate valid SQL.",
                        it=f"Il codice SQL generato non è sintatticamente valido: {str(e)}. Per favore, rigenera un SQL valido."
                    ).get(language)
                )

        raise DatasetGenerationError(f'Failed to generate a valid dataset after {max_attempts} attempts.')
@@ -0,0 +1,105 @@
1
+ from typing import Sequence
2
+ from ...constraints import SchemaConstraint
3
+ from ...translatable_text import TranslatableText
4
+
5
def to_sql_format(schema: str, create_cmds: str, insert_cmds: str) -> str:
    '''
    Wrap the CREATE and INSERT commands in a transaction that recreates `schema`.

    The script drops the schema if it exists, creates it again, points the
    search path at it, then runs the given commands and commits.
    '''
    script_lines = [
        'BEGIN;',
        '',
        f'DROP SCHEMA IF EXISTS {schema} CASCADE;',
        f'CREATE SCHEMA {schema};',
        f'SET search_path TO {schema};',
        '',
        create_cmds,
        '',
        insert_cmds,
        '',
        'COMMIT;',
    ]
    return '\n'.join(script_lines)
17
+
18
+
19
def prompt_generate(
    domain: str,
    extra_details: list[str],
    constraints: Sequence[SchemaConstraint],
    *,
    sql_dialect: str,
    language: str
) -> str:
    '''
    Build the LLM prompt asking for a dataset (CREATE TABLE + INSERT INTO commands) as JSON.

    Args:
        domain: Domain the dataset must be about.
        extra_details: Additional dataset characteristics; blank entries are dropped.
        constraints: Constraints whose descriptions are listed as mandatory in the prompt.
        sql_dialect: SQL dialect named in the prompt.
        language: Language in which the prompt is returned.

    Returns:
        The prompt text in the requested language.
    '''
    formatted_constraints = '\n'.join(f'- {c.description.get(language)}' for c in constraints)

    # remove empty extra details
    extra_details = [detail for detail in extra_details if detail.strip()]

    # dataset characteristics str: a translated header followed by one bullet per detail
    if extra_details:
        header = TranslatableText(
            "The dataset must have the following characteristics:\n",
            it="Il dataset deve avere le seguenti caratteristiche:\n"
        ).get(language)
        extra_details_str = header + ''.join(f"- {detail}\n" for detail in extra_details)
    else:
        extra_details_str = ''

    # Both translations must carry the same instructions; the Italian text also
    # includes the "skip SERIAL/AUTO_INCREMENT columns" rule present in the English one.
    return TranslatableText(
        f'''
Generate a {sql_dialect} SQL dataset about the following domain: "{domain}".
{extra_details_str}

MANDATORY CONSTRAINTS:
- FOREIGN KEY attributes should have the REFERENCES keyword inline (e.g. "col TYPE REFERENCES table_name(column_name)").
- VARCHAR columns should not have a length specified (e.g. use "col VARCHAR" instead of "col VARCHAR(255)").
{formatted_constraints}

MANDATORY OUTPUT (JSON) - each line in both lists must correspond to a single table:
{{
"schema_tables": [
"CREATE TABLE t1(...);",
"CREATE TABLE t2(...);"
],
"insert_commands": [
"INSERT INTO t1(...) VALUES(val_1, val_2, ...), (...), (val_n, val_n+1, ...);",
"INSERT INTO t2(...) VALUES(val_1, val_2, ...), (...), (val_n, val_n+1, ...);"
]
}}

INSERT INTO statements must have following format (Multi-row insert):
INSERT INTO tableName(<all columns except SERIAL/AUTO_INCREMENT>) VALUES
(val_1, val_2, ...),
(val_n, val_n+1, ...);

For each table, insert at least 5 rows of data.
Skip any SERIAL/AUTO_INCREMENT columns in the INSERT statements.
''',
        it=f'''Genera un dataset SQL {sql_dialect} sul seguente dominio: "{domain}".
{extra_details_str}

CONSTRAINT OBBLIGATORIE:
- Gli attributi FOREIGN KEY devono avere la keyword REFERENCES inline (es. "col TYPE REFERENCES table_name(column_name)").
- Le colonne VARCHAR non devono avere una lunghezza specificata (es. usa "col VARCHAR" invece di "col VARCHAR(255)").
{formatted_constraints}

OUTPUT OBBLIGATORIO (JSON) - ogni riga in entrambe le liste deve corrispondere a una singola tabella:
{{
"schema_tables": [
"CREATE TABLE t1(...);",
"CREATE TABLE t2(...);"
],
"insert_commands": [
"INSERT INTO t1(...) VALUES(val_1, val_2, ...), (...), (val_n, val_n+1, ...);",
"INSERT INTO t2(...) VALUES(val_1, val_2, ...), (...), (val_n, val_n+1, ...);"
]
}}

Le istruzioni INSERT INTO devono avere il seguente formato (Multi-row insert):
INSERT INTO tableName(<tutte le colonne tranne SERIAL/AUTO_INCREMENT>) VALUES
(val_1, val_2, ...),
(val_n, val_n+1, ...);

Per ogni tabella, inserisci almeno 5 righe di dati.
Ometti le colonne SERIAL/AUTO_INCREMENT nelle istruzioni INSERT.
''',
    ).get(language)
100
+
101
def feedback_constraint_violations(errors: list[str], *, language: str) -> str:
    '''
    Build the follow-up message telling the LLM which constraints its SQL violated.

    The violations are joined with ", " and embedded in a request to regenerate
    the JSON; the message is returned in the requested language.
    '''
    violations = ', '.join(errors)

    feedback = TranslatableText(
        f"The previous JSON output was rejected because the SQL violated these constraints: {violations}\n Regenerate the JSON correcting the SQL to satisfy all mandatory constraints.",
        it=f"Il precedente output JSON è stato rifiutato perché il SQL ha violato queste constraint: {violations}\n Rigenera il JSON correggendo il SQL per soddisfare tutte le constraint obbligatorie."
    )
    return feedback.get(language)