sql-blocks 1.25.6109999999999__py3-none-any.whl → 1.2025.625__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sql_blocks/sql_blocks.py +164 -22
- {sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.2025.625.dist-info}/METADATA +47 -1
- sql_blocks-1.2025.625.dist-info/RECORD +7 -0
- sql_blocks-1.25.6109999999999.dist-info/RECORD +0 -7
- {sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.2025.625.dist-info}/LICENSE +0 -0
- {sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.2025.625.dist-info}/WHEEL +0 -0
- {sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.2025.625.dist-info}/top_level.txt +0 -0
sql_blocks/sql_blocks.py
CHANGED
@@ -81,7 +81,9 @@ class SQLObject:
|
|
81
81
|
|
82
82
|
@staticmethod
def get_separator(key: str) -> str:
    """Return the regex used to split the text that belongs to clause ``key``.

    WHERE conditions are split on the word ``and`` (all-lowercase or
    all-uppercase).  Every other clause uses the first entry of
    ``KEYWORD[key]`` as a format template; for FROM that template receives
    the ``join`` alternatives, for any other key it receives an empty string.
    """
    if key != WHERE:
        # Only the FROM clause gets an extra alternative injected.
        extra = r'\s+join\s+|\s+JOIN\s+' if key == FROM else ''
        return KEYWORD[key][0].format(extra)
    return r'\s+and\s+|\s+AND\s+'
|
86
88
|
|
87
89
|
@staticmethod
|
@@ -92,6 +94,7 @@ class SQLObject:
|
|
92
94
|
def split_fields(cls, text: str, key: str) -> list:
    """Break the text of one SQL clause into its individual items.

    A SELECT clause that contains a CASE statement is handed to
    ``Case.parse``.  Any other text has each run of whitespace collapsed to a
    single space and is then split on the separator pattern of ``key``.
    """
    is_case_select = key == SELECT and cls.contains_CASE_statement(text)
    if is_case_select:
        return Case.parse(text)
    # Line breaks and tabs would otherwise end up inside the split items.
    flattened = re.sub(r'\s+', ' ', text)
    return re.split(cls.get_separator(key), flattened)
|
97
100
|
|
@@ -844,8 +847,15 @@ class Rows:
|
|
844
847
|
)
|
845
848
|
|
846
849
|
|
850
|
+
class DescOrderBy:
    """Registers a field in the ORDER BY list of a query, in descending order."""

    @classmethod
    def add(cls, name: str, main: SQLObject):
        """Append ``name`` (formatted by ``Clause.format``) plus the DESC suffix
        to ``main.values[ORDER_BY]``, creating that list when it is missing.
        """
        formatted = Clause.format(name, main)
        order_fields = main.values.setdefault(ORDER_BY, [])
        order_fields.append(formatted + SortType.DESC.value)
|
855
|
+
|
847
856
|
class OrderBy(Clause):
|
848
857
|
sort: SortType = SortType.ASC
|
858
|
+
DESC = DescOrderBy
|
849
859
|
|
850
860
|
@classmethod
|
851
861
|
def add(cls, name: str, main: SQLObject):
|
@@ -1220,6 +1230,10 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1220
1230
|
pattern = '{_from}{where}{select}{group_by}{order_by}'
|
1221
1231
|
has_default = {key: False for key in KEYWORD}
|
1222
1232
|
file_extension = FileExtension.CSV
|
1233
|
+
HEADER_IMPORT_LIB = ['import pandas as pd']
|
1234
|
+
LIB_INITIALIZATION = ''
|
1235
|
+
FIELD_LIST_FMT = '[[{}{}]]'
|
1236
|
+
PREFIX_LIBRARY = 'pd.'
|
1223
1237
|
|
1224
1238
|
def add_field(self, values: list) -> str:
|
1225
1239
|
def line_field_fmt(field: str) -> str:
|
@@ -1228,30 +1242,43 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1228
1242
|
)
|
1229
1243
|
common_fields = self.split_agg_fields(values)
|
1230
1244
|
if common_fields:
|
1231
|
-
return
|
1232
|
-
','.join(line_field_fmt(fld) for fld in common_fields)
|
1245
|
+
return self.FIELD_LIST_FMT.format(
|
1246
|
+
','.join(line_field_fmt(fld) for fld in common_fields),
|
1247
|
+
self.LINE_BREAK
|
1233
1248
|
)
|
1234
1249
|
return ''
|
1235
1250
|
|
1251
|
+
def merge_tables(self, elements: list, main_table: str) -> str:
    """Build the ``pd.merge`` statement that joins two loaded data frames.

    ``elements`` holds four items: left alias, left key, right alias and
    right key.  Aliases are translated to table names through ``self.names``
    and the merged frame is assigned to ``df_<main_table>``.  The join type
    is always ``inner``.
    """
    left_alias, left_key, right_alias, right_key = elements
    template = "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n"
    left_table = self.names[left_alias]
    right_table = self.names[right_alias]
    return template.format(
        main_table, left_table, right_table, left_key, right_key, 'inner'
    )
|
1256
|
+
|
1236
1257
|
def get_tables(self, values: list) -> str:
    """Generate the code that loads every table and merges the joined ones.

    The output starts with the lines of ``HEADER_IMPORT_LIB`` (plus
    ``LIB_INITIALIZATION`` when it is not empty), followed by one loader
    statement per table and, for entries that carry a join condition, the
    statement produced by ``merge_tables``.  It ends by assigning the first
    table's frame to ``df`` and opening a ``df = df`` expression.

    Side effect: ``self.names`` is rebuilt as an alias -> table name mapping.
    """
    result = '\n'.join(self.HEADER_IMPORT_LIB) + '\n'
    if self.LIB_INITIALIZATION:
        result += f'\n{self.LIB_INITIALIZATION}'
    self.names = {}
    for table in values:
        # Word boundaries keep names that merely contain a keyword
        # (PERSON, COPYRIGHT, ...) from being cut apart.
        table, *join = [
            t.strip()
            for t in re.split(r'\b(?:JOIN|LEFT|RIGHT|ON)\b', table)
            if t.strip()
        ]
        alias, table = SQLObject.split_alias(table)
        result += "\ndf_{table} = {prefix}{func}('{table}.{ext}')".format(
            prefix=self.PREFIX_LIBRARY, func=self.file_extension.value,
            table=table, ext=self.file_extension.name.lower()
        )
        self.names[alias] = table
        if join:
            # The last piece is the join condition, e.g. "(a.id = b.a_id)";
            # it is reduced to [alias, field, alias, field].
            result += self.merge_tables([
                r.strip() for r in re.split('[().=]', join[-1]) if r.strip()
            ], last_table)
        last_table = table
    _, table = SQLObject.split_alias(values[0])
    result += f'\ndf = df_{table}\n\ndf = df'
    return result
|
1278
|
+
|
1279
|
+
def split_condition_elements(self, expr: str) -> list:
    """Tokenize one condition after ``self.remove_alias`` has processed it.

    The text is split around every run of word characters, keeping both the
    words and the text between them; pieces that are empty or contain only
    whitespace are discarded.  Kept pieces are returned unstripped.
    """
    pieces = re.split(r'(\w+)', self.remove_alias(expr))
    return list(filter(str.strip, pieces))
|
1255
1282
|
|
1256
1283
|
def extract_conditions(self, values: list) -> str:
|
1257
1284
|
conditions = []
|
@@ -1261,8 +1288,7 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1261
1288
|
3: '.str.contains(',
|
1262
1289
|
}
|
1263
1290
|
for expr in values:
|
1264
|
-
|
1265
|
-
field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
|
1291
|
+
field, op, *const = self.split_condition_elements(expr)
|
1266
1292
|
if op.upper() == 'LIKE' and len(const) == 3:
|
1267
1293
|
level = 0
|
1268
1294
|
if '%' in const[0]:
|
@@ -1319,6 +1345,73 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1319
1345
|
return ''
|
1320
1346
|
|
1321
1347
|
|
1348
|
+
class SparkLanguage(PandasLanguage):
    """Variant of ``PandasLanguage`` that emits PySpark-style DataFrame code.

    Only the pieces that differ from the parent are overridden: the import
    header, the field-list format, the loader prefix and the methods that
    render joins, filters, ordering and grouping.  ``TABULATION``,
    ``LINE_BREAK``, ``names``, ``aggregation_fields`` and ``clean_values``
    come from the parent classes.
    """
    HEADER_IMPORT_LIB = [
        'from pyspark.sql import SparkSession',
        'from pyspark.sql.functions import col, avg, sum, count'
    ]
    FIELD_LIST_FMT = '.select({}{})'
    # NOTE(review): loaders are emitted as 'pyspark.pandas.<func>(...)' but the
    # header above never imports 'pyspark.pandas' - confirm the generated
    # script is expected to run as-is.
    PREFIX_LIBRARY = 'pyspark.pandas.'
    # SQL comparison operators that have a different spelling in Python.
    _PYTHON_OPERATORS = {'=': '==', '<>': '!='}

    def merge_tables(self, elements: list, main_table: str) -> str:
        """Build the ``DataFrame.join`` statement for two loaded tables.

        ``elements`` holds left alias, left field, right alias and right
        field; aliases are translated through ``self.names``.  The result is
        assigned to ``df_<main_table>``.
        """
        a1, f1, a2, f2 = elements
        COMMAND_FMT = """{cr}
            df_{result} = df_{table1}.join(
            {indent}df_{table2},
            {indent}df_{table1}.{fk_field}{op}df_{table2}.{primary_key}{cr}
            )
        """
        # The layout whitespace of the template is thrown away; line breaks
        # and indentation come only from the {cr} / {indent} placeholders.
        return re.sub(r'\s+', '', COMMAND_FMT).format(
            result=main_table, cr=self.LINE_BREAK, indent=self.TABULATION,
            table1=self.names[a1], table2=self.names[a2],
            fk_field=f1, primary_key=f2, op=' == '
        )

    def extract_conditions(self, values: list) -> str:
        """Render the WHERE conditions as a ``.filter(...)`` call.

        Each condition becomes a parenthesised ``col('<field>')`` expression
        and the expressions are combined with ``&``.  Returns an empty string
        when there is no condition.
        """
        conditions = []
        for expr in values:
            field, op, *const = self.split_condition_elements(expr)
            const = ''.join(const)
            if op.upper() == 'LIKE':
                line = f"\n\t( col('{field}').like({const.strip()}) )"
            else:
                # The operator token may carry surrounding blanks and the
                # opening quote of a string constant (" = '"): separate the
                # comparison symbol and give the remainder back to the value.
                symbol_found = re.match(r'\s*([<>=!]+)\s*(.*)', op, re.DOTALL)
                if symbol_found:
                    symbol, prefix = symbol_found.groups()
                    # "=" and "<>" are not valid Python comparisons.
                    symbol = self._PYTHON_OPERATORS.get(symbol, symbol)
                    const = (prefix + const).strip()
                    line = f"\n\t( col('{field}') {symbol} {const} )"
                else:
                    line = f"\n\t( col('{field}') {op} {const} )"
            conditions.append(line)
        if not conditions:
            return ''
        return '.filter({}\n)'.format(
            '\n\t&'.join(conditions)
        )

    def sort_by(self, values: list) -> str:
        """Render the ORDER BY fields as an ``.orderBy(...)`` call, or return
        an empty string when ``values`` is empty.
        """
        if not values:
            return ''
        return '.orderBy({}{}{})'.format(
            self.TABULATION,
            self.clean_values(values),
            self.LINE_BREAK
        )

    def set_group(self, values: list) -> str:
        """Render the GROUP BY fields as ``.groupBy(...)`` and, when
        ``self.aggregation_fields`` is not empty, append an ``.agg(...)``
        call with one comma-separated function call per aggregation.
        """
        result = '.groupBy({}{}{})'.format(
            self.TABULATION,
            self.clean_values(values),
            self.LINE_BREAK
        )
        if self.aggregation_fields:
            calls = []
            for field in self.aggregation_fields:
                # "Sum(x) as total" -> ['Sum', 'x', '', 'total']
                func, field, *alias = re.split(r'[()]|\s+as\s+|\s+AS\s+', field)
                call = "{}{}('{}')".format(
                    self.TABULATION, func.lower(),
                    field if field else '*'
                )
                # Splitting "Count(*)" leaves a trailing empty string that
                # must not be mistaken for an alias.
                alias_name = alias[-1].strip() if alias else ''
                if alias_name:
                    call += f".alias('{alias_name}')"
                calls.append(call)
            # Arguments of agg() must be separated by commas.
            result += '.agg(' + ','.join(calls) + '\n)'
        return result
|
1413
|
+
|
1414
|
+
|
1322
1415
|
class Parser:
|
1323
1416
|
REGEX = {}
|
1324
1417
|
|
@@ -1366,7 +1459,7 @@ class SQLParser(Parser):
|
|
1366
1459
|
def prepare(self):
    """Compile the regular expressions of this parser into ``self.REGEX``.

    ``keywords`` matches any key of ``KEYWORD`` followed by a word boundary;
    ``subquery`` matches ``<field> in (SELECT ...)`` expressions.  Both are
    case-insensitive and multi-line.
    """
    options = re.IGNORECASE | re.MULTILINE
    alternatives = '|'.join([name + r'\b' for name in KEYWORD])
    self.REGEX['keywords'] = re.compile(f'({alternatives})', options)
    self.REGEX['subquery'] = re.compile(r'(\w\.)*\w+ +in +\(SELECT.*?\)', options)
|
1371
1464
|
|
1372
1465
|
def eval(self, txt: str):
|
@@ -1887,13 +1980,12 @@ class NotSelectIN(SelectIN):
|
|
1887
1980
|
|
1888
1981
|
class CTE(Select):
|
1889
1982
|
prefix = ''
|
1983
|
+
show_query = True
|
1890
1984
|
|
1891
|
-
def __init__(self, table_name: str, query_list: list[Select]):
|
1985
|
+
def __init__(self, table_name: str, query_list: list[Select] | None = None):
    """Create a CTE named ``table_name`` wrapping the queries of ``query_list``.

    When ``query_list`` is omitted each instance gets its own empty list;
    a list literal as default would be shared by every instance created
    without the argument.
    """
    super().__init__(table_name)
    self.query_list = [] if query_list is None else query_list
    self.break_lines = False
|
1897
1989
|
|
1898
1990
|
def __str__(self) -> str:
|
1899
1991
|
size = 0
|
@@ -1903,6 +1995,7 @@ class CTE(Select):
|
|
1903
1995
|
self.break_lines = True
|
1904
1996
|
# ---------------------------------------------------------
|
1905
1997
|
def justify(query: Select) -> str:
|
1998
|
+
query.break_lines = False
|
1906
1999
|
result, line = [], ''
|
1907
2000
|
keywords = '|'.join(KEYWORD)
|
1908
2001
|
for word in re.split(fr'({keywords}|AND|OR|,)', str(query)):
|
@@ -1918,7 +2011,7 @@ class CTE(Select):
|
|
1918
2011
|
self.prefix, self.table_name,
|
1919
2012
|
'\nUNION ALL\n '.join(
|
1920
2013
|
justify(q) for q in self.query_list
|
1921
|
-
), super().__str__()
|
2014
|
+
), super().__str__() if self.show_query else ''
|
1922
2015
|
)
|
1923
2016
|
|
1924
2017
|
def join(self, pattern: str, fields: list | str, format: str=''):
|
@@ -1972,6 +2065,56 @@ class Recursive(CTE):
|
|
1972
2065
|
return self
|
1973
2066
|
|
1974
2067
|
|
2068
|
+
# Key under which extract_subqueries() stores the rewritten outer query.
MAIN_TAG = '__main__'


class CTEFactory:
    """Rewrites a query whose FROM/JOIN sources are subqueries so that every
    subquery becomes a CTE and the outer query refers to it by alias.
    """

    def __init__(self, txt: str):
        """
        Syntax:
        ---
        **SELECT ...
        FROM** ( `sub_query1` ) **AS** `alias_1`
        JOIN ( `sub_query2` ) **AS** `alias_2` **ON** `__join__`
        """
        summary = self.extract_subqueries(txt)
        self.main = detect( summary.pop(MAIN_TAG) )
        self.cte_list = [
            CTE(alias, [
                Select.parse(query)[0]
            ])
            for alias, query in summary.items()
        ]

    def __str__(self):
        """Render every CTE (without its own trailing query), separated by
        commas, followed by the outer query.
        """
        # NOTE(review): the package README shows this output repeating the
        # word WITH for every CTE - confirm that is the intended SQL.
        previous = CTE.show_query
        CTE.show_query = False
        try:
            lines = [str(cte) for cte in self.cte_list]
        finally:
            # The flag is a class attribute: restore it so CTE objects used
            # elsewhere keep printing their own query.
            CTE.show_query = previous
        return ',\n'.join(lines) + '\n' + str(self.main)

    @staticmethod
    def _closing_parenthesis(txt: str, opening: int) -> int:
        """Return the index of the ``)`` that closes the ``(`` found at
        ``opening``, or -1 when it is never closed.  Parentheses inside
        quoted literals are ignored.
        """
        depth = 0
        quote = ''
        for pos in range(opening, len(txt)):
            char = txt[pos]
            if quote:
                if char == quote:
                    quote = ''
            elif char in '\'"':
                quote = char
            elif char == '(':
                depth += 1
            elif char == ')':
                depth -= 1
                if depth == 0:
                    return pos
        return -1

    @staticmethod
    def extract_subqueries(txt: str) -> dict:
        """Separate the aliased FROM/JOIN subqueries from the outer query.

        Returns a dict whose first key, ``MAIN_TAG``, holds the outer query
        with every extracted ``( SELECT ... ) AS alias`` replaced by
        ``alias alias``; the remaining keys are the aliases, each mapped to
        the text of its subquery with whitespace collapsed to single spaces.

        Subqueries nested inside an extracted subquery stay inside it, and
        a subquery that is not followed by ``AS <alias>`` is left untouched
        in the outer query.  A text without subqueries yields only the
        ``MAIN_TAG`` entry holding the original text.
        """
        alias_pattern = re.compile(r'\s*AS\s+(\w+)', re.IGNORECASE)
        subqueries = {}
        main_parts = []
        cursor = 0  # end of the text already copied or replaced
        for found in re.finditer(r'(FROM|JOIN)\s*[(]\s*SELECT', txt, re.IGNORECASE):
            start = found.start()
            if start < cursor:
                continue  # nested inside a subquery that was already extracted
            opening = txt.index('(', found.end(1))
            closing = CTEFactory._closing_parenthesis(txt, opening)
            if closing < 0:
                break  # unbalanced parentheses: keep the remainder as it is
            alias_found = alias_pattern.match(txt, closing + 1)
            if not alias_found:
                continue
            alias = alias_found.group(1)
            # Keep whatever lies between the previous subquery and this one.
            main_parts.append(txt[cursor:start])
            main_parts.append(f' {found.group(1)} {alias} {alias}')
            subqueries[alias] = ' '.join(txt[opening + 1: closing].split())
            cursor = alias_found.end()
        main_parts.append(txt[cursor:])
        result = {MAIN_TAG: ''.join(main_parts)}
        result.update(subqueries)
        return result
|
2116
|
+
|
2117
|
+
|
1975
2118
|
# ----- Rules -----
|
1976
2119
|
|
1977
2120
|
class RulePutLimit(Rule):
|
@@ -2116,4 +2259,3 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
|
|
2116
2259
|
result += query
|
2117
2260
|
return result
|
2118
2261
|
# ===========================================================================================//
|
2119
|
-
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sql_blocks
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.2025.625
|
4
4
|
Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
|
5
5
|
Home-page: https://github.com/julio-cascalles/sql_blocks
|
6
6
|
Author: Júlio Cascalles
|
@@ -888,3 +888,49 @@ R2 = Recursive.create(
|
|
888
888
|
|
889
889
|
>> Note: Comments added later.
|
890
890
|
---
|
891
|
+
|
892
|
+
### CTEFactory class
|
893
|
+
CTEFactory converts subqueries into CTEs: just pass it the text of the original ("dirty") query:
|
894
|
+
|
895
|
+
*Example*:
|
896
|
+
```
|
897
|
+
print(
|
898
|
+
CTEFactory("""
|
899
|
+
SELECT u001.name, agg_sales.total
|
900
|
+
FROM (
|
901
|
+
SELECT * FROM Users u
|
902
|
+
WHERE u.status = 'active'
|
903
|
+
) AS u001
|
904
|
+
JOIN (
|
905
|
+
SELECT s.user_id, Sum(s.value) as total
|
906
|
+
FROM Sales s
|
907
|
+
GROUP BY s.user_id
|
908
|
+
)
|
909
|
+
As agg_sales
|
910
|
+
ON u001.id = agg_sales.user_id
|
911
|
+
ORDER BY u001.name
|
912
|
+
""")
|
913
|
+
)
|
914
|
+
```
|
915
|
+
results...
|
916
|
+
```
|
917
|
+
WITH u001 AS (
|
918
|
+
SELECT * FROM Users u
|
919
|
+
WHERE u.status = 'active'
|
920
|
+
),
|
921
|
+
WITH agg_sales AS (
|
922
|
+
SELECT s.user_id, Sum(s.value) as total
|
923
|
+
FROM Sales s
|
924
|
+
GROUP BY s.user_id
|
925
|
+
)
|
926
|
+
SELECT
|
927
|
+
u001.name,
|
928
|
+
agg_sales.total
|
929
|
+
FROM
|
930
|
+
u001 u001
|
931
|
+
JOIN agg_sales agg_sales ON
|
932
|
+
(u001.id = agg_sales.user_id)
|
933
|
+
ORDER BY
|
934
|
+
u001.name
|
935
|
+
```
|
936
|
+
---
|
@@ -0,0 +1,7 @@
|
|
1
|
+
sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
|
2
|
+
sql_blocks/sql_blocks.py,sha256=J7zEJ5JNRxI3F-7TIypb0myb5OgdE5Stv4boZCTVBLM,76610
|
3
|
+
sql_blocks-1.2025.625.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
4
|
+
sql_blocks-1.2025.625.dist-info/METADATA,sha256=I6KTi5S_usvCjJEQl6lm8LG4DaxHhX0NlGRwhHpKBT8,23328
|
5
|
+
sql_blocks-1.2025.625.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
sql_blocks-1.2025.625.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
|
7
|
+
sql_blocks-1.2025.625.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
|
2
|
-
sql_blocks/sql_blocks.py,sha256=09o87wu2xc82AMMyV-OBiYmv0d_kfL_DIH6G--3-DIA,71615
|
3
|
-
sql_blocks-1.25.6109999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
4
|
-
sql_blocks-1.25.6109999999999.dist-info/METADATA,sha256=KpFEm1tvvHvoURZ3kV1VjvBvSIEGGAWuskacBXg0Xp4,22236
|
5
|
-
sql_blocks-1.25.6109999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
sql_blocks-1.25.6109999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
|
7
|
-
sql_blocks-1.25.6109999999999.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|