sql-blocks 1.25.610999999999__py3-none-any.whl → 1.25.6139999999999__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sql_blocks/sql_blocks.py +118 -28
- {sql_blocks-1.25.610999999999.dist-info → sql_blocks-1.25.6139999999999.dist-info}/METADATA +1 -1
- sql_blocks-1.25.6139999999999.dist-info/RECORD +7 -0
- sql_blocks-1.25.610999999999.dist-info/RECORD +0 -7
- {sql_blocks-1.25.610999999999.dist-info → sql_blocks-1.25.6139999999999.dist-info}/LICENSE +0 -0
- {sql_blocks-1.25.610999999999.dist-info → sql_blocks-1.25.6139999999999.dist-info}/WHEEL +0 -0
- {sql_blocks-1.25.610999999999.dist-info → sql_blocks-1.25.6139999999999.dist-info}/top_level.txt +0 -0
sql_blocks/sql_blocks.py
CHANGED
@@ -81,9 +81,23 @@ class SQLObject:
|
|
81
81
|
|
82
82
|
@staticmethod
|
83
83
|
def get_separator(key: str) -> str:
|
84
|
-
|
84
|
+
if key == WHERE:
|
85
|
+
return r'\s+and\s+|\s+AND\s+'
|
86
|
+
appendix = {FROM: r'\s+join\s+|\s+JOIN\s+'}
|
85
87
|
return KEYWORD[key][0].format(appendix.get(key, ''))
|
86
88
|
|
89
|
+
@staticmethod
|
90
|
+
def contains_CASE_statement(text: str) -> bool:
|
91
|
+
return re.search(r'\bCASE\b', text, re.IGNORECASE)
|
92
|
+
|
93
|
+
@classmethod
|
94
|
+
def split_fields(cls, text: str, key: str) -> list:
|
95
|
+
if key == SELECT and cls.contains_CASE_statement(text):
|
96
|
+
return Case.parse(text)
|
97
|
+
text = re.sub(r'\s+', ' ', text)
|
98
|
+
separator = cls.get_separator(key)
|
99
|
+
return re.split(separator, text)
|
100
|
+
|
87
101
|
@staticmethod
|
88
102
|
def is_named_field(fld: str, name: str='') -> bool:
|
89
103
|
return re.search(fr'(\s+as\s+|\s+AS\s+){name}', fld)
|
@@ -103,16 +117,13 @@ class SQLObject:
|
|
103
117
|
result += re.split(r'([=()]|<>|\s+ON\s+|\s+on\s+)', fld)
|
104
118
|
return result
|
105
119
|
def cleanup(text: str) -> str:
|
106
|
-
if re.search(r'^CASE\b', text):
|
120
|
+
# if re.search(r'^CASE\b', text):
|
121
|
+
if self.contains_CASE_statement(text):
|
107
122
|
return text
|
108
123
|
text = re.sub(r'[\n\t]', ' ', text)
|
109
124
|
if exact:
|
110
125
|
text = text.lower()
|
111
126
|
return text.strip()
|
112
|
-
def split_fields(text: str) -> list:
|
113
|
-
if key == SELECT:
|
114
|
-
return Case.parse(text)
|
115
|
-
return re.split(separator, text)
|
116
127
|
def field_set(source: list) -> set:
|
117
128
|
return set(
|
118
129
|
(
|
@@ -122,14 +133,13 @@ class SQLObject:
|
|
122
133
|
re.sub(pattern, '', cleanup(fld))
|
123
134
|
)
|
124
135
|
for string in disassemble(source)
|
125
|
-
for fld in split_fields(string)
|
136
|
+
for fld in self.split_fields(string, key)
|
126
137
|
)
|
127
138
|
pattern = KEYWORD[key][1]
|
128
139
|
if exact:
|
129
140
|
if key == WHERE:
|
130
141
|
pattern = r'["\']| '
|
131
142
|
pattern += f'|{PATTERN_PREFIX}'
|
132
|
-
separator = self.get_separator(key)
|
133
143
|
s1 = field_set(search_list)
|
134
144
|
s2 = field_set(self.values.get(key, []))
|
135
145
|
if exact:
|
@@ -727,7 +737,7 @@ class Case:
|
|
727
737
|
result += block.fields
|
728
738
|
block.fields = []
|
729
739
|
elif word not in RESERVED_WORDS:
|
730
|
-
result.append(word)
|
740
|
+
result.append(word.replace(',', ''))
|
731
741
|
last_word = word
|
732
742
|
return result
|
733
743
|
|
@@ -1213,6 +1223,10 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1213
1223
|
pattern = '{_from}{where}{select}{group_by}{order_by}'
|
1214
1224
|
has_default = {key: False for key in KEYWORD}
|
1215
1225
|
file_extension = FileExtension.CSV
|
1226
|
+
HEADER_IMPORT_LIB = ['import pandas as pd']
|
1227
|
+
LIB_INITIALIZATION = ''
|
1228
|
+
FIELD_LIST_FMT = '[[{}{}]]'
|
1229
|
+
PREFIX_LIBRARY = 'pd.'
|
1216
1230
|
|
1217
1231
|
def add_field(self, values: list) -> str:
|
1218
1232
|
def line_field_fmt(field: str) -> str:
|
@@ -1221,30 +1235,43 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1221
1235
|
)
|
1222
1236
|
common_fields = self.split_agg_fields(values)
|
1223
1237
|
if common_fields:
|
1224
|
-
return
|
1225
|
-
','.join(line_field_fmt(fld) for fld in common_fields)
|
1238
|
+
return self.FIELD_LIST_FMT.format(
|
1239
|
+
','.join(line_field_fmt(fld) for fld in common_fields),
|
1240
|
+
self.LINE_BREAK
|
1226
1241
|
)
|
1227
1242
|
return ''
|
1228
1243
|
|
1244
|
+
def merge_tables(self, elements: list, main_table: str) -> str:
|
1245
|
+
a1, f1, a2, f2 = elements
|
1246
|
+
return "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
|
1247
|
+
main_table, self.names[a1], self.names[a2], f1, f2, 'inner'
|
1248
|
+
)
|
1249
|
+
|
1229
1250
|
def get_tables(self, values: list) -> str:
|
1230
|
-
result = '
|
1231
|
-
|
1251
|
+
result = '\n'.join(self.HEADER_IMPORT_LIB) + '\n'
|
1252
|
+
if self.LIB_INITIALIZATION:
|
1253
|
+
result += f'\n{self.LIB_INITIALIZATION}'
|
1254
|
+
self.names = {}
|
1232
1255
|
for table in values:
|
1233
1256
|
table, *join = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', table) if t.strip()]
|
1234
1257
|
alias, table = SQLObject.split_alias(table)
|
1235
|
-
result += "\ndf_{table} =
|
1236
|
-
|
1258
|
+
result += "\ndf_{table} = {prefix}{func}('{table}.{ext}')".format(
|
1259
|
+
prefix=self.PREFIX_LIBRARY, func=self.file_extension.value,
|
1260
|
+
table=table, ext=self.file_extension.name.lower()
|
1237
1261
|
)
|
1238
|
-
names[alias] = table
|
1262
|
+
self.names[alias] = table
|
1239
1263
|
if join:
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
)
|
1264
|
+
result += self.merge_tables([
|
1265
|
+
r.strip() for r in re.split('[().=]', join[-1]) if r
|
1266
|
+
], last_table)
|
1244
1267
|
last_table = table
|
1245
1268
|
_, table = SQLObject.split_alias(values[0])
|
1246
|
-
result += f'\ndf = df_{table}\n\ndf = df
|
1269
|
+
result += f'\ndf = df_{table}\n\ndf = df'
|
1247
1270
|
return result
|
1271
|
+
|
1272
|
+
def split_condition_elements(self, expr: str) -> list:
|
1273
|
+
expr = self.remove_alias(expr)
|
1274
|
+
return [t for t in re.split(r'(\w+)', expr) if t.strip()]
|
1248
1275
|
|
1249
1276
|
def extract_conditions(self, values: list) -> str:
|
1250
1277
|
conditions = []
|
@@ -1254,8 +1281,7 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1254
1281
|
3: '.str.contains(',
|
1255
1282
|
}
|
1256
1283
|
for expr in values:
|
1257
|
-
|
1258
|
-
field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
|
1284
|
+
field, op, *const = self.split_condition_elements(expr)
|
1259
1285
|
if op.upper() == 'LIKE' and len(const) == 3:
|
1260
1286
|
level = 0
|
1261
1287
|
if '%' in const[0]:
|
@@ -1312,6 +1338,73 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1312
1338
|
return ''
|
1313
1339
|
|
1314
1340
|
|
1341
|
+
class SparkLanguage(PandasLanguage):
|
1342
|
+
HEADER_IMPORT_LIB = [
|
1343
|
+
'from pyspark.sql import SparkSession',
|
1344
|
+
'from pyspark.sql.functions import col, avg, sum, count'
|
1345
|
+
]
|
1346
|
+
FIELD_LIST_FMT = '.select({}{})'
|
1347
|
+
PREFIX_LIBRARY = 'pyspark.pandas.'
|
1348
|
+
|
1349
|
+
def merge_tables(self, elements: list, main_table: str) -> str:
|
1350
|
+
a1, f1, a2, f2 = elements
|
1351
|
+
COMMAND_FMT = """{cr}
|
1352
|
+
df_{result} = df_{table1}.join(
|
1353
|
+
{indent}df_{table2},
|
1354
|
+
{indent}df_{table1}.{fk_field}{op}df_{table2}.{primary_key}{cr}
|
1355
|
+
)
|
1356
|
+
"""
|
1357
|
+
return re.sub(r'\s+', '', COMMAND_FMT).format(
|
1358
|
+
result=main_table, cr=self.LINE_BREAK, indent=self.TABULATION,
|
1359
|
+
table1=self.names[a1], table2=self.names[a2],
|
1360
|
+
fk_field=f1, primary_key=f2, op=' == '
|
1361
|
+
)
|
1362
|
+
|
1363
|
+
def extract_conditions(self, values: list) -> str:
|
1364
|
+
conditions = []
|
1365
|
+
for expr in values:
|
1366
|
+
field, op, *const = self.split_condition_elements(expr)
|
1367
|
+
const = ''.join(const)
|
1368
|
+
if op.upper() == 'LIKE':
|
1369
|
+
line = f"\n\t( col('{field}').like({const}) )"
|
1370
|
+
else:
|
1371
|
+
line = f"\n\t( col('{field}') {op} {const} )"
|
1372
|
+
conditions.append(line)
|
1373
|
+
if not conditions:
|
1374
|
+
return ''
|
1375
|
+
return '.filter({}\n)'.format(
|
1376
|
+
'\n\t&'.join(conditions)
|
1377
|
+
)
|
1378
|
+
|
1379
|
+
def sort_by(self, values: list) -> str:
|
1380
|
+
if not values:
|
1381
|
+
return ''
|
1382
|
+
return '.orderBy({}{}{})'.format(
|
1383
|
+
self.TABULATION,
|
1384
|
+
self.clean_values(values),
|
1385
|
+
self.LINE_BREAK
|
1386
|
+
)
|
1387
|
+
|
1388
|
+
def set_group(self, values: list) -> str:
|
1389
|
+
result = '.groupBy({}{}{})'.format(
|
1390
|
+
self.TABULATION,
|
1391
|
+
self.clean_values(values),
|
1392
|
+
self.LINE_BREAK
|
1393
|
+
)
|
1394
|
+
if self.aggregation_fields:
|
1395
|
+
result += '.agg('
|
1396
|
+
for field in self.aggregation_fields:
|
1397
|
+
func, field, *alias = re.split(r'[()]|\s+as\s+|\s+AS\s+', field)
|
1398
|
+
result += "{}{}('{}')".format(
|
1399
|
+
self.TABULATION, func.lower(),
|
1400
|
+
field if field else '*'
|
1401
|
+
)
|
1402
|
+
if alias:
|
1403
|
+
result += f".alias('{alias[-1]}')"
|
1404
|
+
result += '\n)'
|
1405
|
+
return result
|
1406
|
+
|
1407
|
+
|
1315
1408
|
class Parser:
|
1316
1409
|
REGEX = {}
|
1317
1410
|
|
@@ -1359,7 +1452,7 @@ class SQLParser(Parser):
|
|
1359
1452
|
def prepare(self):
|
1360
1453
|
keywords = '|'.join(k + r'\b' for k in KEYWORD)
|
1361
1454
|
flags = re.IGNORECASE + re.MULTILINE
|
1362
|
-
self.REGEX['keywords'] = re.compile(f'({keywords}
|
1455
|
+
self.REGEX['keywords'] = re.compile(f'({keywords})', flags)
|
1363
1456
|
self.REGEX['subquery'] = re.compile(r'(\w\.)*\w+ +in +\(SELECT.*?\)', flags)
|
1364
1457
|
|
1365
1458
|
def eval(self, txt: str):
|
@@ -1417,13 +1510,12 @@ class SQLParser(Parser):
|
|
1417
1510
|
for key in USUAL_KEYS:
|
1418
1511
|
if not key in values:
|
1419
1512
|
continue
|
1420
|
-
separator = self.class_type.get_separator(key)
|
1421
1513
|
cls = {
|
1422
1514
|
ORDER_BY: OrderBy, GROUP_BY: GroupBy
|
1423
1515
|
}.get(key, Field)
|
1424
1516
|
obj.values[key] = [
|
1425
1517
|
cls.format(fld, obj)
|
1426
|
-
for fld in
|
1518
|
+
for fld in self.class_type.split_fields(values[key], key)
|
1427
1519
|
if (fld != '*' and len(tables) == 1) or obj.match(fld, key)
|
1428
1520
|
]
|
1429
1521
|
result[obj.alias] = obj
|
@@ -2110,5 +2202,3 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
|
|
2110
2202
|
result += query
|
2111
2203
|
return result
|
2112
2204
|
# ===========================================================================================//
|
2113
|
-
|
2114
|
-
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sql_blocks
|
3
|
-
Version: 1.25.610999999999
|
3
|
+
Version: 1.25.6139999999999
|
4
4
|
Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
|
5
5
|
Home-page: https://github.com/julio-cascalles/sql_blocks
|
6
6
|
Author: Júlio Cascalles
|
@@ -0,0 +1,7 @@
|
|
1
|
+
sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
|
2
|
+
sql_blocks/sql_blocks.py,sha256=qDwpII5rYY8tqpp93bKKHW9udOqI3SRb-0wfxjAsYD4,74694
|
3
|
+
sql_blocks-1.25.6139999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
4
|
+
sql_blocks-1.25.6139999999999.dist-info/METADATA,sha256=Pmk2nqJ3hV7PZVGE8V0bqw8TnSa1_rVIiylJad-nDCk,22236
|
5
|
+
sql_blocks-1.25.6139999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
sql_blocks-1.25.6139999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
|
7
|
+
sql_blocks-1.25.6139999999999.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
|
2
|
-
sql_blocks/sql_blocks.py,sha256=tdfGConHw2iosex_BSXAWxYTGufPyOVMSTyi3g6gqpM,71400
|
3
|
-
sql_blocks-1.25.610999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
4
|
-
sql_blocks-1.25.610999999999.dist-info/METADATA,sha256=09DIeKq_SinVNKt0OyYQQDOZ2_DDR6CmoP_n99t4ZXA,22235
|
5
|
-
sql_blocks-1.25.610999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
sql_blocks-1.25.610999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
|
7
|
-
sql_blocks-1.25.610999999999.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{sql_blocks-1.25.610999999999.dist-info → sql_blocks-1.25.6139999999999.dist-info}/top_level.txt
RENAMED
File without changes
|