sql-blocks 1.25.6109999999999__py3-none-any.whl → 1.25.6139999999999__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sql_blocks/sql_blocks.py +102 -17
- {sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.25.6139999999999.dist-info}/METADATA +1 -1
- sql_blocks-1.25.6139999999999.dist-info/RECORD +7 -0
- sql_blocks-1.25.6109999999999.dist-info/RECORD +0 -7
- {sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.25.6139999999999.dist-info}/LICENSE +0 -0
- {sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.25.6139999999999.dist-info}/WHEEL +0 -0
- {sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.25.6139999999999.dist-info}/top_level.txt +0 -0
sql_blocks/sql_blocks.py
CHANGED
@@ -81,7 +81,9 @@ class SQLObject:
|
|
81
81
|
|
82
82
|
@staticmethod
|
83
83
|
def get_separator(key: str) -> str:
|
84
|
-
|
84
|
+
if key == WHERE:
|
85
|
+
return r'\s+and\s+|\s+AND\s+'
|
86
|
+
appendix = {FROM: r'\s+join\s+|\s+JOIN\s+'}
|
85
87
|
return KEYWORD[key][0].format(appendix.get(key, ''))
|
86
88
|
|
87
89
|
@staticmethod
|
@@ -92,6 +94,7 @@ class SQLObject:
|
|
92
94
|
def split_fields(cls, text: str, key: str) -> list:
|
93
95
|
if key == SELECT and cls.contains_CASE_statement(text):
|
94
96
|
return Case.parse(text)
|
97
|
+
text = re.sub(r'\s+', ' ', text)
|
95
98
|
separator = cls.get_separator(key)
|
96
99
|
return re.split(separator, text)
|
97
100
|
|
@@ -1220,6 +1223,10 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1220
1223
|
pattern = '{_from}{where}{select}{group_by}{order_by}'
|
1221
1224
|
has_default = {key: False for key in KEYWORD}
|
1222
1225
|
file_extension = FileExtension.CSV
|
1226
|
+
HEADER_IMPORT_LIB = ['import pandas as pd']
|
1227
|
+
LIB_INITIALIZATION = ''
|
1228
|
+
FIELD_LIST_FMT = '[[{}{}]]'
|
1229
|
+
PREFIX_LIBRARY = 'pd.'
|
1223
1230
|
|
1224
1231
|
def add_field(self, values: list) -> str:
|
1225
1232
|
def line_field_fmt(field: str) -> str:
|
@@ -1228,30 +1235,43 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1228
1235
|
)
|
1229
1236
|
common_fields = self.split_agg_fields(values)
|
1230
1237
|
if common_fields:
|
1231
|
-
return
|
1232
|
-
','.join(line_field_fmt(fld) for fld in common_fields)
|
1238
|
+
return self.FIELD_LIST_FMT.format(
|
1239
|
+
','.join(line_field_fmt(fld) for fld in common_fields),
|
1240
|
+
self.LINE_BREAK
|
1233
1241
|
)
|
1234
1242
|
return ''
|
1235
1243
|
|
1244
|
+
def merge_tables(self, elements: list, main_table: str) -> str:
|
1245
|
+
a1, f1, a2, f2 = elements
|
1246
|
+
return "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
|
1247
|
+
main_table, self.names[a1], self.names[a2], f1, f2, 'inner'
|
1248
|
+
)
|
1249
|
+
|
1236
1250
|
def get_tables(self, values: list) -> str:
|
1237
|
-
result = '
|
1238
|
-
|
1251
|
+
result = '\n'.join(self.HEADER_IMPORT_LIB) + '\n'
|
1252
|
+
if self.LIB_INITIALIZATION:
|
1253
|
+
result += f'\n{self.LIB_INITIALIZATION}'
|
1254
|
+
self.names = {}
|
1239
1255
|
for table in values:
|
1240
1256
|
table, *join = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', table) if t.strip()]
|
1241
1257
|
alias, table = SQLObject.split_alias(table)
|
1242
|
-
result += "\ndf_{table} =
|
1243
|
-
|
1258
|
+
result += "\ndf_{table} = {prefix}{func}('{table}.{ext}')".format(
|
1259
|
+
prefix=self.PREFIX_LIBRARY, func=self.file_extension.value,
|
1260
|
+
table=table, ext=self.file_extension.name.lower()
|
1244
1261
|
)
|
1245
|
-
names[alias] = table
|
1262
|
+
self.names[alias] = table
|
1246
1263
|
if join:
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
)
|
1264
|
+
result += self.merge_tables([
|
1265
|
+
r.strip() for r in re.split('[().=]', join[-1]) if r
|
1266
|
+
], last_table)
|
1251
1267
|
last_table = table
|
1252
1268
|
_, table = SQLObject.split_alias(values[0])
|
1253
|
-
result += f'\ndf = df_{table}\n\ndf = df
|
1269
|
+
result += f'\ndf = df_{table}\n\ndf = df'
|
1254
1270
|
return result
|
1271
|
+
|
1272
|
+
def split_condition_elements(self, expr: str) -> list:
|
1273
|
+
expr = self.remove_alias(expr)
|
1274
|
+
return [t for t in re.split(r'(\w+)', expr) if t.strip()]
|
1255
1275
|
|
1256
1276
|
def extract_conditions(self, values: list) -> str:
|
1257
1277
|
conditions = []
|
@@ -1261,8 +1281,7 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1261
1281
|
3: '.str.contains(',
|
1262
1282
|
}
|
1263
1283
|
for expr in values:
|
1264
|
-
|
1265
|
-
field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
|
1284
|
+
field, op, *const = self.split_condition_elements(expr)
|
1266
1285
|
if op.upper() == 'LIKE' and len(const) == 3:
|
1267
1286
|
level = 0
|
1268
1287
|
if '%' in const[0]:
|
@@ -1319,6 +1338,73 @@ class PandasLanguage(DataAnalysisLanguage):
|
|
1319
1338
|
return ''
|
1320
1339
|
|
1321
1340
|
|
1341
|
+
class SparkLanguage(PandasLanguage):
|
1342
|
+
HEADER_IMPORT_LIB = [
|
1343
|
+
'from pyspark.sql import SparkSession',
|
1344
|
+
'from pyspark.sql.functions import col, avg, sum, count'
|
1345
|
+
]
|
1346
|
+
FIELD_LIST_FMT = '.select({}{})'
|
1347
|
+
PREFIX_LIBRARY = 'pyspark.pandas.'
|
1348
|
+
|
1349
|
+
def merge_tables(self, elements: list, main_table: str) -> str:
|
1350
|
+
a1, f1, a2, f2 = elements
|
1351
|
+
COMMAND_FMT = """{cr}
|
1352
|
+
df_{result} = df_{table1}.join(
|
1353
|
+
{indent}df_{table2},
|
1354
|
+
{indent}df_{table1}.{fk_field}{op}df_{table2}.{primary_key}{cr}
|
1355
|
+
)
|
1356
|
+
"""
|
1357
|
+
return re.sub(r'\s+', '', COMMAND_FMT).format(
|
1358
|
+
result=main_table, cr=self.LINE_BREAK, indent=self.TABULATION,
|
1359
|
+
table1=self.names[a1], table2=self.names[a2],
|
1360
|
+
fk_field=f1, primary_key=f2, op=' == '
|
1361
|
+
)
|
1362
|
+
|
1363
|
+
def extract_conditions(self, values: list) -> str:
|
1364
|
+
conditions = []
|
1365
|
+
for expr in values:
|
1366
|
+
field, op, *const = self.split_condition_elements(expr)
|
1367
|
+
const = ''.join(const)
|
1368
|
+
if op.upper() == 'LIKE':
|
1369
|
+
line = f"\n\t( col('{field}').like({const}) )"
|
1370
|
+
else:
|
1371
|
+
line = f"\n\t( col('{field}') {op} {const} )"
|
1372
|
+
conditions.append(line)
|
1373
|
+
if not conditions:
|
1374
|
+
return ''
|
1375
|
+
return '.filter({}\n)'.format(
|
1376
|
+
'\n\t&'.join(conditions)
|
1377
|
+
)
|
1378
|
+
|
1379
|
+
def sort_by(self, values: list) -> str:
|
1380
|
+
if not values:
|
1381
|
+
return ''
|
1382
|
+
return '.orderBy({}{}{})'.format(
|
1383
|
+
self.TABULATION,
|
1384
|
+
self.clean_values(values),
|
1385
|
+
self.LINE_BREAK
|
1386
|
+
)
|
1387
|
+
|
1388
|
+
def set_group(self, values: list) -> str:
|
1389
|
+
result = '.groupBy({}{}{})'.format(
|
1390
|
+
self.TABULATION,
|
1391
|
+
self.clean_values(values),
|
1392
|
+
self.LINE_BREAK
|
1393
|
+
)
|
1394
|
+
if self.aggregation_fields:
|
1395
|
+
result += '.agg('
|
1396
|
+
for field in self.aggregation_fields:
|
1397
|
+
func, field, *alias = re.split(r'[()]|\s+as\s+|\s+AS\s+', field)
|
1398
|
+
result += "{}{}('{}')".format(
|
1399
|
+
self.TABULATION, func.lower(),
|
1400
|
+
field if field else '*'
|
1401
|
+
)
|
1402
|
+
if alias:
|
1403
|
+
result += f".alias('{alias[-1]}')"
|
1404
|
+
result += '\n)'
|
1405
|
+
return result
|
1406
|
+
|
1407
|
+
|
1322
1408
|
class Parser:
|
1323
1409
|
REGEX = {}
|
1324
1410
|
|
@@ -1366,7 +1452,7 @@ class SQLParser(Parser):
|
|
1366
1452
|
def prepare(self):
|
1367
1453
|
keywords = '|'.join(k + r'\b' for k in KEYWORD)
|
1368
1454
|
flags = re.IGNORECASE + re.MULTILINE
|
1369
|
-
self.REGEX['keywords'] = re.compile(f'({keywords}
|
1455
|
+
self.REGEX['keywords'] = re.compile(f'({keywords})', flags)
|
1370
1456
|
self.REGEX['subquery'] = re.compile(r'(\w\.)*\w+ +in +\(SELECT.*?\)', flags)
|
1371
1457
|
|
1372
1458
|
def eval(self, txt: str):
|
@@ -2116,4 +2202,3 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
|
|
2116
2202
|
result += query
|
2117
2203
|
return result
|
2118
2204
|
# ===========================================================================================//
|
2119
|
-
|
{sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.25.6139999999999.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sql_blocks
|
3
|
-
Version: 1.25.6109999999999
|
3
|
+
Version: 1.25.6139999999999
|
4
4
|
Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
|
5
5
|
Home-page: https://github.com/julio-cascalles/sql_blocks
|
6
6
|
Author: Júlio Cascalles
|
@@ -0,0 +1,7 @@
|
|
1
|
+
sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
|
2
|
+
sql_blocks/sql_blocks.py,sha256=qDwpII5rYY8tqpp93bKKHW9udOqI3SRb-0wfxjAsYD4,74694
|
3
|
+
sql_blocks-1.25.6139999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
4
|
+
sql_blocks-1.25.6139999999999.dist-info/METADATA,sha256=Pmk2nqJ3hV7PZVGE8V0bqw8TnSa1_rVIiylJad-nDCk,22236
|
5
|
+
sql_blocks-1.25.6139999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
sql_blocks-1.25.6139999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
|
7
|
+
sql_blocks-1.25.6139999999999.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
|
2
|
-
sql_blocks/sql_blocks.py,sha256=09o87wu2xc82AMMyV-OBiYmv0d_kfL_DIH6G--3-DIA,71615
|
3
|
-
sql_blocks-1.25.6109999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
4
|
-
sql_blocks-1.25.6109999999999.dist-info/METADATA,sha256=KpFEm1tvvHvoURZ3kV1VjvBvSIEGGAWuskacBXg0Xp4,22236
|
5
|
-
sql_blocks-1.25.6109999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
sql_blocks-1.25.6109999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
|
7
|
-
sql_blocks-1.25.6109999999999.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.25.6139999999999.dist-info}/top_level.txt
RENAMED
File without changes
|