sql-blocks 1.25.6109999999999__py3-none-any.whl → 1.25.6139999999999__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sql_blocks/sql_blocks.py CHANGED
@@ -81,7 +81,9 @@ class SQLObject:
81
81
 
82
82
  @staticmethod
83
83
  def get_separator(key: str) -> str:
84
- appendix = {WHERE: r'\s+and\s+|', FROM: r'\s+join\s+|\s+JOIN\s+'}
84
+ if key == WHERE:
85
+ return r'\s+and\s+|\s+AND\s+'
86
+ appendix = {FROM: r'\s+join\s+|\s+JOIN\s+'}
85
87
  return KEYWORD[key][0].format(appendix.get(key, ''))
86
88
 
87
89
  @staticmethod
@@ -92,6 +94,7 @@ class SQLObject:
92
94
  def split_fields(cls, text: str, key: str) -> list:
93
95
  if key == SELECT and cls.contains_CASE_statement(text):
94
96
  return Case.parse(text)
97
+ text = re.sub(r'\s+', ' ', text)
95
98
  separator = cls.get_separator(key)
96
99
  return re.split(separator, text)
97
100
 
@@ -1220,6 +1223,10 @@ class PandasLanguage(DataAnalysisLanguage):
1220
1223
  pattern = '{_from}{where}{select}{group_by}{order_by}'
1221
1224
  has_default = {key: False for key in KEYWORD}
1222
1225
  file_extension = FileExtension.CSV
1226
+ HEADER_IMPORT_LIB = ['import pandas as pd']
1227
+ LIB_INITIALIZATION = ''
1228
+ FIELD_LIST_FMT = '[[{}{}]]'
1229
+ PREFIX_LIBRARY = 'pd.'
1223
1230
 
1224
1231
  def add_field(self, values: list) -> str:
1225
1232
  def line_field_fmt(field: str) -> str:
@@ -1228,30 +1235,43 @@ class PandasLanguage(DataAnalysisLanguage):
1228
1235
  )
1229
1236
  common_fields = self.split_agg_fields(values)
1230
1237
  if common_fields:
1231
- return '[[{}\n]]'.format(
1232
- ','.join(line_field_fmt(fld) for fld in common_fields)
1238
+ return self.FIELD_LIST_FMT.format(
1239
+ ','.join(line_field_fmt(fld) for fld in common_fields),
1240
+ self.LINE_BREAK
1233
1241
  )
1234
1242
  return ''
1235
1243
 
1244
+ def merge_tables(self, elements: list, main_table: str) -> str:
1245
+ a1, f1, a2, f2 = elements
1246
+ return "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
1247
+ main_table, self.names[a1], self.names[a2], f1, f2, 'inner'
1248
+ )
1249
+
1236
1250
  def get_tables(self, values: list) -> str:
1237
- result = 'import pandas as pd'
1238
- names = {}
1251
+ result = '\n'.join(self.HEADER_IMPORT_LIB) + '\n'
1252
+ if self.LIB_INITIALIZATION:
1253
+ result += f'\n{self.LIB_INITIALIZATION}'
1254
+ self.names = {}
1239
1255
  for table in values:
1240
1256
  table, *join = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', table) if t.strip()]
1241
1257
  alias, table = SQLObject.split_alias(table)
1242
- result += "\ndf_{table} = pd.{func}('{table}.{ext}')".format(
1243
- table=table, func=self.file_extension.value, ext=self.file_extension.name.lower()
1258
+ result += "\ndf_{table} = {prefix}{func}('{table}.{ext}')".format(
1259
+ prefix=self.PREFIX_LIBRARY, func=self.file_extension.value,
1260
+ table=table, ext=self.file_extension.name.lower()
1244
1261
  )
1245
- names[alias] = table
1262
+ self.names[alias] = table
1246
1263
  if join:
1247
- a1, f1, a2, f2 = [r.strip() for r in re.split('[().=]', join[-1]) if r]
1248
- result += "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
1249
- last_table, names[a1], names[a2], f1, f2, 'inner'
1250
- )
1264
+ result += self.merge_tables([
1265
+ r.strip() for r in re.split('[().=]', join[-1]) if r
1266
+ ], last_table)
1251
1267
  last_table = table
1252
1268
  _, table = SQLObject.split_alias(values[0])
1253
- result += f'\ndf = df_{table}\n\ndf = df\n'
1269
+ result += f'\ndf = df_{table}\n\ndf = df'
1254
1270
  return result
1271
+
1272
+ def split_condition_elements(self, expr: str) -> list:
1273
+ expr = self.remove_alias(expr)
1274
+ return [t for t in re.split(r'(\w+)', expr) if t.strip()]
1255
1275
 
1256
1276
  def extract_conditions(self, values: list) -> str:
1257
1277
  conditions = []
@@ -1261,8 +1281,7 @@ class PandasLanguage(DataAnalysisLanguage):
1261
1281
  3: '.str.contains(',
1262
1282
  }
1263
1283
  for expr in values:
1264
- expr = self.remove_alias(expr)
1265
- field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
1284
+ field, op, *const = self.split_condition_elements(expr)
1266
1285
  if op.upper() == 'LIKE' and len(const) == 3:
1267
1286
  level = 0
1268
1287
  if '%' in const[0]:
@@ -1319,6 +1338,73 @@ class PandasLanguage(DataAnalysisLanguage):
1319
1338
  return ''
1320
1339
 
1321
1340
 
1341
+ class SparkLanguage(PandasLanguage):
1342
+ HEADER_IMPORT_LIB = [
1343
+ 'from pyspark.sql import SparkSession',
1344
+ 'from pyspark.sql.functions import col, avg, sum, count'
1345
+ ]
1346
+ FIELD_LIST_FMT = '.select({}{})'
1347
+ PREFIX_LIBRARY = 'pyspark.pandas.'
1348
+
1349
+ def merge_tables(self, elements: list, main_table: str) -> str:
1350
+ a1, f1, a2, f2 = elements
1351
+ COMMAND_FMT = """{cr}
1352
+ df_{result} = df_{table1}.join(
1353
+ {indent}df_{table2},
1354
+ {indent}df_{table1}.{fk_field}{op}df_{table2}.{primary_key}{cr}
1355
+ )
1356
+ """
1357
+ return re.sub(r'\s+', '', COMMAND_FMT).format(
1358
+ result=main_table, cr=self.LINE_BREAK, indent=self.TABULATION,
1359
+ table1=self.names[a1], table2=self.names[a2],
1360
+ fk_field=f1, primary_key=f2, op=' == '
1361
+ )
1362
+
1363
+ def extract_conditions(self, values: list) -> str:
1364
+ conditions = []
1365
+ for expr in values:
1366
+ field, op, *const = self.split_condition_elements(expr)
1367
+ const = ''.join(const)
1368
+ if op.upper() == 'LIKE':
1369
+ line = f"\n\t( col('{field}').like({const}) )"
1370
+ else:
1371
+ line = f"\n\t( col('{field}') {op} {const} )"
1372
+ conditions.append(line)
1373
+ if not conditions:
1374
+ return ''
1375
+ return '.filter({}\n)'.format(
1376
+ '\n\t&'.join(conditions)
1377
+ )
1378
+
1379
+ def sort_by(self, values: list) -> str:
1380
+ if not values:
1381
+ return ''
1382
+ return '.orderBy({}{}{})'.format(
1383
+ self.TABULATION,
1384
+ self.clean_values(values),
1385
+ self.LINE_BREAK
1386
+ )
1387
+
1388
+ def set_group(self, values: list) -> str:
1389
+ result = '.groupBy({}{}{})'.format(
1390
+ self.TABULATION,
1391
+ self.clean_values(values),
1392
+ self.LINE_BREAK
1393
+ )
1394
+ if self.aggregation_fields:
1395
+ result += '.agg('
1396
+ for field in self.aggregation_fields:
1397
+ func, field, *alias = re.split(r'[()]|\s+as\s+|\s+AS\s+', field)
1398
+ result += "{}{}('{}')".format(
1399
+ self.TABULATION, func.lower(),
1400
+ field if field else '*'
1401
+ )
1402
+ if alias:
1403
+ result += f".alias('{alias[-1]}')"
1404
+ result += '\n)'
1405
+ return result
1406
+
1407
+
1322
1408
  class Parser:
1323
1409
  REGEX = {}
1324
1410
 
@@ -1366,7 +1452,7 @@ class SQLParser(Parser):
1366
1452
  def prepare(self):
1367
1453
  keywords = '|'.join(k + r'\b' for k in KEYWORD)
1368
1454
  flags = re.IGNORECASE + re.MULTILINE
1369
- self.REGEX['keywords'] = re.compile(f'({keywords}|[*])', flags)
1455
+ self.REGEX['keywords'] = re.compile(f'({keywords})', flags)
1370
1456
  self.REGEX['subquery'] = re.compile(r'(\w\.)*\w+ +in +\(SELECT.*?\)', flags)
1371
1457
 
1372
1458
  def eval(self, txt: str):
@@ -2116,4 +2202,3 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
2116
2202
  result += query
2117
2203
  return result
2118
2204
  # ===========================================================================================//
2119
-
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sql_blocks
3
- Version: 1.25.6109999999999
3
+ Version: 1.25.6139999999999
4
4
  Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
5
5
  Home-page: https://github.com/julio-cascalles/sql_blocks
6
6
  Author: Júlio Cascalles
@@ -0,0 +1,7 @@
1
+ sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
2
+ sql_blocks/sql_blocks.py,sha256=qDwpII5rYY8tqpp93bKKHW9udOqI3SRb-0wfxjAsYD4,74694
3
+ sql_blocks-1.25.6139999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
4
+ sql_blocks-1.25.6139999999999.dist-info/METADATA,sha256=Pmk2nqJ3hV7PZVGE8V0bqw8TnSa1_rVIiylJad-nDCk,22236
5
+ sql_blocks-1.25.6139999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ sql_blocks-1.25.6139999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
7
+ sql_blocks-1.25.6139999999999.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
2
- sql_blocks/sql_blocks.py,sha256=09o87wu2xc82AMMyV-OBiYmv0d_kfL_DIH6G--3-DIA,71615
3
- sql_blocks-1.25.6109999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
4
- sql_blocks-1.25.6109999999999.dist-info/METADATA,sha256=KpFEm1tvvHvoURZ3kV1VjvBvSIEGGAWuskacBXg0Xp4,22236
5
- sql_blocks-1.25.6109999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- sql_blocks-1.25.6109999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
7
- sql_blocks-1.25.6109999999999.dist-info/RECORD,,