sql-blocks 1.25.610999999999__py3-none-any.whl → 1.25.6139999999999__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sql_blocks/sql_blocks.py CHANGED
@@ -81,9 +81,23 @@ class SQLObject:
81
81
 
82
82
  @staticmethod
83
83
  def get_separator(key: str) -> str:
84
- appendix = {WHERE: r'\s+and\s+|', FROM: r'\s+join\s+|\s+JOIN\s+'}
84
+ if key == WHERE:
85
+ return r'\s+and\s+|\s+AND\s+'
86
+ appendix = {FROM: r'\s+join\s+|\s+JOIN\s+'}
85
87
  return KEYWORD[key][0].format(appendix.get(key, ''))
86
88
 
89
+ @staticmethod
90
+ def contains_CASE_statement(text: str) -> bool:
91
+ return re.search(r'\bCASE\b', text, re.IGNORECASE)
92
+
93
+ @classmethod
94
+ def split_fields(cls, text: str, key: str) -> list:
95
+ if key == SELECT and cls.contains_CASE_statement(text):
96
+ return Case.parse(text)
97
+ text = re.sub(r'\s+', ' ', text)
98
+ separator = cls.get_separator(key)
99
+ return re.split(separator, text)
100
+
87
101
  @staticmethod
88
102
  def is_named_field(fld: str, name: str='') -> bool:
89
103
  return re.search(fr'(\s+as\s+|\s+AS\s+){name}', fld)
@@ -103,16 +117,13 @@ class SQLObject:
103
117
  result += re.split(r'([=()]|<>|\s+ON\s+|\s+on\s+)', fld)
104
118
  return result
105
119
  def cleanup(text: str) -> str:
106
- if re.search(r'^CASE\b', text):
120
+ # if re.search(r'^CASE\b', text):
121
+ if self.contains_CASE_statement(text):
107
122
  return text
108
123
  text = re.sub(r'[\n\t]', ' ', text)
109
124
  if exact:
110
125
  text = text.lower()
111
126
  return text.strip()
112
- def split_fields(text: str) -> list:
113
- if key == SELECT:
114
- return Case.parse(text)
115
- return re.split(separator, text)
116
127
  def field_set(source: list) -> set:
117
128
  return set(
118
129
  (
@@ -122,14 +133,13 @@ class SQLObject:
122
133
  re.sub(pattern, '', cleanup(fld))
123
134
  )
124
135
  for string in disassemble(source)
125
- for fld in split_fields(string)
136
+ for fld in self.split_fields(string, key)
126
137
  )
127
138
  pattern = KEYWORD[key][1]
128
139
  if exact:
129
140
  if key == WHERE:
130
141
  pattern = r'["\']| '
131
142
  pattern += f'|{PATTERN_PREFIX}'
132
- separator = self.get_separator(key)
133
143
  s1 = field_set(search_list)
134
144
  s2 = field_set(self.values.get(key, []))
135
145
  if exact:
@@ -727,7 +737,7 @@ class Case:
727
737
  result += block.fields
728
738
  block.fields = []
729
739
  elif word not in RESERVED_WORDS:
730
- result.append(word)
740
+ result.append(word.replace(',', ''))
731
741
  last_word = word
732
742
  return result
733
743
 
@@ -1213,6 +1223,10 @@ class PandasLanguage(DataAnalysisLanguage):
1213
1223
  pattern = '{_from}{where}{select}{group_by}{order_by}'
1214
1224
  has_default = {key: False for key in KEYWORD}
1215
1225
  file_extension = FileExtension.CSV
1226
+ HEADER_IMPORT_LIB = ['import pandas as pd']
1227
+ LIB_INITIALIZATION = ''
1228
+ FIELD_LIST_FMT = '[[{}{}]]'
1229
+ PREFIX_LIBRARY = 'pd.'
1216
1230
 
1217
1231
  def add_field(self, values: list) -> str:
1218
1232
  def line_field_fmt(field: str) -> str:
@@ -1221,30 +1235,43 @@ class PandasLanguage(DataAnalysisLanguage):
1221
1235
  )
1222
1236
  common_fields = self.split_agg_fields(values)
1223
1237
  if common_fields:
1224
- return '[[{}\n]]'.format(
1225
- ','.join(line_field_fmt(fld) for fld in common_fields)
1238
+ return self.FIELD_LIST_FMT.format(
1239
+ ','.join(line_field_fmt(fld) for fld in common_fields),
1240
+ self.LINE_BREAK
1226
1241
  )
1227
1242
  return ''
1228
1243
 
1244
+ def merge_tables(self, elements: list, main_table: str) -> str:
1245
+ a1, f1, a2, f2 = elements
1246
+ return "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
1247
+ main_table, self.names[a1], self.names[a2], f1, f2, 'inner'
1248
+ )
1249
+
1229
1250
  def get_tables(self, values: list) -> str:
1230
- result = 'import pandas as pd'
1231
- names = {}
1251
+ result = '\n'.join(self.HEADER_IMPORT_LIB) + '\n'
1252
+ if self.LIB_INITIALIZATION:
1253
+ result += f'\n{self.LIB_INITIALIZATION}'
1254
+ self.names = {}
1232
1255
  for table in values:
1233
1256
  table, *join = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', table) if t.strip()]
1234
1257
  alias, table = SQLObject.split_alias(table)
1235
- result += "\ndf_{table} = pd.{func}('{table}.{ext}')".format(
1236
- table=table, func=self.file_extension.value, ext=self.file_extension.name.lower()
1258
+ result += "\ndf_{table} = {prefix}{func}('{table}.{ext}')".format(
1259
+ prefix=self.PREFIX_LIBRARY, func=self.file_extension.value,
1260
+ table=table, ext=self.file_extension.name.lower()
1237
1261
  )
1238
- names[alias] = table
1262
+ self.names[alias] = table
1239
1263
  if join:
1240
- a1, f1, a2, f2 = [r.strip() for r in re.split('[().=]', join[-1]) if r]
1241
- result += "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
1242
- last_table, names[a1], names[a2], f1, f2, 'inner'
1243
- )
1264
+ result += self.merge_tables([
1265
+ r.strip() for r in re.split('[().=]', join[-1]) if r
1266
+ ], last_table)
1244
1267
  last_table = table
1245
1268
  _, table = SQLObject.split_alias(values[0])
1246
- result += f'\ndf = df_{table}\n\ndf = df\n'
1269
+ result += f'\ndf = df_{table}\n\ndf = df'
1247
1270
  return result
1271
+
1272
+ def split_condition_elements(self, expr: str) -> list:
1273
+ expr = self.remove_alias(expr)
1274
+ return [t for t in re.split(r'(\w+)', expr) if t.strip()]
1248
1275
 
1249
1276
  def extract_conditions(self, values: list) -> str:
1250
1277
  conditions = []
@@ -1254,8 +1281,7 @@ class PandasLanguage(DataAnalysisLanguage):
1254
1281
  3: '.str.contains(',
1255
1282
  }
1256
1283
  for expr in values:
1257
- expr = self.remove_alias(expr)
1258
- field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
1284
+ field, op, *const = self.split_condition_elements(expr)
1259
1285
  if op.upper() == 'LIKE' and len(const) == 3:
1260
1286
  level = 0
1261
1287
  if '%' in const[0]:
@@ -1312,6 +1338,73 @@ class PandasLanguage(DataAnalysisLanguage):
1312
1338
  return ''
1313
1339
 
1314
1340
 
1341
+ class SparkLanguage(PandasLanguage):
1342
+ HEADER_IMPORT_LIB = [
1343
+ 'from pyspark.sql import SparkSession',
1344
+ 'from pyspark.sql.functions import col, avg, sum, count'
1345
+ ]
1346
+ FIELD_LIST_FMT = '.select({}{})'
1347
+ PREFIX_LIBRARY = 'pyspark.pandas.'
1348
+
1349
+ def merge_tables(self, elements: list, main_table: str) -> str:
1350
+ a1, f1, a2, f2 = elements
1351
+ COMMAND_FMT = """{cr}
1352
+ df_{result} = df_{table1}.join(
1353
+ {indent}df_{table2},
1354
+ {indent}df_{table1}.{fk_field}{op}df_{table2}.{primary_key}{cr}
1355
+ )
1356
+ """
1357
+ return re.sub(r'\s+', '', COMMAND_FMT).format(
1358
+ result=main_table, cr=self.LINE_BREAK, indent=self.TABULATION,
1359
+ table1=self.names[a1], table2=self.names[a2],
1360
+ fk_field=f1, primary_key=f2, op=' == '
1361
+ )
1362
+
1363
+ def extract_conditions(self, values: list) -> str:
1364
+ conditions = []
1365
+ for expr in values:
1366
+ field, op, *const = self.split_condition_elements(expr)
1367
+ const = ''.join(const)
1368
+ if op.upper() == 'LIKE':
1369
+ line = f"\n\t( col('{field}').like({const}) )"
1370
+ else:
1371
+ line = f"\n\t( col('{field}') {op} {const} )"
1372
+ conditions.append(line)
1373
+ if not conditions:
1374
+ return ''
1375
+ return '.filter({}\n)'.format(
1376
+ '\n\t&'.join(conditions)
1377
+ )
1378
+
1379
+ def sort_by(self, values: list) -> str:
1380
+ if not values:
1381
+ return ''
1382
+ return '.orderBy({}{}{})'.format(
1383
+ self.TABULATION,
1384
+ self.clean_values(values),
1385
+ self.LINE_BREAK
1386
+ )
1387
+
1388
+ def set_group(self, values: list) -> str:
1389
+ result = '.groupBy({}{}{})'.format(
1390
+ self.TABULATION,
1391
+ self.clean_values(values),
1392
+ self.LINE_BREAK
1393
+ )
1394
+ if self.aggregation_fields:
1395
+ result += '.agg('
1396
+ for field in self.aggregation_fields:
1397
+ func, field, *alias = re.split(r'[()]|\s+as\s+|\s+AS\s+', field)
1398
+ result += "{}{}('{}')".format(
1399
+ self.TABULATION, func.lower(),
1400
+ field if field else '*'
1401
+ )
1402
+ if alias:
1403
+ result += f".alias('{alias[-1]}')"
1404
+ result += '\n)'
1405
+ return result
1406
+
1407
+
1315
1408
  class Parser:
1316
1409
  REGEX = {}
1317
1410
 
@@ -1359,7 +1452,7 @@ class SQLParser(Parser):
1359
1452
  def prepare(self):
1360
1453
  keywords = '|'.join(k + r'\b' for k in KEYWORD)
1361
1454
  flags = re.IGNORECASE + re.MULTILINE
1362
- self.REGEX['keywords'] = re.compile(f'({keywords}|[*])', flags)
1455
+ self.REGEX['keywords'] = re.compile(f'({keywords})', flags)
1363
1456
  self.REGEX['subquery'] = re.compile(r'(\w\.)*\w+ +in +\(SELECT.*?\)', flags)
1364
1457
 
1365
1458
  def eval(self, txt: str):
@@ -1417,13 +1510,12 @@ class SQLParser(Parser):
1417
1510
  for key in USUAL_KEYS:
1418
1511
  if not key in values:
1419
1512
  continue
1420
- separator = self.class_type.get_separator(key)
1421
1513
  cls = {
1422
1514
  ORDER_BY: OrderBy, GROUP_BY: GroupBy
1423
1515
  }.get(key, Field)
1424
1516
  obj.values[key] = [
1425
1517
  cls.format(fld, obj)
1426
- for fld in re.split(separator, values[key])
1518
+ for fld in self.class_type.split_fields(values[key], key)
1427
1519
  if (fld != '*' and len(tables) == 1) or obj.match(fld, key)
1428
1520
  ]
1429
1521
  result[obj.alias] = obj
@@ -2110,5 +2202,3 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
2110
2202
  result += query
2111
2203
  return result
2112
2204
  # ===========================================================================================//
2113
-
2114
-
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sql_blocks
3
- Version: 1.25.610999999999
3
+ Version: 1.25.6139999999999
4
4
  Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
5
5
  Home-page: https://github.com/julio-cascalles/sql_blocks
6
6
  Author: Júlio Cascalles
@@ -0,0 +1,7 @@
1
+ sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
2
+ sql_blocks/sql_blocks.py,sha256=qDwpII5rYY8tqpp93bKKHW9udOqI3SRb-0wfxjAsYD4,74694
3
+ sql_blocks-1.25.6139999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
4
+ sql_blocks-1.25.6139999999999.dist-info/METADATA,sha256=Pmk2nqJ3hV7PZVGE8V0bqw8TnSa1_rVIiylJad-nDCk,22236
5
+ sql_blocks-1.25.6139999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ sql_blocks-1.25.6139999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
7
+ sql_blocks-1.25.6139999999999.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
2
- sql_blocks/sql_blocks.py,sha256=tdfGConHw2iosex_BSXAWxYTGufPyOVMSTyi3g6gqpM,71400
3
- sql_blocks-1.25.610999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
4
- sql_blocks-1.25.610999999999.dist-info/METADATA,sha256=09DIeKq_SinVNKt0OyYQQDOZ2_DDR6CmoP_n99t4ZXA,22235
5
- sql_blocks-1.25.610999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- sql_blocks-1.25.610999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
7
- sql_blocks-1.25.610999999999.dist-info/RECORD,,