sql-blocks 1.25.6109999999999__py3-none-any.whl → 1.2025.625__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sql_blocks/sql_blocks.py CHANGED
@@ -81,7 +81,9 @@ class SQLObject:
 
     @staticmethod
     def get_separator(key: str) -> str:
-        appendix = {WHERE: r'\s+and\s+|', FROM: r'\s+join\s+|\s+JOIN\s+'}
+        if key == WHERE:
+            return r'\s+and\s+|\s+AND\s+'
+        appendix = {FROM: r'\s+join\s+|\s+JOIN\s+'}
         return KEYWORD[key][0].format(appendix.get(key, ''))
 
     @staticmethod
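For illustration only (this snippet is not part of the package), the new `WHERE` separator returned above now splits conditions written with either `and` or `AND`:

```python
import re

separator = r'\s+and\s+|\s+AND\s+'   # the pattern returned for WHERE above
print(re.split(separator, "age >= 18 AND status = 'active' and role = 'admin'"))
# ['age >= 18', "status = 'active'", "role = 'admin'"]
```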
@@ -92,6 +94,7 @@ class SQLObject:
     def split_fields(cls, text: str, key: str) -> list:
         if key == SELECT and cls.contains_CASE_statement(text):
             return Case.parse(text)
+        text = re.sub(r'\s+', ' ', text)
         separator = cls.get_separator(key)
         return re.split(separator, text)
 
@@ -844,8 +847,15 @@ class Rows:
         )
 
 
+class DescOrderBy:
+    @classmethod
+    def add(cls, name: str, main: SQLObject):
+        name = Clause.format(name, main)
+        main.values.setdefault(ORDER_BY, []).append(name + SortType.DESC.value)
+
 class OrderBy(Clause):
     sort: SortType = SortType.ASC
+    DESC = DescOrderBy
 
     @classmethod
     def add(cls, name: str, main: SQLObject):
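A hedged usage sketch of the new descending-sort hook (table, alias, and field names here are invented, not taken from this diff): `OrderBy.DESC.add(...)` appends the formatted field plus the `DESC` suffix to the query's `ORDER BY` values.

```python
from sql_blocks.sql_blocks import Select, OrderBy

query = Select('Employee emp')        # hypothetical table and alias
OrderBy.DESC.add('salary', query)     # appends the field with " DESC" to ORDER BY
print(query)
```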
@@ -1220,6 +1230,10 @@ class PandasLanguage(DataAnalysisLanguage):
     pattern = '{_from}{where}{select}{group_by}{order_by}'
     has_default = {key: False for key in KEYWORD}
     file_extension = FileExtension.CSV
+    HEADER_IMPORT_LIB = ['import pandas as pd']
+    LIB_INITIALIZATION = ''
+    FIELD_LIST_FMT = '[[{}{}]]'
+    PREFIX_LIBRARY = 'pd.'
 
     def add_field(self, values: list) -> str:
         def line_field_fmt(field: str) -> str:
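The four new class attributes are the customization points that the `SparkLanguage` subclass (added further down in this diff) overrides. A minimal sketch of the extension pattern, with an invented backend name and illustrative values only:

```python
from sql_blocks.sql_blocks import PandasLanguage

# Sketch only: PolarsLanguage is NOT part of the package; it just shows which
# hooks a new dataframe backend would redefine.
class PolarsLanguage(PandasLanguage):
    HEADER_IMPORT_LIB = ['import polars as pl']   # emitted at the top of the script
    LIB_INITIALIZATION = ''                       # optional setup line(s)
    FIELD_LIST_FMT = '.select({}{})'              # how a field list is rendered
    PREFIX_LIBRARY = 'pl.'                        # prefix for read_csv / read_parquet calls
```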
@@ -1228,30 +1242,43 @@ class PandasLanguage(DataAnalysisLanguage):
             )
         common_fields = self.split_agg_fields(values)
         if common_fields:
-            return '[[{}\n]]'.format(
-                ','.join(line_field_fmt(fld) for fld in common_fields)
+            return self.FIELD_LIST_FMT.format(
+                ','.join(line_field_fmt(fld) for fld in common_fields),
+                self.LINE_BREAK
             )
         return ''
 
+    def merge_tables(self, elements: list, main_table: str) -> str:
+        a1, f1, a2, f2 = elements
+        return "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
+            main_table, self.names[a1], self.names[a2], f1, f2, 'inner'
+        )
+
     def get_tables(self, values: list) -> str:
-        result = 'import pandas as pd'
-        names = {}
+        result = '\n'.join(self.HEADER_IMPORT_LIB) + '\n'
+        if self.LIB_INITIALIZATION:
+            result += f'\n{self.LIB_INITIALIZATION}'
+        self.names = {}
         for table in values:
             table, *join = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', table) if t.strip()]
             alias, table = SQLObject.split_alias(table)
-            result += "\ndf_{table} = pd.{func}('{table}.{ext}')".format(
-                table=table, func=self.file_extension.value, ext=self.file_extension.name.lower()
+            result += "\ndf_{table} = {prefix}{func}('{table}.{ext}')".format(
+                prefix=self.PREFIX_LIBRARY, func=self.file_extension.value,
+                table=table, ext=self.file_extension.name.lower()
             )
-            names[alias] = table
+            self.names[alias] = table
             if join:
-                a1, f1, a2, f2 = [r.strip() for r in re.split('[().=]', join[-1]) if r]
-                result += "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
-                    last_table, names[a1], names[a2], f1, f2, 'inner'
-                )
+                result += self.merge_tables([
+                    r.strip() for r in re.split('[().=]', join[-1]) if r
+                ], last_table)
             last_table = table
         _, table = SQLObject.split_alias(values[0])
-        result += f'\ndf = df_{table}\n\ndf = df\n'
+        result += f'\ndf = df_{table}\n\ndf = df'
         return result
+
+    def split_condition_elements(self, expr: str) -> list:
+        expr = self.remove_alias(expr)
+        return [t for t in re.split(r'(\w+)', expr) if t.strip()]
 
     def extract_conditions(self, values: list) -> str:
         conditions = []
@@ -1261,8 +1288,7 @@ class PandasLanguage(DataAnalysisLanguage):
             3: '.str.contains(',
         }
         for expr in values:
-            expr = self.remove_alias(expr)
-            field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
+            field, op, *const = self.split_condition_elements(expr)
             if op.upper() == 'LIKE' and len(const) == 3:
                 level = 0
                 if '%' in const[0]:
@@ -1319,6 +1345,73 @@ class PandasLanguage(DataAnalysisLanguage):
         return ''
 
 
+class SparkLanguage(PandasLanguage):
+    HEADER_IMPORT_LIB = [
+        'from pyspark.sql import SparkSession',
+        'from pyspark.sql.functions import col, avg, sum, count'
+    ]
+    FIELD_LIST_FMT = '.select({}{})'
+    PREFIX_LIBRARY = 'pyspark.pandas.'
+
+    def merge_tables(self, elements: list, main_table: str) -> str:
+        a1, f1, a2, f2 = elements
+        COMMAND_FMT = """{cr}
+        df_{result} = df_{table1}.join(
+        {indent}df_{table2},
+        {indent}df_{table1}.{fk_field}{op}df_{table2}.{primary_key}{cr}
+        )
+        """
+        return re.sub(r'\s+', '', COMMAND_FMT).format(
+            result=main_table, cr=self.LINE_BREAK, indent=self.TABULATION,
+            table1=self.names[a1], table2=self.names[a2],
+            fk_field=f1, primary_key=f2, op=' == '
+        )
+
+    def extract_conditions(self, values: list) -> str:
+        conditions = []
+        for expr in values:
+            field, op, *const = self.split_condition_elements(expr)
+            const = ''.join(const)
+            if op.upper() == 'LIKE':
+                line = f"\n\t( col('{field}').like({const}) )"
+            else:
+                line = f"\n\t( col('{field}') {op} {const} )"
+            conditions.append(line)
+        if not conditions:
+            return ''
+        return '.filter({}\n)'.format(
+            '\n\t&'.join(conditions)
+        )
+
+    def sort_by(self, values: list) -> str:
+        if not values:
+            return ''
+        return '.orderBy({}{}{})'.format(
+            self.TABULATION,
+            self.clean_values(values),
+            self.LINE_BREAK
+        )
+
+    def set_group(self, values: list) -> str:
+        result = '.groupBy({}{}{})'.format(
+            self.TABULATION,
+            self.clean_values(values),
+            self.LINE_BREAK
+        )
+        if self.aggregation_fields:
+            result += '.agg('
+            for field in self.aggregation_fields:
+                func, field, *alias = re.split(r'[()]|\s+as\s+|\s+AS\s+', field)
+                result += "{}{}('{}')".format(
+                    self.TABULATION, func.lower(),
+                    field if field else '*'
+                )
+                if alias:
+                    result += f".alias('{alias[-1]}')"
+            result += '\n)'
+        return result
+
+
 class Parser:
     REGEX = {}
 
@@ -1366,7 +1459,7 @@ class SQLParser(Parser):
     def prepare(self):
         keywords = '|'.join(k + r'\b' for k in KEYWORD)
         flags = re.IGNORECASE + re.MULTILINE
-        self.REGEX['keywords'] = re.compile(f'({keywords}|[*])', flags)
+        self.REGEX['keywords'] = re.compile(f'({keywords})', flags)
         self.REGEX['subquery'] = re.compile(r'(\w\.)*\w+ +in +\(SELECT.*?\)', flags)
 
     def eval(self, txt: str):
@@ -1887,13 +1980,12 @@ class NotSelectIN(SelectIN):
 
 class CTE(Select):
     prefix = ''
+    show_query = True
 
-    def __init__(self, table_name: str, query_list: list[Select]):
+    def __init__(self, table_name: str, query_list: list[Select]=[]):
         super().__init__(table_name)
-        for query in query_list:
-            query.break_lines = False
         self.query_list = query_list
-        self.break_lines = False
+        self.break_lines = False
 
     def __str__(self) -> str:
         size = 0
@@ -1903,6 +1995,7 @@ class CTE(Select):
         self.break_lines = True
         # ---------------------------------------------------------
         def justify(query: Select) -> str:
+            query.break_lines = False
             result, line = [], ''
             keywords = '|'.join(KEYWORD)
             for word in re.split(fr'({keywords}|AND|OR|,)', str(query)):
@@ -1918,7 +2011,7 @@ class CTE(Select):
             self.prefix, self.table_name,
             '\nUNION ALL\n '.join(
                 justify(q) for q in self.query_list
-            ), super().__str__()
+            ), super().__str__() if self.show_query else ''
         )
 
     def join(self, pattern: str, fields: list | str, format: str=''):
@@ -1972,6 +2065,56 @@ class Recursive(CTE):
         return self
 
 
+MAIN_TAG = '__main__'
+
+class CTEFactory:
+    def __init__(self, txt: str):
+        """
+        Syntax:
+        ---
+        **SELECT ...
+        FROM** ( `sub_query1` ) **AS** `alias_1`
+        JOIN ( `sub_query2` ) **AS** `alias_2` **ON** `__join__`
+        """
+        summary = self.extract_subqueries(txt)
+        self.main = detect( summary.pop(MAIN_TAG) )
+        self.cte_list = [
+            CTE(alias, [
+                Select.parse(query)[0]
+            ])
+            for alias, query in summary.items()
+        ]
+
+    def __str__(self):
+        CTE.show_query = False
+        lines = [str(cte) for cte in self.cte_list]
+        return ',\n'.join(lines) + '\n' + str(self.main)
+
+    @staticmethod
+    def extract_subqueries(txt: str) -> dict:
+        result = {}
+        for found in re.finditer(r'(FROM|JOIN)\s*[(]\s*SELECT', txt, re.IGNORECASE):
+            start = found.start()
+            alias = ''
+            pos = start
+            while not alias:
+                found = re.search(r'[)]\s*AS\s+\w+', txt[pos:], re.IGNORECASE)
+                if not found:
+                    break
+                end = found.end() + pos
+                elements = txt[start: end].split()
+                if '(' not in elements[-3]:
+                    _, alias = elements[-2:]
+                pos = end
+            first_word = elements.pop(0)
+            if not result:
+                result[MAIN_TAG] = txt[:start]
+            result[MAIN_TAG] += f' {first_word} {alias} {alias}'
+            result[alias] = ' '.join(elements[1: -3])
+        result[MAIN_TAG] += txt[end:]
+        return result
+
+
 # ----- Rules -----
 
 class RulePutLimit(Rule):
@@ -2116,4 +2259,3 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
         result += query
     return result
 # ===========================================================================================//
-
{sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.2025.625.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sql_blocks
-Version: 1.25.6109999999999
+Version: 1.2025.625
 Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
 Home-page: https://github.com/julio-cascalles/sql_blocks
 Author: Júlio Cascalles
@@ -888,3 +888,49 @@ R2 = Recursive.create(
 
 >> Note: Comments added later.
 ---
+
+### CTEFactory class
+CTEFactory exchanges subqueries for CTEs, simply by passing the text of the "dirty" query:
+
+*Example*:
+```
+print(
+    CTEFactory("""
+        SELECT u001.name, agg_sales.total
+        FROM (
+            SELECT * FROM Users u
+            WHERE u.status = 'active'
+        ) AS u001
+        JOIN (
+            SELECT s.user_id, Sum(s.value) as total
+            FROM Sales s
+            GROUP BY s.user_id
+        )
+        As agg_sales
+        ON u001.id = agg_sales.user_id
+        ORDER BY u001.name
+    """)
+)
+```
+results...
+```
+WITH u001 AS (
+    SELECT * FROM Users u
+    WHERE u.status = 'active'
+),
+WITH agg_sales AS (
+    SELECT s.user_id, Sum(s.value) as total
+    FROM Sales s
+    GROUP BY s.user_id
+)
+SELECT
+    u001.name,
+    agg_sales.total
+FROM
+    u001 u001
+    JOIN agg_sales agg_sales ON
+    (u001.id = agg_sales.user_id)
+ORDER BY
+    u001.name
+```
+---
sql_blocks-1.2025.625.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
+sql_blocks/sql_blocks.py,sha256=J7zEJ5JNRxI3F-7TIypb0myb5OgdE5Stv4boZCTVBLM,76610
+sql_blocks-1.2025.625.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
+sql_blocks-1.2025.625.dist-info/METADATA,sha256=I6KTi5S_usvCjJEQl6lm8LG4DaxHhX0NlGRwhHpKBT8,23328
+sql_blocks-1.2025.625.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+sql_blocks-1.2025.625.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
+sql_blocks-1.2025.625.dist-info/RECORD,,
@@ -1,7 +0,0 @@
-sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
-sql_blocks/sql_blocks.py,sha256=09o87wu2xc82AMMyV-OBiYmv0d_kfL_DIH6G--3-DIA,71615
-sql_blocks-1.25.6109999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
-sql_blocks-1.25.6109999999999.dist-info/METADATA,sha256=KpFEm1tvvHvoURZ3kV1VjvBvSIEGGAWuskacBXg0Xp4,22236
-sql_blocks-1.25.6109999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-sql_blocks-1.25.6109999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
-sql_blocks-1.25.6109999999999.dist-info/RECORD,,