sql-blocks 1.25.6109999999999__py3-none-any.whl → 1.2025.625__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sql_blocks/sql_blocks.py CHANGED
@@ -81,7 +81,9 @@ class SQLObject:
 
     @staticmethod
     def get_separator(key: str) -> str:
-        appendix = {WHERE: r'\s+and\s+|', FROM: r'\s+join\s+|\s+JOIN\s+'}
+        if key == WHERE:
+            return r'\s+and\s+|\s+AND\s+'
+        appendix = {FROM: r'\s+join\s+|\s+JOIN\s+'}
         return KEYWORD[key][0].format(appendix.get(key, ''))
 
     @staticmethod
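For illustration only (this snippet is not part of the package), the new `WHERE` separator returned above now splits conditions written with either `and` or `AND`:

```python
import re

separator = r'\s+and\s+|\s+AND\s+'   # the pattern returned for WHERE above
print(re.split(separator, "age >= 18 AND status = 'active' and role = 'admin'"))
# ['age >= 18', "status = 'active'", "role = 'admin'"]
```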
@@ -92,6 +94,7 @@ class SQLObject:
     def split_fields(cls, text: str, key: str) -> list:
         if key == SELECT and cls.contains_CASE_statement(text):
             return Case.parse(text)
+        text = re.sub(r'\s+', ' ', text)
         separator = cls.get_separator(key)
         return re.split(separator, text)
 
@@ -844,8 +847,15 @@ class Rows:
         )
 
 
+class DescOrderBy:
+    @classmethod
+    def add(cls, name: str, main: SQLObject):
+        name = Clause.format(name, main)
+        main.values.setdefault(ORDER_BY, []).append(name + SortType.DESC.value)
+
 class OrderBy(Clause):
     sort: SortType = SortType.ASC
+    DESC = DescOrderBy
 
     @classmethod
     def add(cls, name: str, main: SQLObject):
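A hedged usage sketch of the new descending-sort hook (table, alias, and field names here are invented, not taken from this diff): `OrderBy.DESC.add(...)` appends the formatted field plus the `DESC` suffix to the query's `ORDER BY` values.

```python
from sql_blocks.sql_blocks import Select, OrderBy

query = Select('Employee emp')        # hypothetical table and alias
OrderBy.DESC.add('salary', query)     # appends the field with " DESC" to ORDER BY
print(query)
```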
@@ -1220,6 +1230,10 @@ class PandasLanguage(DataAnalysisLanguage):
     pattern = '{_from}{where}{select}{group_by}{order_by}'
     has_default = {key: False for key in KEYWORD}
     file_extension = FileExtension.CSV
+    HEADER_IMPORT_LIB = ['import pandas as pd']
+    LIB_INITIALIZATION = ''
+    FIELD_LIST_FMT = '[[{}{}]]'
+    PREFIX_LIBRARY = 'pd.'
 
     def add_field(self, values: list) -> str:
         def line_field_fmt(field: str) -> str:
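The four new class attributes are the customization points that the `SparkLanguage` subclass (added further down in this diff) overrides. A minimal sketch of the extension pattern, with an invented backend name and illustrative values only:

```python
from sql_blocks.sql_blocks import PandasLanguage

# Sketch only: PolarsLanguage is NOT part of the package; it just shows which
# hooks a new dataframe backend would redefine.
class PolarsLanguage(PandasLanguage):
    HEADER_IMPORT_LIB = ['import polars as pl']   # emitted at the top of the script
    LIB_INITIALIZATION = ''                       # optional setup line(s)
    FIELD_LIST_FMT = '.select({}{})'              # how a field list is rendered
    PREFIX_LIBRARY = 'pl.'                        # prefix for read_csv / read_parquet calls
```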
@@ -1228,30 +1242,43 @@ class PandasLanguage(DataAnalysisLanguage):
             )
         common_fields = self.split_agg_fields(values)
         if common_fields:
-            return '[[{}\n]]'.format(
-                ','.join(line_field_fmt(fld) for fld in common_fields)
+            return self.FIELD_LIST_FMT.format(
+                ','.join(line_field_fmt(fld) for fld in common_fields),
+                self.LINE_BREAK
             )
         return ''
 
+    def merge_tables(self, elements: list, main_table: str) -> str:
+        a1, f1, a2, f2 = elements
+        return "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
+            main_table, self.names[a1], self.names[a2], f1, f2, 'inner'
+        )
+
     def get_tables(self, values: list) -> str:
-        result = 'import pandas as pd'
-        names = {}
+        result = '\n'.join(self.HEADER_IMPORT_LIB) + '\n'
+        if self.LIB_INITIALIZATION:
+            result += f'\n{self.LIB_INITIALIZATION}'
+        self.names = {}
         for table in values:
             table, *join = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', table) if t.strip()]
             alias, table = SQLObject.split_alias(table)
-            result += "\ndf_{table} = pd.{func}('{table}.{ext}')".format(
-                table=table, func=self.file_extension.value, ext=self.file_extension.name.lower()
+            result += "\ndf_{table} = {prefix}{func}('{table}.{ext}')".format(
+                prefix=self.PREFIX_LIBRARY, func=self.file_extension.value,
+                table=table, ext=self.file_extension.name.lower()
             )
-            names[alias] = table
+            self.names[alias] = table
             if join:
-                a1, f1, a2, f2 = [r.strip() for r in re.split('[().=]', join[-1]) if r]
-                result += "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
-                    last_table, names[a1], names[a2], f1, f2, 'inner'
-                )
+                result += self.merge_tables([
+                    r.strip() for r in re.split('[().=]', join[-1]) if r
+                ], last_table)
             last_table = table
         _, table = SQLObject.split_alias(values[0])
-        result += f'\ndf = df_{table}\n\ndf = df\n'
+        result += f'\ndf = df_{table}\n\ndf = df'
         return result
+
+    def split_condition_elements(self, expr: str) -> list:
+        expr = self.remove_alias(expr)
+        return [t for t in re.split(r'(\w+)', expr) if t.strip()]
 
     def extract_conditions(self, values: list) -> str:
         conditions = []
@@ -1261,8 +1288,7 @@ class PandasLanguage(DataAnalysisLanguage):
             3: '.str.contains(',
         }
         for expr in values:
-            expr = self.remove_alias(expr)
-            field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
+            field, op, *const = self.split_condition_elements(expr)
             if op.upper() == 'LIKE' and len(const) == 3:
                 level = 0
                 if '%' in const[0]:
@@ -1319,6 +1345,73 @@ class PandasLanguage(DataAnalysisLanguage):
         return ''
 
 
+class SparkLanguage(PandasLanguage):
+    HEADER_IMPORT_LIB = [
+        'from pyspark.sql import SparkSession',
+        'from pyspark.sql.functions import col, avg, sum, count'
+    ]
+    FIELD_LIST_FMT = '.select({}{})'
+    PREFIX_LIBRARY = 'pyspark.pandas.'
+
+    def merge_tables(self, elements: list, main_table: str) -> str:
+        a1, f1, a2, f2 = elements
+        COMMAND_FMT = """{cr}
+        df_{result} = df_{table1}.join(
+        {indent}df_{table2},
+        {indent}df_{table1}.{fk_field}{op}df_{table2}.{primary_key}{cr}
+        )
+        """
+        return re.sub(r'\s+', '', COMMAND_FMT).format(
+            result=main_table, cr=self.LINE_BREAK, indent=self.TABULATION,
+            table1=self.names[a1], table2=self.names[a2],
+            fk_field=f1, primary_key=f2, op=' == '
+        )
+
+    def extract_conditions(self, values: list) -> str:
+        conditions = []
+        for expr in values:
+            field, op, *const = self.split_condition_elements(expr)
+            const = ''.join(const)
+            if op.upper() == 'LIKE':
+                line = f"\n\t( col('{field}').like({const}) )"
+            else:
+                line = f"\n\t( col('{field}') {op} {const} )"
+            conditions.append(line)
+        if not conditions:
+            return ''
+        return '.filter({}\n)'.format(
+            '\n\t&'.join(conditions)
+        )
+
+    def sort_by(self, values: list) -> str:
+        if not values:
+            return ''
+        return '.orderBy({}{}{})'.format(
+            self.TABULATION,
+            self.clean_values(values),
+            self.LINE_BREAK
+        )
+
+    def set_group(self, values: list) -> str:
+        result = '.groupBy({}{}{})'.format(
+            self.TABULATION,
+            self.clean_values(values),
+            self.LINE_BREAK
+        )
+        if self.aggregation_fields:
+            result += '.agg('
+            for field in self.aggregation_fields:
+                func, field, *alias = re.split(r'[()]|\s+as\s+|\s+AS\s+', field)
+                result += "{}{}('{}')".format(
+                    self.TABULATION, func.lower(),
+                    field if field else '*'
+                )
+                if alias:
+                    result += f".alias('{alias[-1]}')"
+            result += '\n)'
+        return result
+
+
 class Parser:
     REGEX = {}
 
@@ -1366,7 +1459,7 @@ class SQLParser(Parser):
     def prepare(self):
         keywords = '|'.join(k + r'\b' for k in KEYWORD)
         flags = re.IGNORECASE + re.MULTILINE
-        self.REGEX['keywords'] = re.compile(f'({keywords}|[*])', flags)
+        self.REGEX['keywords'] = re.compile(f'({keywords})', flags)
         self.REGEX['subquery'] = re.compile(r'(\w\.)*\w+ +in +\(SELECT.*?\)', flags)
 
     def eval(self, txt: str):
@@ -1887,13 +1980,12 @@ class NotSelectIN(SelectIN):
 
 class CTE(Select):
     prefix = ''
+    show_query = True
 
-    def __init__(self, table_name: str, query_list: list[Select]):
+    def __init__(self, table_name: str, query_list: list[Select]=[]):
         super().__init__(table_name)
-        for query in query_list:
-            query.break_lines = False
         self.query_list = query_list
-        self.break_lines = False
+        self.break_lines = False
 
     def __str__(self) -> str:
         size = 0
@@ -1903,6 +1995,7 @@ class CTE(Select):
         self.break_lines = True
         # ---------------------------------------------------------
         def justify(query: Select) -> str:
+            query.break_lines = False
             result, line = [], ''
             keywords = '|'.join(KEYWORD)
             for word in re.split(fr'({keywords}|AND|OR|,)', str(query)):
@@ -1918,7 +2011,7 @@ class CTE(Select):
             self.prefix, self.table_name,
             '\nUNION ALL\n '.join(
                 justify(q) for q in self.query_list
-            ), super().__str__()
+            ), super().__str__() if self.show_query else ''
         )
 
     def join(self, pattern: str, fields: list | str, format: str=''):
@@ -1972,6 +2065,56 @@ class Recursive(CTE):
         return self
 
 
+MAIN_TAG = '__main__'
+
+class CTEFactory:
+    def __init__(self, txt: str):
+        """
+        Syntax:
+        ---
+        **SELECT ...
+        FROM** ( `sub_query1` ) **AS** `alias_1`
+        JOIN ( `sub_query2` ) **AS** `alias_2` **ON** `__join__`
+        """
+        summary = self.extract_subqueries(txt)
+        self.main = detect( summary.pop(MAIN_TAG) )
+        self.cte_list = [
+            CTE(alias, [
+                Select.parse(query)[0]
+            ])
+            for alias, query in summary.items()
+        ]
+
+    def __str__(self):
+        CTE.show_query = False
+        lines = [str(cte) for cte in self.cte_list]
+        return ',\n'.join(lines) + '\n' + str(self.main)
+
+    @staticmethod
+    def extract_subqueries(txt: str) -> dict:
+        result = {}
+        for found in re.finditer(r'(FROM|JOIN)\s*[(]\s*SELECT', txt, re.IGNORECASE):
+            start = found.start()
+            alias = ''
+            pos = start
+            while not alias:
+                found = re.search(r'[)]\s*AS\s+\w+', txt[pos:], re.IGNORECASE)
+                if not found:
+                    break
+                end = found.end() + pos
+                elements = txt[start: end].split()
+                if '(' not in elements[-3]:
+                    _, alias = elements[-2:]
+                pos = end
+            first_word = elements.pop(0)
+            if not result:
+                result[MAIN_TAG] = txt[:start]
+            result[MAIN_TAG] += f' {first_word} {alias} {alias}'
+            result[alias] = ' '.join(elements[1: -3])
+        result[MAIN_TAG] += txt[end:]
+        return result
+
+
 # ----- Rules -----
 
 class RulePutLimit(Rule):
@@ -2116,4 +2259,3 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
         result += query
     return result
 # ===========================================================================================//
-
{sql_blocks-1.25.6109999999999.dist-info → sql_blocks-1.2025.625.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sql_blocks
-Version: 1.25.6109999999999
+Version: 1.2025.625
 Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
 Home-page: https://github.com/julio-cascalles/sql_blocks
 Author: Júlio Cascalles
@@ -888,3 +888,49 @@ R2 = Recursive.create(
 
 >> Note: Comments added later.
 ---
+
+### CTEFactory class
+CTEFactory exchanges subqueries for CTEs, simply by passing the text of the "dirty" query:
+
+*Example*:
+```
+print(
+    CTEFactory("""
+        SELECT u001.name, agg_sales.total
+        FROM (
+            SELECT * FROM Users u
+            WHERE u.status = 'active'
+        ) AS u001
+        JOIN (
+            SELECT s.user_id, Sum(s.value) as total
+            FROM Sales s
+            GROUP BY s.user_id
+        )
+        As agg_sales
+        ON u001.id = agg_sales.user_id
+        ORDER BY u001.name
+    """)
+)
+```
+results...
+```
+WITH u001 AS (
+    SELECT * FROM Users u
+    WHERE u.status = 'active'
+),
+WITH agg_sales AS (
+    SELECT s.user_id, Sum(s.value) as total
+    FROM Sales s
+    GROUP BY s.user_id
+)
+SELECT
+    u001.name,
+    agg_sales.total
+FROM
+    u001 u001
+    JOIN agg_sales agg_sales ON
+    (u001.id = agg_sales.user_id)
+ORDER BY
+    u001.name
+```
+---
sql_blocks-1.2025.625.dist-info/RECORD ADDED
@@ -0,0 +1,7 @@
+sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
+sql_blocks/sql_blocks.py,sha256=J7zEJ5JNRxI3F-7TIypb0myb5OgdE5Stv4boZCTVBLM,76610
+sql_blocks-1.2025.625.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
+sql_blocks-1.2025.625.dist-info/METADATA,sha256=I6KTi5S_usvCjJEQl6lm8LG4DaxHhX0NlGRwhHpKBT8,23328
+sql_blocks-1.2025.625.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+sql_blocks-1.2025.625.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
+sql_blocks-1.2025.625.dist-info/RECORD,,
@@ -1,7 +0,0 @@
-sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
-sql_blocks/sql_blocks.py,sha256=09o87wu2xc82AMMyV-OBiYmv0d_kfL_DIH6G--3-DIA,71615
-sql_blocks-1.25.6109999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
-sql_blocks-1.25.6109999999999.dist-info/METADATA,sha256=KpFEm1tvvHvoURZ3kV1VjvBvSIEGGAWuskacBXg0Xp4,22236
-sql_blocks-1.25.6109999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-sql_blocks-1.25.6109999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
-sql_blocks-1.25.6109999999999.dist-info/RECORD,,