PyPI - sql-blocks - Versions diffs - 1.25.51999999999__py3-none-any.whl → 1.25.516999999999__py3-none-any.whl - Mend

sql-blocks 1.25.51999999999__py3-none-any.whl → 1.25.516999999999__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

sql_blocks/sql_blocks.py CHANGED Viewed

@@ -38,10 +38,8 @@ class SQLObject:
         self.key_field = ''
         self.set_table(table_name)
-    def set_table(self, table_name: str):
-        if not table_name:
-            return
-        cls = SQLObject
+    @classmethod
+    def split_alias(cls, table_name: str) -> tuple:
         is_file_name = any([
             '/' in table_name, '.' in table_name
         ])
@@ -49,16 +47,21 @@ class SQLObject:
         if is_file_name:
             ref = table_name.split('/')[-1].split('.')[0]
         if cls.ALIAS_FUNC:
-            self.__alias = cls.ALIAS_FUNC(ref)
+            return cls.ALIAS_FUNC(ref), table_name
         elif ' ' in table_name.strip():
-            table_name, self.__alias = table_name.split()
+            table_name, alias = table_name.split()
+            return alias, table_name
         elif '_' in ref:
-            self.__alias = ''.join(
+            return ''.join(
                 word[0].lower()
                 for word in ref.split('_')
-            )
-        else:
-            self.__alias = ref.lower()[:3]
+            ), table_name
+        return ref.lower()[:3], table_name
+    def set_table(self, table_name: str):
+        if not table_name:
+            return
+        self.__alias, table_name = self.split_alias(table_name)
         self.values.setdefault(FROM, []).append(f'{table_name} {self.alias}')
     @property
@@ -99,10 +102,11 @@ class SQLObject:
             for fld in source:
                 result += re.split(r'([=()]|<>|\s+ON\s+|\s+on\s+)', fld)
             return result
-        def cleanup(fld: str) -> str:
+        def cleanup(text: str) -> str:
+            text = re.sub(r'[\n\t]', ' ', text)
             if exact:
-                fld = fld.lower()
-            return fld.strip()
+                text = text.lower()
+            return text.strip()
         def field_set(source: list) -> set:
             return set(
                 (
@@ -597,10 +601,11 @@ class Where:
         main.values[FROM].append(f',{query.table_name} {query.alias}')
         for key in USUAL_KEYS:
             main.update_values(key, query.values.get(key, []))
-        main.values.setdefault(WHERE, []).append('({a1}.{f1} = {a2}.{f2})'.format(
-            a1=main.alias, f1=name,
-            a2=query.alias, f2=query.key_field
-        ))
+        if query.key_field:
+            main.values.setdefault(WHERE, []).append('({a1}.{f1} = {a2}.{f2})'.format(
+                a1=main.alias, f1=name,
+                a2=query.alias, f2=query.key_field
+            ))
     def add(self, name: str, main: SQLObject):
         func_type = FUNCTION_CLASS.get(name.lower())
@@ -667,16 +672,14 @@ class Options:
         self.__children: dict = values
     def add(self, logical_separator: str, main: SQLObject):
-        if logical_separator not in ('AND', 'OR'):
+        if logical_separator.upper() not in ('AND', 'OR'):
             raise ValueError('`logical_separator` must be AND or OR')
-        conditions: list[str] = []
+        temp = Select(f'{main.table_name} {main.alias}')
         child: Where
         for field, child in self.__children.items():
-            conditions.append(' {} {} '.format(
-                Field.format(field, main), child.content
-            ))
+            child.add(field, temp)
         main.values.setdefault(WHERE, []).append(
-            '(' + logical_separator.join(conditions) + ')'
+            '(' + f'\n\t{logical_separator} '.join(temp.values[WHERE]) + ')'
         )
@@ -777,6 +780,20 @@ class OrderBy(Clause):
         name = cls.format(name, main)
         main.values.setdefault(ORDER_BY, []).append(name+cls.sort.value)
+    @staticmethod
+    def ascending(value: str) -> bool:
+        if re.findall(r'\s+(DESC)\s*$', value):
+            return False
+        return True
+    @classmethod
+    def format(cls, name: str, main: SQLObject) -> str:
+        if cls.ascending(name):
+            cls.sort = SortType.ASC
+        else:
+            cls.sort = SortType.DESC
+        return super().format(name, main)
     @classmethod
     def cls_to_str(cls) -> str:
         return ORDER_BY
@@ -836,8 +853,16 @@ class QueryLanguage:
     has_default = {key: bool(key == SELECT) for key in KEYWORD}
     @staticmethod
-    def remove_alias(fld: str) -> str:
-        return ''.join(re.split(r'\w+[.]', fld))
+    def remove_alias(text: str) -> str:
+        value, sep = '', ''
+        text = re.sub('[\n\t]', ' ', text)
+        if ':' in text:
+            text, value = text.split(':', maxsplit=1)
+            sep = ':'
+        return '{}{}{}'.format(
+            ''.join(re.split(r'\w+[.]', text)),
+            sep, value.replace("'", '"')
+        )
     def join_with_tabs(self, values: list, sep: str='') -> str:
         sep = sep + self.TABULATION
@@ -855,6 +880,8 @@ class QueryLanguage:
         return  self.join_with_tabs(values, ' AND ')
     def sort_by(self, values: list) -> str:
+        if OrderBy.sort == SortType.DESC:
+            values[-1] += ' DESC'
         return self.join_with_tabs(values, ',')
     def set_group(self, values: list) -> str:
@@ -905,7 +932,8 @@ class MongoDBLanguage(QueryLanguage):
     LOGICAL_OP_TO_MONGO_FUNC = {
         '>': '$gt',  '>=': '$gte',
         '<': '$lt',  '<=': '$lte',
-        '=': '$eq',  '<>': '$ne',
+        '=': '$eq',  '<>': '$ne',
+        'like': '$regex', 'LIKE': '$regex',
     }
     OPERATORS = '|'.join(op for op in LOGICAL_OP_TO_MONGO_FUNC)
     REGEX = {
@@ -958,7 +986,7 @@ class MongoDBLanguage(QueryLanguage):
             field, *op, const = tokens
             op = ''.join(op)
             expr = '{begin}{op}:{const}{end}'.format(
-                begin='{', const=const, end='}',
+                begin='{', const=const.replace('%', '.*'), end='}',
                 op=cls.LOGICAL_OP_TO_MONGO_FUNC[op],
             )
             where_list.append(f'{field}:{expr}')
@@ -1067,6 +1095,149 @@ class Neo4JLanguage(QueryLanguage):
         return ''
+class DataAnalysisLanguage(QueryLanguage):
+    def __init__(self, target: 'Select'):
+        super().__init__(target)
+        self.aggregation_fields = []
+    def split_agg_fields(self, values: list) -> list:
+        AGG_FUNC_REGEX = re.compile(
+            r'({})[(]'.format(
+                '|'.join(cls.__name__ for cls in Aggregate.__subclasses__())
+            ),
+            re.IGNORECASE
+        )
+        common_fields = []
+        for field in values:
+            field = self.remove_alias(field)
+            if AGG_FUNC_REGEX.findall(field):
+                self.aggregation_fields.append(field)
+            else:
+                common_fields.append(field)
+        return common_fields
+class DatabricksLanguage(DataAnalysisLanguage):
+    pattern = '{_from}{where}{group_by}{order_by}{select}{limit}'
+    has_default = {key: bool(key == SELECT) for key in KEYWORD}
+    def add_field(self, values: list) -> str:
+        return super().add_field(
+            self.split_agg_fields(values)
+        )
+    def prefix(self, key: str) -> str:
+        def get_aggregate() -> str:
+            return 'AGGREGATE {} '.format(
+                ','.join(self.aggregation_fields)
+            )
+        return '{}{}{}{}{}'.format(
+            self.LINE_BREAK,
+            '|> ' if key != FROM else '',
+            get_aggregate() if key == GROUP_BY else '',
+            key, self.TABULATION
+        )
+class PandasLanguage(DataAnalysisLanguage):
+    pattern = '{_from}{where}{select}{group_by}{order_by}'
+    has_default = {key: False for key in KEYWORD}
+    def add_field(self, values: list) -> str:
+        def line_field_fmt(field: str) -> str:
+            return "{}'{}'".format(
+                self.TABULATION, field
+            )
+        common_fields = self.split_agg_fields(values)
+        if common_fields:
+            return '[[\n{}\n]]'.format(
+                ','.join(line_field_fmt(fld) for fld in common_fields)
+            )
+        return ''
+    def get_tables(self, values: list) -> str:
+        result = 'import pandas as pd'
+        names = {}
+        for table in values:
+            table, *join = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', table) if t.strip()]
+            alias, table = SQLObject.split_alias(table)
+            result += f"\ndf_{table} = pd.read_csv('{table}.csv')"
+            names[alias] = table
+            if join:
+                a1, f1, a2, f2 = [r.strip() for r in re.split('[().=]', join[-1]) if r]
+                result += "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
+                    last_table, names[a1], names[a2], f1, f2, 'inner'
+                )
+            last_table = table
+        _, table = SQLObject.split_alias(values[0])
+        result += f'\ndf = df_{table}\n\ndf = df\n'
+        return result
+    def extract_conditions(self, values: list) -> str:
+        conditions = []
+        STR_FUNC = {
+            1: '.str.startswith(',
+            2: '.str.endswith(',
+            3: '.str.contains(',
+        }
+        for expr in values:
+            expr = self.remove_alias(expr)
+            field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
+            if op.upper() == 'LIKE' and len(const) == 3:
+                level = 0
+                if '%' in const[0]:
+                    level += 2
+                if '%' in const[2]:
+                    level += 1
+                const = f"'{const[1]}'"
+                op = STR_FUNC[level]
+            else:
+                const = ''.join(const)
+            conditions.append(
+                f"(df['{field}']{op}{const})"
+            )
+        if not conditions:
+            return ''
+        return '[\n{}\n]'.format(
+            '&'.join(f'\t{c}' for c in conditions),
+        )
+    def clean_values(self, values: list) -> str:
+        for i in range(len(values)):
+            content = self.remove_alias(values[i])
+            values[i] = f"'{content}'"
+        return ','.join(values)
+    def sort_by(self, values: list) -> str:
+        if not values:
+            return ''
+        return '.sort_values(\n{},\n\tascending = {}\n)'.format(
+            '\t'+self.clean_values(values), OrderBy.ascending(values[-1])
+        )
+    def set_group(self, values: list) -> str:
+        result = '.groupby([\n\t{}\n])'.format(
+            self.clean_values(values)
+        )
+        if self.aggregation_fields:
+            PANDAS_AGG_FUNC = {'Avg': 'mean', 'Count': 'size'}
+            result += '.agg({'
+            for field in self.aggregation_fields:
+                func, field, *alias = re.split('[()]', field) # [To-Do: Use `alias`]
+                result += "{}'{}': ['{}']".format(
+                    self.TABULATION, field,
+                    PANDAS_AGG_FUNC.get(func, func)
+                )
+            result += '\n})'
+        return result
+    def __init__(self, target: 'Select'):
+        super().__init__(target)
+        self.result['function'] = 'find'
+    def prefix(self, key: str):
+        return ''
 class Parser:
     REGEX = {}
@@ -1422,7 +1593,18 @@ class MongoParser(Parser):
     def begin_conditions(self, value: str):
         self.where_list = {}
+        self.field_method = self.first_ORfield
         return Where
+    def first_ORfield(self, text: str):
+        if text.startswith('$'):
+            return
+        found = re.search(r'\w+[:]', text)
+        if not found:
+            return
+        self.field_method = None
+        p1, p2 = found.span()
+        self.last_field = text[p1: p2-1]
     def increment_brackets(self, value: str):
         self.brackets[value] += 1
@@ -1431,6 +1613,7 @@ class MongoParser(Parser):
         self.method = self.new_query
         self.last_field = ''
         self.where_list = None
+        self.field_method = None
         self.PARAM_BY_FUNCTION = {
             'find': Where, 'aggregate': GroupBy, 'sort': OrderBy
         }
@@ -1460,6 +1643,8 @@ class MongoParser(Parser):
                 self.close_brackets(
                     BRACKET_PAIR[token]
                 )
+            elif self.field_method:
+                self.field_method(token)
             self.method = self.TOKEN_METHODS.get(token)
 # ----------------------------
@@ -1467,6 +1652,7 @@ class MongoParser(Parser):
 class Select(SQLObject):
     join_type: JoinType = JoinType.INNER
     EQUIVALENT_NAMES = {}
+    DefaultLanguage = QueryLanguage
     def __init__(self, table_name: str='', **values):
         super().__init__(table_name)
@@ -1526,7 +1712,7 @@ class Select(SQLObject):
         return query
     def __str__(self) -> str:
-        return self.translate_to(QueryLanguage)
+        return self.translate_to(self.DefaultLanguage)
     def __call__(self, **values):
         for name, params in values.items():
@@ -1846,3 +2032,27 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
         result += query
     return result
 # ===========================================================================================//
+if __name__ == "__main__":
+    query = detect('''
+        SELECT
+            e.gender, d.region,
+            Avg(e.age)
+        FROM
+            Employees e
+            LEFT JOIN Department d ON (e.depto_id = d.id)
+        WHERE
+            e.name LIKE 'C%'
+        GROUP BY
+            e.gender, d.region
+        ORDER BY
+            d.region DESC
+    ''')
+    print('='*50)
+    print(query)
+    print('-'*50)
+    # Select.DefaultLanguage = DatabricksLanguage
+    Select.DefaultLanguage = PandasLanguage
+    print(query)
+    print('='*50)

{sql_blocks-1.25.51999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: sql_blocks
-Version: 1.25.51999999999
+Version: 1.25.516999999999
 Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
 Home-page: https://github.com/julio-cascalles/sql_blocks
 Author: Júlio Cascalles
@@ -652,10 +652,12 @@ Automatically assigns aliases to each side of the relationship (In this example,
 ---
 ### `translate_to` method
-It consists of the inverse process of parsing: From a Select object, it returns the text to a script in any of the languages below:
+From a Select object, it returns the text to a script in any of the languages below:
 * QueryLanguage - default
 * MongoDBLanguage
 * Neo4JLanguage
+* DatabricksLanguage
+* PandasLanguage
 ---
 ### 14 - Window Function

sql_blocks-1.25.516999999999.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
+sql_blocks/sql_blocks.py,sha256=kM3hBe1P9qDZOORqoX9tTS80EMtEFpBxtOEo8miLEd4,69102
+sql_blocks-1.25.516999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
+sql_blocks-1.25.516999999999.dist-info/METADATA,sha256=CHRNGYWScyUPpE2GbRriFkMoELDb8WYNGGEVLlnxa38,22235
+sql_blocks-1.25.516999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+sql_blocks-1.25.516999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
+sql_blocks-1.25.516999999999.dist-info/RECORD,,

sql_blocks-1.25.51999999999.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
-sql_blocks/sql_blocks.py,sha256=ZdCFtPShmn-nHrE2tpJCWMnJYmPsc742CIkrPc_hSs4,61854
-sql_blocks-1.25.51999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
-sql_blocks-1.25.51999999999.dist-info/METADATA,sha256=ZK0V4KW5v8VtqFML82WFBbN_NpDN7iHbGjMo09fiRbc,22241
-sql_blocks-1.25.51999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-sql_blocks-1.25.51999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
-sql_blocks-1.25.51999999999.dist-info/RECORD,,

{sql_blocks-1.25.51999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/LICENSE RENAMED Viewed

File without changes

{sql_blocks-1.25.51999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/WHEEL RENAMED Viewed

File without changes

{sql_blocks-1.25.51999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/top_level.txt RENAMED Viewed

File without changes

sql-blocks 1.25.51999999999__py3-none-any.whl → 1.25.516999999999__py3-none-any.whl

sql-blocks 1.25.51999999999__py3-none-any.whl → 1.25.516999999999__py3-none-any.whl