sql-blocks 1.25.51999999999__py3-none-any.whl → 1.25.516999999999__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sql_blocks/sql_blocks.py CHANGED
@@ -38,10 +38,8 @@ class SQLObject:
38
38
  self.key_field = ''
39
39
  self.set_table(table_name)
40
40
 
41
- def set_table(self, table_name: str):
42
- if not table_name:
43
- return
44
- cls = SQLObject
41
+ @classmethod
42
+ def split_alias(cls, table_name: str) -> tuple:
45
43
  is_file_name = any([
46
44
  '/' in table_name, '.' in table_name
47
45
  ])
@@ -49,16 +47,21 @@ class SQLObject:
49
47
  if is_file_name:
50
48
  ref = table_name.split('/')[-1].split('.')[0]
51
49
  if cls.ALIAS_FUNC:
52
- self.__alias = cls.ALIAS_FUNC(ref)
50
+ return cls.ALIAS_FUNC(ref), table_name
53
51
  elif ' ' in table_name.strip():
54
- table_name, self.__alias = table_name.split()
52
+ table_name, alias = table_name.split()
53
+ return alias, table_name
55
54
  elif '_' in ref:
56
- self.__alias = ''.join(
55
+ return ''.join(
57
56
  word[0].lower()
58
57
  for word in ref.split('_')
59
- )
60
- else:
61
- self.__alias = ref.lower()[:3]
58
+ ), table_name
59
+ return ref.lower()[:3], table_name
60
+
61
+ def set_table(self, table_name: str):
62
+ if not table_name:
63
+ return
64
+ self.__alias, table_name = self.split_alias(table_name)
62
65
  self.values.setdefault(FROM, []).append(f'{table_name} {self.alias}')
63
66
 
64
67
  @property
@@ -99,10 +102,11 @@ class SQLObject:
99
102
  for fld in source:
100
103
  result += re.split(r'([=()]|<>|\s+ON\s+|\s+on\s+)', fld)
101
104
  return result
102
- def cleanup(fld: str) -> str:
105
+ def cleanup(text: str) -> str:
106
+ text = re.sub(r'[\n\t]', ' ', text)
103
107
  if exact:
104
- fld = fld.lower()
105
- return fld.strip()
108
+ text = text.lower()
109
+ return text.strip()
106
110
  def field_set(source: list) -> set:
107
111
  return set(
108
112
  (
@@ -597,10 +601,11 @@ class Where:
597
601
  main.values[FROM].append(f',{query.table_name} {query.alias}')
598
602
  for key in USUAL_KEYS:
599
603
  main.update_values(key, query.values.get(key, []))
600
- main.values.setdefault(WHERE, []).append('({a1}.{f1} = {a2}.{f2})'.format(
601
- a1=main.alias, f1=name,
602
- a2=query.alias, f2=query.key_field
603
- ))
604
+ if query.key_field:
605
+ main.values.setdefault(WHERE, []).append('({a1}.{f1} = {a2}.{f2})'.format(
606
+ a1=main.alias, f1=name,
607
+ a2=query.alias, f2=query.key_field
608
+ ))
604
609
 
605
610
  def add(self, name: str, main: SQLObject):
606
611
  func_type = FUNCTION_CLASS.get(name.lower())
@@ -667,16 +672,14 @@ class Options:
667
672
  self.__children: dict = values
668
673
 
669
674
  def add(self, logical_separator: str, main: SQLObject):
670
- if logical_separator not in ('AND', 'OR'):
675
+ if logical_separator.upper() not in ('AND', 'OR'):
671
676
  raise ValueError('`logical_separator` must be AND or OR')
672
- conditions: list[str] = []
677
+ temp = Select(f'{main.table_name} {main.alias}')
673
678
  child: Where
674
679
  for field, child in self.__children.items():
675
- conditions.append(' {} {} '.format(
676
- Field.format(field, main), child.content
677
- ))
680
+ child.add(field, temp)
678
681
  main.values.setdefault(WHERE, []).append(
679
- '(' + logical_separator.join(conditions) + ')'
682
+ '(' + f'\n\t{logical_separator} '.join(temp.values[WHERE]) + ')'
680
683
  )
681
684
 
682
685
 
@@ -777,6 +780,20 @@ class OrderBy(Clause):
777
780
  name = cls.format(name, main)
778
781
  main.values.setdefault(ORDER_BY, []).append(name+cls.sort.value)
779
782
 
783
+ @staticmethod
784
+ def ascending(value: str) -> bool:
785
+ if re.findall(r'\s+(DESC)\s*$', value):
786
+ return False
787
+ return True
788
+
789
+ @classmethod
790
+ def format(cls, name: str, main: SQLObject) -> str:
791
+ if cls.ascending(name):
792
+ cls.sort = SortType.ASC
793
+ else:
794
+ cls.sort = SortType.DESC
795
+ return super().format(name, main)
796
+
780
797
  @classmethod
781
798
  def cls_to_str(cls) -> str:
782
799
  return ORDER_BY
@@ -836,8 +853,16 @@ class QueryLanguage:
836
853
  has_default = {key: bool(key == SELECT) for key in KEYWORD}
837
854
 
838
855
  @staticmethod
839
- def remove_alias(fld: str) -> str:
840
- return ''.join(re.split(r'\w+[.]', fld))
856
+ def remove_alias(text: str) -> str:
857
+ value, sep = '', ''
858
+ text = re.sub('[\n\t]', ' ', text)
859
+ if ':' in text:
860
+ text, value = text.split(':', maxsplit=1)
861
+ sep = ':'
862
+ return '{}{}{}'.format(
863
+ ''.join(re.split(r'\w+[.]', text)),
864
+ sep, value.replace("'", '"')
865
+ )
841
866
 
842
867
  def join_with_tabs(self, values: list, sep: str='') -> str:
843
868
  sep = sep + self.TABULATION
@@ -855,6 +880,8 @@ class QueryLanguage:
855
880
  return self.join_with_tabs(values, ' AND ')
856
881
 
857
882
  def sort_by(self, values: list) -> str:
883
+ if OrderBy.sort == SortType.DESC:
884
+ values[-1] += ' DESC'
858
885
  return self.join_with_tabs(values, ',')
859
886
 
860
887
  def set_group(self, values: list) -> str:
@@ -905,7 +932,8 @@ class MongoDBLanguage(QueryLanguage):
905
932
  LOGICAL_OP_TO_MONGO_FUNC = {
906
933
  '>': '$gt', '>=': '$gte',
907
934
  '<': '$lt', '<=': '$lte',
908
- '=': '$eq', '<>': '$ne',
935
+ '=': '$eq', '<>': '$ne',
936
+ 'like': '$regex', 'LIKE': '$regex',
909
937
  }
910
938
  OPERATORS = '|'.join(op for op in LOGICAL_OP_TO_MONGO_FUNC)
911
939
  REGEX = {
@@ -958,7 +986,7 @@ class MongoDBLanguage(QueryLanguage):
958
986
  field, *op, const = tokens
959
987
  op = ''.join(op)
960
988
  expr = '{begin}{op}:{const}{end}'.format(
961
- begin='{', const=const, end='}',
989
+ begin='{', const=const.replace('%', '.*'), end='}',
962
990
  op=cls.LOGICAL_OP_TO_MONGO_FUNC[op],
963
991
  )
964
992
  where_list.append(f'{field}:{expr}')
@@ -1067,6 +1095,149 @@ class Neo4JLanguage(QueryLanguage):
1067
1095
  return ''
1068
1096
 
1069
1097
 
1098
+ class DataAnalysisLanguage(QueryLanguage):
1099
+ def __init__(self, target: 'Select'):
1100
+ super().__init__(target)
1101
+ self.aggregation_fields = []
1102
+
1103
+ def split_agg_fields(self, values: list) -> list:
1104
+ AGG_FUNC_REGEX = re.compile(
1105
+ r'({})[(]'.format(
1106
+ '|'.join(cls.__name__ for cls in Aggregate.__subclasses__())
1107
+ ),
1108
+ re.IGNORECASE
1109
+ )
1110
+ common_fields = []
1111
+ for field in values:
1112
+ field = self.remove_alias(field)
1113
+ if AGG_FUNC_REGEX.findall(field):
1114
+ self.aggregation_fields.append(field)
1115
+ else:
1116
+ common_fields.append(field)
1117
+ return common_fields
1118
+
1119
+ class DatabricksLanguage(DataAnalysisLanguage):
1120
+ pattern = '{_from}{where}{group_by}{order_by}{select}{limit}'
1121
+ has_default = {key: bool(key == SELECT) for key in KEYWORD}
1122
+
1123
+ def add_field(self, values: list) -> str:
1124
+ return super().add_field(
1125
+ self.split_agg_fields(values)
1126
+ )
1127
+
1128
+ def prefix(self, key: str) -> str:
1129
+ def get_aggregate() -> str:
1130
+ return 'AGGREGATE {} '.format(
1131
+ ','.join(self.aggregation_fields)
1132
+ )
1133
+ return '{}{}{}{}{}'.format(
1134
+ self.LINE_BREAK,
1135
+ '|> ' if key != FROM else '',
1136
+ get_aggregate() if key == GROUP_BY else '',
1137
+ key, self.TABULATION
1138
+ )
1139
+
1140
+
1141
+ class PandasLanguage(DataAnalysisLanguage):
1142
+ pattern = '{_from}{where}{select}{group_by}{order_by}'
1143
+ has_default = {key: False for key in KEYWORD}
1144
+
1145
+ def add_field(self, values: list) -> str:
1146
+ def line_field_fmt(field: str) -> str:
1147
+ return "{}'{}'".format(
1148
+ self.TABULATION, field
1149
+ )
1150
+ common_fields = self.split_agg_fields(values)
1151
+ if common_fields:
1152
+ return '[[\n{}\n]]'.format(
1153
+ ','.join(line_field_fmt(fld) for fld in common_fields)
1154
+ )
1155
+ return ''
1156
+
1157
+ def get_tables(self, values: list) -> str:
1158
+ result = 'import pandas as pd'
1159
+ names = {}
1160
+ for table in values:
1161
+ table, *join = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', table) if t.strip()]
1162
+ alias, table = SQLObject.split_alias(table)
1163
+ result += f"\ndf_{table} = pd.read_csv('{table}.csv')"
1164
+ names[alias] = table
1165
+ if join:
1166
+ a1, f1, a2, f2 = [r.strip() for r in re.split('[().=]', join[-1]) if r]
1167
+ result += "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
1168
+ last_table, names[a1], names[a2], f1, f2, 'inner'
1169
+ )
1170
+ last_table = table
1171
+ _, table = SQLObject.split_alias(values[0])
1172
+ result += f'\ndf = df_{table}\n\ndf = df\n'
1173
+ return result
1174
+
1175
+ def extract_conditions(self, values: list) -> str:
1176
+ conditions = []
1177
+ STR_FUNC = {
1178
+ 1: '.str.startswith(',
1179
+ 2: '.str.endswith(',
1180
+ 3: '.str.contains(',
1181
+ }
1182
+ for expr in values:
1183
+ expr = self.remove_alias(expr)
1184
+ field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
1185
+ if op.upper() == 'LIKE' and len(const) == 3:
1186
+ level = 0
1187
+ if '%' in const[0]:
1188
+ level += 2
1189
+ if '%' in const[2]:
1190
+ level += 1
1191
+ const = f"'{const[1]}'"
1192
+ op = STR_FUNC[level]
1193
+ else:
1194
+ const = ''.join(const)
1195
+ conditions.append(
1196
+ f"(df['{field}']{op}{const})"
1197
+ )
1198
+ if not conditions:
1199
+ return ''
1200
+ return '[\n{}\n]'.format(
1201
+ '&'.join(f'\t{c}' for c in conditions),
1202
+ )
1203
+
1204
+ def clean_values(self, values: list) -> str:
1205
+ for i in range(len(values)):
1206
+ content = self.remove_alias(values[i])
1207
+ values[i] = f"'{content}'"
1208
+ return ','.join(values)
1209
+
1210
+ def sort_by(self, values: list) -> str:
1211
+ if not values:
1212
+ return ''
1213
+ return '.sort_values(\n{},\n\tascending = {}\n)'.format(
1214
+ '\t'+self.clean_values(values), OrderBy.ascending(values[-1])
1215
+ )
1216
+
1217
+ def set_group(self, values: list) -> str:
1218
+ result = '.groupby([\n\t{}\n])'.format(
1219
+ self.clean_values(values)
1220
+ )
1221
+ if self.aggregation_fields:
1222
+ PANDAS_AGG_FUNC = {'Avg': 'mean', 'Count': 'size'}
1223
+ result += '.agg({'
1224
+ for field in self.aggregation_fields:
1225
+ func, field, *alias = re.split('[()]', field) # [To-Do: Use `alias`]
1226
+ result += "{}'{}': ['{}']".format(
1227
+ self.TABULATION, field,
1228
+ PANDAS_AGG_FUNC.get(func, func)
1229
+ )
1230
+ result += '\n})'
1231
+ return result
1232
+
1233
+ def __init__(self, target: 'Select'):
1234
+ super().__init__(target)
1235
+ self.result['function'] = 'find'
1236
+
1237
+ def prefix(self, key: str):
1238
+ return ''
1239
+
1240
+
1070
1241
  class Parser:
1071
1242
  REGEX = {}
1072
1243
 
@@ -1422,7 +1593,18 @@ class MongoParser(Parser):
1422
1593
 
1423
1594
  def begin_conditions(self, value: str):
1424
1595
  self.where_list = {}
1596
+ self.field_method = self.first_ORfield
1425
1597
  return Where
1598
+
1599
+ def first_ORfield(self, text: str):
1600
+ if text.startswith('$'):
1601
+ return
1602
+ found = re.search(r'\w+[:]', text)
1603
+ if not found:
1604
+ return
1605
+ self.field_method = None
1606
+ p1, p2 = found.span()
1607
+ self.last_field = text[p1: p2-1]
1426
1608
 
1427
1609
  def increment_brackets(self, value: str):
1428
1610
  self.brackets[value] += 1
@@ -1431,6 +1613,7 @@ class MongoParser(Parser):
1431
1613
  self.method = self.new_query
1432
1614
  self.last_field = ''
1433
1615
  self.where_list = None
1616
+ self.field_method = None
1434
1617
  self.PARAM_BY_FUNCTION = {
1435
1618
  'find': Where, 'aggregate': GroupBy, 'sort': OrderBy
1436
1619
  }
@@ -1460,6 +1643,8 @@ class MongoParser(Parser):
1460
1643
  self.close_brackets(
1461
1644
  BRACKET_PAIR[token]
1462
1645
  )
1646
+ elif self.field_method:
1647
+ self.field_method(token)
1463
1648
  self.method = self.TOKEN_METHODS.get(token)
1464
1649
  # ----------------------------
1465
1650
 
@@ -1467,6 +1652,7 @@ class MongoParser(Parser):
1467
1652
  class Select(SQLObject):
1468
1653
  join_type: JoinType = JoinType.INNER
1469
1654
  EQUIVALENT_NAMES = {}
1655
+ DefaultLanguage = QueryLanguage
1470
1656
 
1471
1657
  def __init__(self, table_name: str='', **values):
1472
1658
  super().__init__(table_name)
@@ -1526,7 +1712,7 @@ class Select(SQLObject):
1526
1712
  return query
1527
1713
 
1528
1714
  def __str__(self) -> str:
1529
- return self.translate_to(QueryLanguage)
1715
+ return self.translate_to(self.DefaultLanguage)
1530
1716
 
1531
1717
  def __call__(self, **values):
1532
1718
  for name, params in values.items():
@@ -1846,3 +2032,27 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
1846
2032
  result += query
1847
2033
  return result
1848
2034
  # ===========================================================================================//
2035
+
2036
+
2037
+ if __name__ == "__main__":
2038
+ query = detect('''
2039
+ SELECT
2040
+ e.gender, d.region,
2041
+ Avg(e.age)
2042
+ FROM
2043
+ Employees e
2044
+ LEFT JOIN Department d ON (e.depto_id = d.id)
2045
+ WHERE
2046
+ e.name LIKE 'C%'
2047
+ GROUP BY
2048
+ e.gender, d.region
2049
+ ORDER BY
2050
+ d.region DESC
2051
+ ''')
2052
+ print('='*50)
2053
+ print(query)
2054
+ print('-'*50)
2055
+ # Select.DefaultLanguage = DatabricksLanguage
2056
+ Select.DefaultLanguage = PandasLanguage
2057
+ print(query)
2058
+ print('='*50)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sql_blocks
3
- Version: 1.25.51999999999
3
+ Version: 1.25.516999999999
4
4
  Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
5
5
  Home-page: https://github.com/julio-cascalles/sql_blocks
6
6
  Author: Júlio Cascalles
@@ -652,10 +652,12 @@ Automatically assigns aliases to each side of the relationship (In this example,
652
652
 
653
653
  ---
654
654
  ### `translate_to` method
655
- It consists of the inverse process of parsing: From a Select object, it returns the text to a script in any of the languages ​​below:
655
+ From a Select object, it returns the text of a script in any of the languages below:
656
656
  * QueryLanguage - default
657
657
  * MongoDBLanguage
658
658
  * Neo4JLanguage
659
+ * DatabricksLanguage
660
+ * PandasLanguage
659
661
 
660
662
  ---
661
663
  ### 14 - Window Function
@@ -0,0 +1,7 @@
1
+ sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
2
+ sql_blocks/sql_blocks.py,sha256=kM3hBe1P9qDZOORqoX9tTS80EMtEFpBxtOEo8miLEd4,69102
3
+ sql_blocks-1.25.516999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
4
+ sql_blocks-1.25.516999999999.dist-info/METADATA,sha256=CHRNGYWScyUPpE2GbRriFkMoELDb8WYNGGEVLlnxa38,22235
5
+ sql_blocks-1.25.516999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ sql_blocks-1.25.516999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
7
+ sql_blocks-1.25.516999999999.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
2
- sql_blocks/sql_blocks.py,sha256=ZdCFtPShmn-nHrE2tpJCWMnJYmPsc742CIkrPc_hSs4,61854
3
- sql_blocks-1.25.51999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
4
- sql_blocks-1.25.51999999999.dist-info/METADATA,sha256=ZK0V4KW5v8VtqFML82WFBbN_NpDN7iHbGjMo09fiRbc,22241
5
- sql_blocks-1.25.51999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- sql_blocks-1.25.51999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
7
- sql_blocks-1.25.51999999999.dist-info/RECORD,,