sql-blocks 1.25.514999999999__py3-none-any.whl → 1.25.516999999999__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sql_blocks/sql_blocks.py CHANGED
@@ -38,10 +38,8 @@ class SQLObject:
38
38
  self.key_field = ''
39
39
  self.set_table(table_name)
40
40
 
41
- def set_table(self, table_name: str):
42
- if not table_name:
43
- return
44
- cls = SQLObject
41
+ @classmethod
42
+ def split_alias(cls, table_name: str) -> tuple:
45
43
  is_file_name = any([
46
44
  '/' in table_name, '.' in table_name
47
45
  ])
@@ -49,16 +47,21 @@ class SQLObject:
49
47
  if is_file_name:
50
48
  ref = table_name.split('/')[-1].split('.')[0]
51
49
  if cls.ALIAS_FUNC:
52
- self.__alias = cls.ALIAS_FUNC(ref)
50
+ return cls.ALIAS_FUNC(ref), table_name
53
51
  elif ' ' in table_name.strip():
54
- table_name, self.__alias = table_name.split()
52
+ table_name, alias = table_name.split()
53
+ return alias, table_name
55
54
  elif '_' in ref:
56
- self.__alias = ''.join(
55
+ return ''.join(
57
56
  word[0].lower()
58
57
  for word in ref.split('_')
59
- )
60
- else:
61
- self.__alias = ref.lower()[:3]
58
+ ), table_name
59
+ return ref.lower()[:3], table_name
60
+
61
+ def set_table(self, table_name: str):
62
+ if not table_name:
63
+ return
64
+ self.__alias, table_name = self.split_alias(table_name)
62
65
  self.values.setdefault(FROM, []).append(f'{table_name} {self.alias}')
63
66
 
64
67
  @property
@@ -777,6 +780,20 @@ class OrderBy(Clause):
777
780
  name = cls.format(name, main)
778
781
  main.values.setdefault(ORDER_BY, []).append(name+cls.sort.value)
779
782
 
783
+ @staticmethod
784
+ def ascending(value: str) -> bool:
785
+ if re.findall(r'\s+(DESC)\s*$', value):
786
+ return False
787
+ return True
788
+
789
+ @classmethod
790
+ def format(cls, name: str, main: SQLObject) -> str:
791
+ if cls.ascending(name):
792
+ cls.sort = SortType.ASC
793
+ else:
794
+ cls.sort = SortType.DESC
795
+ return super().format(name, main)
796
+
780
797
  @classmethod
781
798
  def cls_to_str(cls) -> str:
782
799
  return ORDER_BY
@@ -863,6 +880,8 @@ class QueryLanguage:
863
880
  return self.join_with_tabs(values, ' AND ')
864
881
 
865
882
  def sort_by(self, values: list) -> str:
883
+ if OrderBy.sort == SortType.DESC:
884
+ values[-1] += ' DESC'
866
885
  return self.join_with_tabs(values, ',')
867
886
 
868
887
  def set_group(self, values: list) -> str:
@@ -1076,28 +1095,35 @@ class Neo4JLanguage(QueryLanguage):
1076
1095
  return ''
1077
1096
 
1078
1097
 
1079
- class DatabricksLanguage(QueryLanguage):
1080
- pattern = '{_from}{where}{group_by}{order_by}{select}{limit}'
1081
- has_default = {key: bool(key == SELECT) for key in KEYWORD}
1082
-
1098
+ class DataAnalysisLanguage(QueryLanguage):
1083
1099
  def __init__(self, target: 'Select'):
1084
1100
  super().__init__(target)
1085
1101
  self.aggregation_fields = []
1086
1102
 
1087
- def add_field(self, values: list) -> str:
1088
- AGG_FUNCS = '|'.join(cls.__name__ for cls in Aggregate.__subclasses__())
1089
- # --------------------------------------------------------------
1090
- def is_agg_field(fld: str) -> bool:
1091
- return re.findall(fr'({AGG_FUNCS})[(]', fld, re.IGNORECASE)
1092
- # --------------------------------------------------------------
1093
- new_values = []
1094
- for val in values:
1095
- if is_agg_field(val):
1096
- self.aggregation_fields.append(val)
1103
+ def split_agg_fields(self, values: list) -> list:
1104
+ AGG_FUNC_REGEX = re.compile(
1105
+ r'({})[(]'.format(
1106
+ '|'.join(cls.__name__ for cls in Aggregate.__subclasses__())
1107
+ ),
1108
+ re.IGNORECASE
1109
+ )
1110
+ common_fields = []
1111
+ for field in values:
1112
+ field = self.remove_alias(field)
1113
+ if AGG_FUNC_REGEX.findall(field):
1114
+ self.aggregation_fields.append(field)
1097
1115
  else:
1098
- new_values.append(val)
1099
- values = new_values
1100
- return super().add_field(values)
1116
+ common_fields.append(field)
1117
+ return common_fields
1118
+
1119
+ class DatabricksLanguage(DataAnalysisLanguage):
1120
+ pattern = '{_from}{where}{group_by}{order_by}{select}{limit}'
1121
+ has_default = {key: bool(key == SELECT) for key in KEYWORD}
1122
+
1123
+ def add_field(self, values: list) -> str:
1124
+ return super().add_field(
1125
+ self.split_agg_fields(values)
1126
+ )
1101
1127
 
1102
1128
  def prefix(self, key: str) -> str:
1103
1129
  def get_aggregate() -> str:
@@ -1105,24 +1131,111 @@ class DatabricksLanguage(QueryLanguage):
1105
1131
  ','.join(self.aggregation_fields)
1106
1132
  )
1107
1133
  return '{}{}{}{}{}'.format(
1108
- '|> ' if key != FROM else '',
1109
1134
  self.LINE_BREAK,
1135
+ '|> ' if key != FROM else '',
1110
1136
  get_aggregate() if key == GROUP_BY else '',
1111
1137
  key, self.TABULATION
1112
1138
  )
1113
1139
 
1114
- # def get_tables(self, values: list) -> str:
1115
- # return self.join_with_tabs(values)
1116
1140
 
1117
- # def extract_conditions(self, values: list) -> str:
1118
- # return self.join_with_tabs(values, ' AND ')
1141
+ class PandasLanguage(DataAnalysisLanguage):
1142
+ pattern = '{_from}{where}{select}{group_by}{order_by}'
1143
+ has_default = {key: False for key in KEYWORD}
1144
+
1145
+ def add_field(self, values: list) -> str:
1146
+ def line_field_fmt(field: str) -> str:
1147
+ return "{}'{}'".format(
1148
+ self.TABULATION, field
1149
+ )
1150
+ common_fields = self.split_agg_fields(values)
1151
+ if common_fields:
1152
+ return '[[\n{}\n]]'.format(
1153
+ ','.join(line_field_fmt(fld) for fld in common_fields)
1154
+ )
1155
+ return ''
1156
+
1157
+ def get_tables(self, values: list) -> str:
1158
+ result = 'import pandas as pd'
1159
+ names = {}
1160
+ for table in values:
1161
+ table, *join = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', table) if t.strip()]
1162
+ alias, table = SQLObject.split_alias(table)
1163
+ result += f"\ndf_{table} = pd.read_csv('{table}.csv')"
1164
+ names[alias] = table
1165
+ if join:
1166
+ a1, f1, a2, f2 = [r.strip() for r in re.split('[().=]', join[-1]) if r]
1167
+ result += "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
1168
+ last_table, names[a1], names[a2], f1, f2, 'inner'
1169
+ )
1170
+ last_table = table
1171
+ _, table = SQLObject.split_alias(values[0])
1172
+ result += f'\ndf = df_{table}\n\ndf = df\n'
1173
+ return result
1119
1174
 
1120
- # def sort_by(self, values: list) -> str:
1121
- # return self.join_with_tabs(values, ',')
1175
+ def extract_conditions(self, values: list) -> str:
1176
+ conditions = []
1177
+ STR_FUNC = {
1178
+ 1: '.str.startswith(',
1179
+ 2: '.str.endswith(',
1180
+ 3: '.str.contains(',
1181
+ }
1182
+ for expr in values:
1183
+ expr = self.remove_alias(expr)
1184
+ field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
1185
+ if op.upper() == 'LIKE' and len(const) == 3:
1186
+ level = 0
1187
+ if '%' in const[0]:
1188
+ level += 2
1189
+ if '%' in const[2]:
1190
+ level += 1
1191
+ const = f"'{const[1]}'"
1192
+ op = STR_FUNC[level]
1193
+ else:
1194
+ const = ''.join(const)
1195
+ conditions.append(
1196
+ f"(df['{field}']{op}{const})"
1197
+ )
1198
+ if not conditions:
1199
+ return ''
1200
+ return '[\n{}\n]'.format(
1201
+ '&'.join(f'\t{c}' for c in conditions),
1202
+ )
1203
+
1204
+ def clean_values(self, values: list) -> str:
1205
+ for i in range(len(values)):
1206
+ content = self.remove_alias(values[i])
1207
+ values[i] = f"'{content}'"
1208
+ return ','.join(values)
1209
+
1210
+ def sort_by(self, values: list) -> str:
1211
+ if not values:
1212
+ return ''
1213
+ return '.sort_values(\n{},\n\tascending = {}\n)'.format(
1214
+ '\t'+self.clean_values(values), OrderBy.ascending(values[-1])
1215
+ )
1122
1216
 
1123
1217
  def set_group(self, values: list) -> str:
1124
- return self.join_with_tabs(values, ',')
1218
+ result = '.groupby([\n\t{}\n])'.format(
1219
+ self.clean_values(values)
1220
+ )
1221
+ if self.aggregation_fields:
1222
+ PANDAS_AGG_FUNC = {'Avg': 'mean', 'Count': 'size'}
1223
+ result += '.agg({'
1224
+ for field in self.aggregation_fields:
1225
+ func, field, *alias = re.split('[()]', field) # [To-Do: Use `alias`]
1226
+ result += "{}'{}': ['{}']".format(
1227
+ self.TABULATION, field,
1228
+ PANDAS_AGG_FUNC.get(func, func)
1229
+ )
1230
+ result += '\n})'
1231
+ return result
1232
+
1233
+ def __init__(self, target: 'Select'):
1234
+ super().__init__(target)
1235
+ self.result['function'] = 'find'
1125
1236
 
1237
+ def prefix(self, key: str):
1238
+ return ''
1126
1239
 
1127
1240
 
1128
1241
  class Parser:
@@ -1539,6 +1652,7 @@ class MongoParser(Parser):
1539
1652
  class Select(SQLObject):
1540
1653
  join_type: JoinType = JoinType.INNER
1541
1654
  EQUIVALENT_NAMES = {}
1655
+ DefaultLanguage = QueryLanguage
1542
1656
 
1543
1657
  def __init__(self, table_name: str='', **values):
1544
1658
  super().__init__(table_name)
@@ -1598,7 +1712,7 @@ class Select(SQLObject):
1598
1712
  return query
1599
1713
 
1600
1714
  def __str__(self) -> str:
1601
- return self.translate_to(QueryLanguage)
1715
+ return self.translate_to(self.DefaultLanguage)
1602
1716
 
1603
1717
  def __call__(self, **values):
1604
1718
  for name, params in values.items():
@@ -1921,47 +2035,24 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
1921
2035
 
1922
2036
 
1923
2037
  if __name__ == "__main__":
1924
- # def identifica_suspeitos() -> Select:
1925
- # """Mostra quais pessoas tem caracteríosticas iguais à descrição do suspeito"""
1926
- # Select.join_type = JoinType.LEFT
1927
- # return Select(
1928
- # 'Suspeito s', id=Field,
1929
- # _=Where.join(
1930
- # Select('Pessoa p',
1931
- # OR=Options(
1932
- # pessoa=Where('= s.id'),
1933
- # altura=Where.formula('ABS(% - s.{f}) < 0.5'),
1934
- # peso=Where.formula('ABS(% - s.{f}) < 0.5'),
1935
- # cabelo=Where.formula('% = s.{f}'),
1936
- # olhos=Where.formula('% = s.{f}'),
1937
- # sexo=Where.formula('% = s.{f}'),
1938
- # ),
1939
- # nome=Field
1940
- # )
1941
- # )
1942
- # )
1943
- # query = identifica_suspeitos()
1944
- # print('='*50)
1945
- # print(query)
1946
- # print('-'*50)
1947
- script = '''
1948
- db.people.find({
1949
- {
1950
- $or: [
1951
- status:{$eq:"B"},
1952
- age:{$lt:50}
1953
- ]
1954
- },
1955
- age:{$gte:18}, status:{$eq:"A"}
1956
- },{
1957
- name: 1, user_id: 1
1958
- }).sort({
1959
- '''
2038
+ query = detect('''
2039
+ SELECT
2040
+ e.gender, d.region,
2041
+ Avg(e.age)
2042
+ FROM
2043
+ Employees e
2044
+ LEFT JOIN Department d ON (e.depto_id = d.id)
2045
+ WHERE
2046
+ e.name LIKE 'C%'
2047
+ GROUP BY
2048
+ e.gender, d.region
2049
+ ORDER BY
2050
+ d.region DESC
2051
+ ''')
1960
2052
  print('='*50)
1961
- q1 = Select.parse(script, MongoParser)[0]
1962
- print(q1)
2053
+ print(query)
1963
2054
  print('-'*50)
1964
- q2 = q1.translate_to(MongoDBLanguage)
1965
- print(q2)
1966
- # print('-'*50)
1967
- print('='*50)
2055
+ # Select.DefaultLanguage = DatabricksLanguage
2056
+ Select.DefaultLanguage = PandasLanguage
2057
+ print(query)
2058
+ print('='*50)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sql_blocks
3
- Version: 1.25.514999999999
3
+ Version: 1.25.516999999999
4
4
  Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
5
5
  Home-page: https://github.com/julio-cascalles/sql_blocks
6
6
  Author: Júlio Cascalles
@@ -652,10 +652,12 @@ Automatically assigns aliases to each side of the relationship (In this example,
652
652
 
653
653
  ---
654
654
  ### `translate_to` method
655
- It consists of the inverse process of parsing: From a Select object, it returns the text to a script in any of the languages ​​below:
655
+ From a Select object, it returns the text of a script in any of the languages below:
656
656
  * QueryLanguage - default
657
657
  * MongoDBLanguage
658
658
  * Neo4JLanguage
659
+ * DatabricksLanguage
660
+ * PandasLanguage
659
661
 
660
662
  ---
661
663
  ### 14 - Window Function
@@ -0,0 +1,7 @@
1
+ sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
2
+ sql_blocks/sql_blocks.py,sha256=kM3hBe1P9qDZOORqoX9tTS80EMtEFpBxtOEo8miLEd4,69102
3
+ sql_blocks-1.25.516999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
4
+ sql_blocks-1.25.516999999999.dist-info/METADATA,sha256=CHRNGYWScyUPpE2GbRriFkMoELDb8WYNGGEVLlnxa38,22235
5
+ sql_blocks-1.25.516999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
+ sql_blocks-1.25.516999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
7
+ sql_blocks-1.25.516999999999.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
2
- sql_blocks/sql_blocks.py,sha256=8msHsR5Ttp8vpCJbhU7wd91IP-TboC0XAc1204kLKXE,65953
3
- sql_blocks-1.25.514999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
4
- sql_blocks-1.25.514999999999.dist-info/METADATA,sha256=vxHahM3KUO84oALwycgcIdR2szRmrKUo-9RjDZffWhk,22242
5
- sql_blocks-1.25.514999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
6
- sql_blocks-1.25.514999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
7
- sql_blocks-1.25.514999999999.dist-info/RECORD,,