sql-blocks 1.25.514999999999__py3-none-any.whl → 1.25.516999999999__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sql_blocks/sql_blocks.py +169 -78
- {sql_blocks-1.25.514999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/METADATA +4 -2
- sql_blocks-1.25.516999999999.dist-info/RECORD +7 -0
- sql_blocks-1.25.514999999999.dist-info/RECORD +0 -7
- {sql_blocks-1.25.514999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/LICENSE +0 -0
- {sql_blocks-1.25.514999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/WHEEL +0 -0
- {sql_blocks-1.25.514999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/top_level.txt +0 -0
sql_blocks/sql_blocks.py
CHANGED
@@ -38,10 +38,8 @@ class SQLObject:
|
|
38
38
|
self.key_field = ''
|
39
39
|
self.set_table(table_name)
|
40
40
|
|
41
|
-
|
42
|
-
|
43
|
-
return
|
44
|
-
cls = SQLObject
|
41
|
+
@classmethod
|
42
|
+
def split_alias(cls, table_name: str) -> tuple:
|
45
43
|
is_file_name = any([
|
46
44
|
'/' in table_name, '.' in table_name
|
47
45
|
])
|
@@ -49,16 +47,21 @@ class SQLObject:
|
|
49
47
|
if is_file_name:
|
50
48
|
ref = table_name.split('/')[-1].split('.')[0]
|
51
49
|
if cls.ALIAS_FUNC:
|
52
|
-
|
50
|
+
return cls.ALIAS_FUNC(ref), table_name
|
53
51
|
elif ' ' in table_name.strip():
|
54
|
-
table_name,
|
52
|
+
table_name, alias = table_name.split()
|
53
|
+
return alias, table_name
|
55
54
|
elif '_' in ref:
|
56
|
-
|
55
|
+
return ''.join(
|
57
56
|
word[0].lower()
|
58
57
|
for word in ref.split('_')
|
59
|
-
)
|
60
|
-
|
61
|
-
|
58
|
+
), table_name
|
59
|
+
return ref.lower()[:3], table_name
|
60
|
+
|
61
|
+
def set_table(self, table_name: str):
|
62
|
+
if not table_name:
|
63
|
+
return
|
64
|
+
self.__alias, table_name = self.split_alias(table_name)
|
62
65
|
self.values.setdefault(FROM, []).append(f'{table_name} {self.alias}')
|
63
66
|
|
64
67
|
@property
|
@@ -777,6 +780,20 @@ class OrderBy(Clause):
|
|
777
780
|
name = cls.format(name, main)
|
778
781
|
main.values.setdefault(ORDER_BY, []).append(name+cls.sort.value)
|
779
782
|
|
783
|
+
@staticmethod
|
784
|
+
def ascending(value: str) -> bool:
|
785
|
+
if re.findall(r'\s+(DESC)\s*$', value):
|
786
|
+
return False
|
787
|
+
return True
|
788
|
+
|
789
|
+
@classmethod
|
790
|
+
def format(cls, name: str, main: SQLObject) -> str:
|
791
|
+
if cls.ascending(name):
|
792
|
+
cls.sort = SortType.ASC
|
793
|
+
else:
|
794
|
+
cls.sort = SortType.DESC
|
795
|
+
return super().format(name, main)
|
796
|
+
|
780
797
|
@classmethod
|
781
798
|
def cls_to_str(cls) -> str:
|
782
799
|
return ORDER_BY
|
@@ -863,6 +880,8 @@ class QueryLanguage:
|
|
863
880
|
return self.join_with_tabs(values, ' AND ')
|
864
881
|
|
865
882
|
def sort_by(self, values: list) -> str:
|
883
|
+
if OrderBy.sort == SortType.DESC:
|
884
|
+
values[-1] += ' DESC'
|
866
885
|
return self.join_with_tabs(values, ',')
|
867
886
|
|
868
887
|
def set_group(self, values: list) -> str:
|
@@ -1076,28 +1095,35 @@ class Neo4JLanguage(QueryLanguage):
|
|
1076
1095
|
return ''
|
1077
1096
|
|
1078
1097
|
|
1079
|
-
class
|
1080
|
-
pattern = '{_from}{where}{group_by}{order_by}{select}{limit}'
|
1081
|
-
has_default = {key: bool(key == SELECT) for key in KEYWORD}
|
1082
|
-
|
1098
|
+
class DataAnalysisLanguage(QueryLanguage):
|
1083
1099
|
def __init__(self, target: 'Select'):
|
1084
1100
|
super().__init__(target)
|
1085
1101
|
self.aggregation_fields = []
|
1086
1102
|
|
1087
|
-
def
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1103
|
+
def split_agg_fields(self, values: list) -> list:
|
1104
|
+
AGG_FUNC_REGEX = re.compile(
|
1105
|
+
r'({})[(]'.format(
|
1106
|
+
'|'.join(cls.__name__ for cls in Aggregate.__subclasses__())
|
1107
|
+
),
|
1108
|
+
re.IGNORECASE
|
1109
|
+
)
|
1110
|
+
common_fields = []
|
1111
|
+
for field in values:
|
1112
|
+
field = self.remove_alias(field)
|
1113
|
+
if AGG_FUNC_REGEX.findall(field):
|
1114
|
+
self.aggregation_fields.append(field)
|
1097
1115
|
else:
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1116
|
+
common_fields.append(field)
|
1117
|
+
return common_fields
|
1118
|
+
|
1119
|
+
class DatabricksLanguage(DataAnalysisLanguage):
|
1120
|
+
pattern = '{_from}{where}{group_by}{order_by}{select}{limit}'
|
1121
|
+
has_default = {key: bool(key == SELECT) for key in KEYWORD}
|
1122
|
+
|
1123
|
+
def add_field(self, values: list) -> str:
|
1124
|
+
return super().add_field(
|
1125
|
+
self.split_agg_fields(values)
|
1126
|
+
)
|
1101
1127
|
|
1102
1128
|
def prefix(self, key: str) -> str:
|
1103
1129
|
def get_aggregate() -> str:
|
@@ -1105,24 +1131,111 @@ class DatabricksLanguage(QueryLanguage):
|
|
1105
1131
|
','.join(self.aggregation_fields)
|
1106
1132
|
)
|
1107
1133
|
return '{}{}{}{}{}'.format(
|
1108
|
-
'|> ' if key != FROM else '',
|
1109
1134
|
self.LINE_BREAK,
|
1135
|
+
'|> ' if key != FROM else '',
|
1110
1136
|
get_aggregate() if key == GROUP_BY else '',
|
1111
1137
|
key, self.TABULATION
|
1112
1138
|
)
|
1113
1139
|
|
1114
|
-
# def get_tables(self, values: list) -> str:
|
1115
|
-
# return self.join_with_tabs(values)
|
1116
1140
|
|
1117
|
-
|
1118
|
-
|
1141
|
+
class PandasLanguage(DataAnalysisLanguage):
|
1142
|
+
pattern = '{_from}{where}{select}{group_by}{order_by}'
|
1143
|
+
has_default = {key: False for key in KEYWORD}
|
1144
|
+
|
1145
|
+
def add_field(self, values: list) -> str:
|
1146
|
+
def line_field_fmt(field: str) -> str:
|
1147
|
+
return "{}'{}'".format(
|
1148
|
+
self.TABULATION, field
|
1149
|
+
)
|
1150
|
+
common_fields = self.split_agg_fields(values)
|
1151
|
+
if common_fields:
|
1152
|
+
return '[[\n{}\n]]'.format(
|
1153
|
+
','.join(line_field_fmt(fld) for fld in common_fields)
|
1154
|
+
)
|
1155
|
+
return ''
|
1156
|
+
|
1157
|
+
def get_tables(self, values: list) -> str:
|
1158
|
+
result = 'import pandas as pd'
|
1159
|
+
names = {}
|
1160
|
+
for table in values:
|
1161
|
+
table, *join = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', table) if t.strip()]
|
1162
|
+
alias, table = SQLObject.split_alias(table)
|
1163
|
+
result += f"\ndf_{table} = pd.read_csv('{table}.csv')"
|
1164
|
+
names[alias] = table
|
1165
|
+
if join:
|
1166
|
+
a1, f1, a2, f2 = [r.strip() for r in re.split('[().=]', join[-1]) if r]
|
1167
|
+
result += "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
|
1168
|
+
last_table, names[a1], names[a2], f1, f2, 'inner'
|
1169
|
+
)
|
1170
|
+
last_table = table
|
1171
|
+
_, table = SQLObject.split_alias(values[0])
|
1172
|
+
result += f'\ndf = df_{table}\n\ndf = df\n'
|
1173
|
+
return result
|
1119
1174
|
|
1120
|
-
|
1121
|
-
|
1175
|
+
def extract_conditions(self, values: list) -> str:
|
1176
|
+
conditions = []
|
1177
|
+
STR_FUNC = {
|
1178
|
+
1: '.str.startswith(',
|
1179
|
+
2: '.str.endswith(',
|
1180
|
+
3: '.str.contains(',
|
1181
|
+
}
|
1182
|
+
for expr in values:
|
1183
|
+
expr = self.remove_alias(expr)
|
1184
|
+
field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
|
1185
|
+
if op.upper() == 'LIKE' and len(const) == 3:
|
1186
|
+
level = 0
|
1187
|
+
if '%' in const[0]:
|
1188
|
+
level += 2
|
1189
|
+
if '%' in const[2]:
|
1190
|
+
level += 1
|
1191
|
+
const = f"'{const[1]}'"
|
1192
|
+
op = STR_FUNC[level]
|
1193
|
+
else:
|
1194
|
+
const = ''.join(const)
|
1195
|
+
conditions.append(
|
1196
|
+
f"(df['{field}']{op}{const})"
|
1197
|
+
)
|
1198
|
+
if not conditions:
|
1199
|
+
return ''
|
1200
|
+
return '[\n{}\n]'.format(
|
1201
|
+
'&'.join(f'\t{c}' for c in conditions),
|
1202
|
+
)
|
1203
|
+
|
1204
|
+
def clean_values(self, values: list) -> str:
|
1205
|
+
for i in range(len(values)):
|
1206
|
+
content = self.remove_alias(values[i])
|
1207
|
+
values[i] = f"'{content}'"
|
1208
|
+
return ','.join(values)
|
1209
|
+
|
1210
|
+
def sort_by(self, values: list) -> str:
|
1211
|
+
if not values:
|
1212
|
+
return ''
|
1213
|
+
return '.sort_values(\n{},\n\tascending = {}\n)'.format(
|
1214
|
+
'\t'+self.clean_values(values), OrderBy.ascending(values[-1])
|
1215
|
+
)
|
1122
1216
|
|
1123
1217
|
def set_group(self, values: list) -> str:
|
1124
|
-
|
1218
|
+
result = '.groupby([\n\t{}\n])'.format(
|
1219
|
+
self.clean_values(values)
|
1220
|
+
)
|
1221
|
+
if self.aggregation_fields:
|
1222
|
+
PANDAS_AGG_FUNC = {'Avg': 'mean', 'Count': 'size'}
|
1223
|
+
result += '.agg({'
|
1224
|
+
for field in self.aggregation_fields:
|
1225
|
+
func, field, *alias = re.split('[()]', field) # [To-Do: Use `alias`]
|
1226
|
+
result += "{}'{}': ['{}']".format(
|
1227
|
+
self.TABULATION, field,
|
1228
|
+
PANDAS_AGG_FUNC.get(func, func)
|
1229
|
+
)
|
1230
|
+
result += '\n})'
|
1231
|
+
return result
|
1232
|
+
|
1233
|
+
def __init__(self, target: 'Select'):
|
1234
|
+
super().__init__(target)
|
1235
|
+
self.result['function'] = 'find'
|
1125
1236
|
|
1237
|
+
def prefix(self, key: str):
|
1238
|
+
return ''
|
1126
1239
|
|
1127
1240
|
|
1128
1241
|
class Parser:
|
@@ -1539,6 +1652,7 @@ class MongoParser(Parser):
|
|
1539
1652
|
class Select(SQLObject):
|
1540
1653
|
join_type: JoinType = JoinType.INNER
|
1541
1654
|
EQUIVALENT_NAMES = {}
|
1655
|
+
DefaultLanguage = QueryLanguage
|
1542
1656
|
|
1543
1657
|
def __init__(self, table_name: str='', **values):
|
1544
1658
|
super().__init__(table_name)
|
@@ -1598,7 +1712,7 @@ class Select(SQLObject):
|
|
1598
1712
|
return query
|
1599
1713
|
|
1600
1714
|
def __str__(self) -> str:
|
1601
|
-
return self.translate_to(
|
1715
|
+
return self.translate_to(self.DefaultLanguage)
|
1602
1716
|
|
1603
1717
|
def __call__(self, **values):
|
1604
1718
|
for name, params in values.items():
|
@@ -1921,47 +2035,24 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
|
|
1921
2035
|
|
1922
2036
|
|
1923
2037
|
if __name__ == "__main__":
|
1924
|
-
|
1925
|
-
|
1926
|
-
|
1927
|
-
|
1928
|
-
|
1929
|
-
|
1930
|
-
|
1931
|
-
|
1932
|
-
|
1933
|
-
|
1934
|
-
|
1935
|
-
|
1936
|
-
|
1937
|
-
|
1938
|
-
# ),
|
1939
|
-
# nome=Field
|
1940
|
-
# )
|
1941
|
-
# )
|
1942
|
-
# )
|
1943
|
-
# query = identifica_suspeitos()
|
1944
|
-
# print('='*50)
|
1945
|
-
# print(query)
|
1946
|
-
# print('-'*50)
|
1947
|
-
script = '''
|
1948
|
-
db.people.find({
|
1949
|
-
{
|
1950
|
-
$or: [
|
1951
|
-
status:{$eq:"B"},
|
1952
|
-
age:{$lt:50}
|
1953
|
-
]
|
1954
|
-
},
|
1955
|
-
age:{$gte:18}, status:{$eq:"A"}
|
1956
|
-
},{
|
1957
|
-
name: 1, user_id: 1
|
1958
|
-
}).sort({
|
1959
|
-
'''
|
2038
|
+
query = detect('''
|
2039
|
+
SELECT
|
2040
|
+
e.gender, d.region,
|
2041
|
+
Avg(e.age)
|
2042
|
+
FROM
|
2043
|
+
Employees e
|
2044
|
+
LEFT JOIN Department d ON (e.depto_id = d.id)
|
2045
|
+
WHERE
|
2046
|
+
e.name LIKE 'C%'
|
2047
|
+
GROUP BY
|
2048
|
+
e.gender, d.region
|
2049
|
+
ORDER BY
|
2050
|
+
d.region DESC
|
2051
|
+
''')
|
1960
2052
|
print('='*50)
|
1961
|
-
|
1962
|
-
print(q1)
|
2053
|
+
print(query)
|
1963
2054
|
print('-'*50)
|
1964
|
-
|
1965
|
-
|
1966
|
-
|
1967
|
-
print('='*50)
|
2055
|
+
# Select.DefaultLanguage = DatabricksLanguage
|
2056
|
+
Select.DefaultLanguage = PandasLanguage
|
2057
|
+
print(query)
|
2058
|
+
print('='*50)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sql_blocks
|
3
|
-
Version: 1.25.
|
3
|
+
Version: 1.25.516999999999
|
4
4
|
Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
|
5
5
|
Home-page: https://github.com/julio-cascalles/sql_blocks
|
6
6
|
Author: Júlio Cascalles
|
@@ -652,10 +652,12 @@ Automatically assigns aliases to each side of the relationship (In this example,
|
|
652
652
|
|
653
653
|
---
|
654
654
|
### `translate_to` method
|
655
|
-
|
655
|
+
From a Select object, it returns the text to a script in any of the languages below:
|
656
656
|
* QueryLanguage - default
|
657
657
|
* MongoDBLanguage
|
658
658
|
* Neo4JLanguage
|
659
|
+
* DatabricksLanguage
|
660
|
+
* PandasLanguage
|
659
661
|
|
660
662
|
---
|
661
663
|
### 14 - Window Function
|
@@ -0,0 +1,7 @@
|
|
1
|
+
sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
|
2
|
+
sql_blocks/sql_blocks.py,sha256=kM3hBe1P9qDZOORqoX9tTS80EMtEFpBxtOEo8miLEd4,69102
|
3
|
+
sql_blocks-1.25.516999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
4
|
+
sql_blocks-1.25.516999999999.dist-info/METADATA,sha256=CHRNGYWScyUPpE2GbRriFkMoELDb8WYNGGEVLlnxa38,22235
|
5
|
+
sql_blocks-1.25.516999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
sql_blocks-1.25.516999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
|
7
|
+
sql_blocks-1.25.516999999999.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
|
2
|
-
sql_blocks/sql_blocks.py,sha256=8msHsR5Ttp8vpCJbhU7wd91IP-TboC0XAc1204kLKXE,65953
|
3
|
-
sql_blocks-1.25.514999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
4
|
-
sql_blocks-1.25.514999999999.dist-info/METADATA,sha256=vxHahM3KUO84oALwycgcIdR2szRmrKUo-9RjDZffWhk,22242
|
5
|
-
sql_blocks-1.25.514999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
sql_blocks-1.25.514999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
|
7
|
-
sql_blocks-1.25.514999999999.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{sql_blocks-1.25.514999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/top_level.txt
RENAMED
File without changes
|