sql-blocks 1.25.51999999999__py3-none-any.whl → 1.25.516999999999__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sql_blocks/sql_blocks.py +238 -28
- {sql_blocks-1.25.51999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/METADATA +4 -2
- sql_blocks-1.25.516999999999.dist-info/RECORD +7 -0
- sql_blocks-1.25.51999999999.dist-info/RECORD +0 -7
- {sql_blocks-1.25.51999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/LICENSE +0 -0
- {sql_blocks-1.25.51999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/WHEEL +0 -0
- {sql_blocks-1.25.51999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/top_level.txt +0 -0
sql_blocks/sql_blocks.py
CHANGED
@@ -38,10 +38,8 @@ class SQLObject:
|
|
38
38
|
self.key_field = ''
|
39
39
|
self.set_table(table_name)
|
40
40
|
|
41
|
-
|
42
|
-
|
43
|
-
return
|
44
|
-
cls = SQLObject
|
41
|
+
@classmethod
|
42
|
+
def split_alias(cls, table_name: str) -> tuple:
|
45
43
|
is_file_name = any([
|
46
44
|
'/' in table_name, '.' in table_name
|
47
45
|
])
|
@@ -49,16 +47,21 @@ class SQLObject:
|
|
49
47
|
if is_file_name:
|
50
48
|
ref = table_name.split('/')[-1].split('.')[0]
|
51
49
|
if cls.ALIAS_FUNC:
|
52
|
-
|
50
|
+
return cls.ALIAS_FUNC(ref), table_name
|
53
51
|
elif ' ' in table_name.strip():
|
54
|
-
table_name,
|
52
|
+
table_name, alias = table_name.split()
|
53
|
+
return alias, table_name
|
55
54
|
elif '_' in ref:
|
56
|
-
|
55
|
+
return ''.join(
|
57
56
|
word[0].lower()
|
58
57
|
for word in ref.split('_')
|
59
|
-
)
|
60
|
-
|
61
|
-
|
58
|
+
), table_name
|
59
|
+
return ref.lower()[:3], table_name
|
60
|
+
|
61
|
+
def set_table(self, table_name: str):
|
62
|
+
if not table_name:
|
63
|
+
return
|
64
|
+
self.__alias, table_name = self.split_alias(table_name)
|
62
65
|
self.values.setdefault(FROM, []).append(f'{table_name} {self.alias}')
|
63
66
|
|
64
67
|
@property
|
@@ -99,10 +102,11 @@ class SQLObject:
|
|
99
102
|
for fld in source:
|
100
103
|
result += re.split(r'([=()]|<>|\s+ON\s+|\s+on\s+)', fld)
|
101
104
|
return result
|
102
|
-
def cleanup(
|
105
|
+
def cleanup(text: str) -> str:
|
106
|
+
text = re.sub(r'[\n\t]', ' ', text)
|
103
107
|
if exact:
|
104
|
-
|
105
|
-
return
|
108
|
+
text = text.lower()
|
109
|
+
return text.strip()
|
106
110
|
def field_set(source: list) -> set:
|
107
111
|
return set(
|
108
112
|
(
|
@@ -597,10 +601,11 @@ class Where:
|
|
597
601
|
main.values[FROM].append(f',{query.table_name} {query.alias}')
|
598
602
|
for key in USUAL_KEYS:
|
599
603
|
main.update_values(key, query.values.get(key, []))
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
+
if query.key_field:
|
605
|
+
main.values.setdefault(WHERE, []).append('({a1}.{f1} = {a2}.{f2})'.format(
|
606
|
+
a1=main.alias, f1=name,
|
607
|
+
a2=query.alias, f2=query.key_field
|
608
|
+
))
|
604
609
|
|
605
610
|
def add(self, name: str, main: SQLObject):
|
606
611
|
func_type = FUNCTION_CLASS.get(name.lower())
|
@@ -667,16 +672,14 @@ class Options:
|
|
667
672
|
self.__children: dict = values
|
668
673
|
|
669
674
|
def add(self, logical_separator: str, main: SQLObject):
|
670
|
-
if logical_separator not in ('AND', 'OR'):
|
675
|
+
if logical_separator.upper() not in ('AND', 'OR'):
|
671
676
|
raise ValueError('`logical_separator` must be AND or OR')
|
672
|
-
|
677
|
+
temp = Select(f'{main.table_name} {main.alias}')
|
673
678
|
child: Where
|
674
679
|
for field, child in self.__children.items():
|
675
|
-
|
676
|
-
Field.format(field, main), child.content
|
677
|
-
))
|
680
|
+
child.add(field, temp)
|
678
681
|
main.values.setdefault(WHERE, []).append(
|
679
|
-
'(' + logical_separator.join(
|
682
|
+
'(' + f'\n\t{logical_separator} '.join(temp.values[WHERE]) + ')'
|
680
683
|
)
|
681
684
|
|
682
685
|
|
@@ -777,6 +780,20 @@ class OrderBy(Clause):
|
|
777
780
|
name = cls.format(name, main)
|
778
781
|
main.values.setdefault(ORDER_BY, []).append(name+cls.sort.value)
|
779
782
|
|
783
|
+
@staticmethod
|
784
|
+
def ascending(value: str) -> bool:
|
785
|
+
if re.findall(r'\s+(DESC)\s*$', value):
|
786
|
+
return False
|
787
|
+
return True
|
788
|
+
|
789
|
+
@classmethod
|
790
|
+
def format(cls, name: str, main: SQLObject) -> str:
|
791
|
+
if cls.ascending(name):
|
792
|
+
cls.sort = SortType.ASC
|
793
|
+
else:
|
794
|
+
cls.sort = SortType.DESC
|
795
|
+
return super().format(name, main)
|
796
|
+
|
780
797
|
@classmethod
|
781
798
|
def cls_to_str(cls) -> str:
|
782
799
|
return ORDER_BY
|
@@ -836,8 +853,16 @@ class QueryLanguage:
|
|
836
853
|
has_default = {key: bool(key == SELECT) for key in KEYWORD}
|
837
854
|
|
838
855
|
@staticmethod
|
839
|
-
def remove_alias(
|
840
|
-
|
856
|
+
def remove_alias(text: str) -> str:
|
857
|
+
value, sep = '', ''
|
858
|
+
text = re.sub('[\n\t]', ' ', text)
|
859
|
+
if ':' in text:
|
860
|
+
text, value = text.split(':', maxsplit=1)
|
861
|
+
sep = ':'
|
862
|
+
return '{}{}{}'.format(
|
863
|
+
''.join(re.split(r'\w+[.]', text)),
|
864
|
+
sep, value.replace("'", '"')
|
865
|
+
)
|
841
866
|
|
842
867
|
def join_with_tabs(self, values: list, sep: str='') -> str:
|
843
868
|
sep = sep + self.TABULATION
|
@@ -855,6 +880,8 @@ class QueryLanguage:
|
|
855
880
|
return self.join_with_tabs(values, ' AND ')
|
856
881
|
|
857
882
|
def sort_by(self, values: list) -> str:
|
883
|
+
if OrderBy.sort == SortType.DESC:
|
884
|
+
values[-1] += ' DESC'
|
858
885
|
return self.join_with_tabs(values, ',')
|
859
886
|
|
860
887
|
def set_group(self, values: list) -> str:
|
@@ -905,7 +932,8 @@ class MongoDBLanguage(QueryLanguage):
|
|
905
932
|
LOGICAL_OP_TO_MONGO_FUNC = {
|
906
933
|
'>': '$gt', '>=': '$gte',
|
907
934
|
'<': '$lt', '<=': '$lte',
|
908
|
-
'=': '$eq', '<>': '$ne',
|
935
|
+
'=': '$eq', '<>': '$ne',
|
936
|
+
'like': '$regex', 'LIKE': '$regex',
|
909
937
|
}
|
910
938
|
OPERATORS = '|'.join(op for op in LOGICAL_OP_TO_MONGO_FUNC)
|
911
939
|
REGEX = {
|
@@ -958,7 +986,7 @@ class MongoDBLanguage(QueryLanguage):
|
|
958
986
|
field, *op, const = tokens
|
959
987
|
op = ''.join(op)
|
960
988
|
expr = '{begin}{op}:{const}{end}'.format(
|
961
|
-
begin='{', const=const, end='}',
|
989
|
+
begin='{', const=const.replace('%', '.*'), end='}',
|
962
990
|
op=cls.LOGICAL_OP_TO_MONGO_FUNC[op],
|
963
991
|
)
|
964
992
|
where_list.append(f'{field}:{expr}')
|
@@ -1067,6 +1095,149 @@ class Neo4JLanguage(QueryLanguage):
|
|
1067
1095
|
return ''
|
1068
1096
|
|
1069
1097
|
|
1098
|
+
class DataAnalysisLanguage(QueryLanguage):
|
1099
|
+
def __init__(self, target: 'Select'):
|
1100
|
+
super().__init__(target)
|
1101
|
+
self.aggregation_fields = []
|
1102
|
+
|
1103
|
+
def split_agg_fields(self, values: list) -> list:
|
1104
|
+
AGG_FUNC_REGEX = re.compile(
|
1105
|
+
r'({})[(]'.format(
|
1106
|
+
'|'.join(cls.__name__ for cls in Aggregate.__subclasses__())
|
1107
|
+
),
|
1108
|
+
re.IGNORECASE
|
1109
|
+
)
|
1110
|
+
common_fields = []
|
1111
|
+
for field in values:
|
1112
|
+
field = self.remove_alias(field)
|
1113
|
+
if AGG_FUNC_REGEX.findall(field):
|
1114
|
+
self.aggregation_fields.append(field)
|
1115
|
+
else:
|
1116
|
+
common_fields.append(field)
|
1117
|
+
return common_fields
|
1118
|
+
|
1119
|
+
class DatabricksLanguage(DataAnalysisLanguage):
|
1120
|
+
pattern = '{_from}{where}{group_by}{order_by}{select}{limit}'
|
1121
|
+
has_default = {key: bool(key == SELECT) for key in KEYWORD}
|
1122
|
+
|
1123
|
+
def add_field(self, values: list) -> str:
|
1124
|
+
return super().add_field(
|
1125
|
+
self.split_agg_fields(values)
|
1126
|
+
)
|
1127
|
+
|
1128
|
+
def prefix(self, key: str) -> str:
|
1129
|
+
def get_aggregate() -> str:
|
1130
|
+
return 'AGGREGATE {} '.format(
|
1131
|
+
','.join(self.aggregation_fields)
|
1132
|
+
)
|
1133
|
+
return '{}{}{}{}{}'.format(
|
1134
|
+
self.LINE_BREAK,
|
1135
|
+
'|> ' if key != FROM else '',
|
1136
|
+
get_aggregate() if key == GROUP_BY else '',
|
1137
|
+
key, self.TABULATION
|
1138
|
+
)
|
1139
|
+
|
1140
|
+
|
1141
|
+
class PandasLanguage(DataAnalysisLanguage):
|
1142
|
+
pattern = '{_from}{where}{select}{group_by}{order_by}'
|
1143
|
+
has_default = {key: False for key in KEYWORD}
|
1144
|
+
|
1145
|
+
def add_field(self, values: list) -> str:
|
1146
|
+
def line_field_fmt(field: str) -> str:
|
1147
|
+
return "{}'{}'".format(
|
1148
|
+
self.TABULATION, field
|
1149
|
+
)
|
1150
|
+
common_fields = self.split_agg_fields(values)
|
1151
|
+
if common_fields:
|
1152
|
+
return '[[\n{}\n]]'.format(
|
1153
|
+
','.join(line_field_fmt(fld) for fld in common_fields)
|
1154
|
+
)
|
1155
|
+
return ''
|
1156
|
+
|
1157
|
+
def get_tables(self, values: list) -> str:
|
1158
|
+
result = 'import pandas as pd'
|
1159
|
+
names = {}
|
1160
|
+
for table in values:
|
1161
|
+
table, *join = [t.strip() for t in re.split('JOIN|LEFT|RIGHT|ON', table) if t.strip()]
|
1162
|
+
alias, table = SQLObject.split_alias(table)
|
1163
|
+
result += f"\ndf_{table} = pd.read_csv('{table}.csv')"
|
1164
|
+
names[alias] = table
|
1165
|
+
if join:
|
1166
|
+
a1, f1, a2, f2 = [r.strip() for r in re.split('[().=]', join[-1]) if r]
|
1167
|
+
result += "\n\ndf_{} = pd.merge(\n\tdf_{}, df_{}, left_on='{}', right_on='{}', how='{}'\n)\n".format(
|
1168
|
+
last_table, names[a1], names[a2], f1, f2, 'inner'
|
1169
|
+
)
|
1170
|
+
last_table = table
|
1171
|
+
_, table = SQLObject.split_alias(values[0])
|
1172
|
+
result += f'\ndf = df_{table}\n\ndf = df\n'
|
1173
|
+
return result
|
1174
|
+
|
1175
|
+
def extract_conditions(self, values: list) -> str:
|
1176
|
+
conditions = []
|
1177
|
+
STR_FUNC = {
|
1178
|
+
1: '.str.startswith(',
|
1179
|
+
2: '.str.endswith(',
|
1180
|
+
3: '.str.contains(',
|
1181
|
+
}
|
1182
|
+
for expr in values:
|
1183
|
+
expr = self.remove_alias(expr)
|
1184
|
+
field, op, *const = [t for t in re.split(r'(\w+)', expr) if t.strip()]
|
1185
|
+
if op.upper() == 'LIKE' and len(const) == 3:
|
1186
|
+
level = 0
|
1187
|
+
if '%' in const[0]:
|
1188
|
+
level += 2
|
1189
|
+
if '%' in const[2]:
|
1190
|
+
level += 1
|
1191
|
+
const = f"'{const[1]}'"
|
1192
|
+
op = STR_FUNC[level]
|
1193
|
+
else:
|
1194
|
+
const = ''.join(const)
|
1195
|
+
conditions.append(
|
1196
|
+
f"(df['{field}']{op}{const})"
|
1197
|
+
)
|
1198
|
+
if not conditions:
|
1199
|
+
return ''
|
1200
|
+
return '[\n{}\n]'.format(
|
1201
|
+
'&'.join(f'\t{c}' for c in conditions),
|
1202
|
+
)
|
1203
|
+
|
1204
|
+
def clean_values(self, values: list) -> str:
|
1205
|
+
for i in range(len(values)):
|
1206
|
+
content = self.remove_alias(values[i])
|
1207
|
+
values[i] = f"'{content}'"
|
1208
|
+
return ','.join(values)
|
1209
|
+
|
1210
|
+
def sort_by(self, values: list) -> str:
|
1211
|
+
if not values:
|
1212
|
+
return ''
|
1213
|
+
return '.sort_values(\n{},\n\tascending = {}\n)'.format(
|
1214
|
+
'\t'+self.clean_values(values), OrderBy.ascending(values[-1])
|
1215
|
+
)
|
1216
|
+
|
1217
|
+
def set_group(self, values: list) -> str:
|
1218
|
+
result = '.groupby([\n\t{}\n])'.format(
|
1219
|
+
self.clean_values(values)
|
1220
|
+
)
|
1221
|
+
if self.aggregation_fields:
|
1222
|
+
PANDAS_AGG_FUNC = {'Avg': 'mean', 'Count': 'size'}
|
1223
|
+
result += '.agg({'
|
1224
|
+
for field in self.aggregation_fields:
|
1225
|
+
func, field, *alias = re.split('[()]', field) # [To-Do: Use `alias`]
|
1226
|
+
result += "{}'{}': ['{}']".format(
|
1227
|
+
self.TABULATION, field,
|
1228
|
+
PANDAS_AGG_FUNC.get(func, func)
|
1229
|
+
)
|
1230
|
+
result += '\n})'
|
1231
|
+
return result
|
1232
|
+
|
1233
|
+
def __init__(self, target: 'Select'):
|
1234
|
+
super().__init__(target)
|
1235
|
+
self.result['function'] = 'find'
|
1236
|
+
|
1237
|
+
def prefix(self, key: str):
|
1238
|
+
return ''
|
1239
|
+
|
1240
|
+
|
1070
1241
|
class Parser:
|
1071
1242
|
REGEX = {}
|
1072
1243
|
|
@@ -1422,7 +1593,18 @@ class MongoParser(Parser):
|
|
1422
1593
|
|
1423
1594
|
def begin_conditions(self, value: str):
|
1424
1595
|
self.where_list = {}
|
1596
|
+
self.field_method = self.first_ORfield
|
1425
1597
|
return Where
|
1598
|
+
|
1599
|
+
def first_ORfield(self, text: str):
|
1600
|
+
if text.startswith('$'):
|
1601
|
+
return
|
1602
|
+
found = re.search(r'\w+[:]', text)
|
1603
|
+
if not found:
|
1604
|
+
return
|
1605
|
+
self.field_method = None
|
1606
|
+
p1, p2 = found.span()
|
1607
|
+
self.last_field = text[p1: p2-1]
|
1426
1608
|
|
1427
1609
|
def increment_brackets(self, value: str):
|
1428
1610
|
self.brackets[value] += 1
|
@@ -1431,6 +1613,7 @@ class MongoParser(Parser):
|
|
1431
1613
|
self.method = self.new_query
|
1432
1614
|
self.last_field = ''
|
1433
1615
|
self.where_list = None
|
1616
|
+
self.field_method = None
|
1434
1617
|
self.PARAM_BY_FUNCTION = {
|
1435
1618
|
'find': Where, 'aggregate': GroupBy, 'sort': OrderBy
|
1436
1619
|
}
|
@@ -1460,6 +1643,8 @@ class MongoParser(Parser):
|
|
1460
1643
|
self.close_brackets(
|
1461
1644
|
BRACKET_PAIR[token]
|
1462
1645
|
)
|
1646
|
+
elif self.field_method:
|
1647
|
+
self.field_method(token)
|
1463
1648
|
self.method = self.TOKEN_METHODS.get(token)
|
1464
1649
|
# ----------------------------
|
1465
1650
|
|
@@ -1467,6 +1652,7 @@ class MongoParser(Parser):
|
|
1467
1652
|
class Select(SQLObject):
|
1468
1653
|
join_type: JoinType = JoinType.INNER
|
1469
1654
|
EQUIVALENT_NAMES = {}
|
1655
|
+
DefaultLanguage = QueryLanguage
|
1470
1656
|
|
1471
1657
|
def __init__(self, table_name: str='', **values):
|
1472
1658
|
super().__init__(table_name)
|
@@ -1526,7 +1712,7 @@ class Select(SQLObject):
|
|
1526
1712
|
return query
|
1527
1713
|
|
1528
1714
|
def __str__(self) -> str:
|
1529
|
-
return self.translate_to(
|
1715
|
+
return self.translate_to(self.DefaultLanguage)
|
1530
1716
|
|
1531
1717
|
def __call__(self, **values):
|
1532
1718
|
for name, params in values.items():
|
@@ -1846,3 +2032,27 @@ def detect(text: str, join_queries: bool = True, format: str='') -> Select | lis
|
|
1846
2032
|
result += query
|
1847
2033
|
return result
|
1848
2034
|
# ===========================================================================================//
|
2035
|
+
|
2036
|
+
|
2037
|
+
if __name__ == "__main__":
|
2038
|
+
query = detect('''
|
2039
|
+
SELECT
|
2040
|
+
e.gender, d.region,
|
2041
|
+
Avg(e.age)
|
2042
|
+
FROM
|
2043
|
+
Employees e
|
2044
|
+
LEFT JOIN Department d ON (e.depto_id = d.id)
|
2045
|
+
WHERE
|
2046
|
+
e.name LIKE 'C%'
|
2047
|
+
GROUP BY
|
2048
|
+
e.gender, d.region
|
2049
|
+
ORDER BY
|
2050
|
+
d.region DESC
|
2051
|
+
''')
|
2052
|
+
print('='*50)
|
2053
|
+
print(query)
|
2054
|
+
print('-'*50)
|
2055
|
+
# Select.DefaultLanguage = DatabricksLanguage
|
2056
|
+
Select.DefaultLanguage = PandasLanguage
|
2057
|
+
print(query)
|
2058
|
+
print('='*50)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: sql_blocks
|
3
|
-
Version: 1.25.
|
3
|
+
Version: 1.25.516999999999
|
4
4
|
Summary: Allows you to create objects for parts of SQL query commands. Also to combine these objects by joining them, adding or removing parts...
|
5
5
|
Home-page: https://github.com/julio-cascalles/sql_blocks
|
6
6
|
Author: Júlio Cascalles
|
@@ -652,10 +652,12 @@ Automatically assigns aliases to each side of the relationship (In this example,
|
|
652
652
|
|
653
653
|
---
|
654
654
|
### `translate_to` method
|
655
|
-
|
655
|
+
From a Select object, it returns the text to a script in any of the languages below:
|
656
656
|
* QueryLanguage - default
|
657
657
|
* MongoDBLanguage
|
658
658
|
* Neo4JLanguage
|
659
|
+
* DatabricksLanguage
|
660
|
+
* PandasLanguage
|
659
661
|
|
660
662
|
---
|
661
663
|
### 14 - Window Function
|
@@ -0,0 +1,7 @@
|
|
1
|
+
sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
|
2
|
+
sql_blocks/sql_blocks.py,sha256=kM3hBe1P9qDZOORqoX9tTS80EMtEFpBxtOEo8miLEd4,69102
|
3
|
+
sql_blocks-1.25.516999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
4
|
+
sql_blocks-1.25.516999999999.dist-info/METADATA,sha256=CHRNGYWScyUPpE2GbRriFkMoELDb8WYNGGEVLlnxa38,22235
|
5
|
+
sql_blocks-1.25.516999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
+
sql_blocks-1.25.516999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
|
7
|
+
sql_blocks-1.25.516999999999.dist-info/RECORD,,
|
@@ -1,7 +0,0 @@
|
|
1
|
-
sql_blocks/__init__.py,sha256=5ItzGCyqqa6kwY8wvF9kapyHsAiWJ7KEXCcC-OtdXKg,37
|
2
|
-
sql_blocks/sql_blocks.py,sha256=ZdCFtPShmn-nHrE2tpJCWMnJYmPsc742CIkrPc_hSs4,61854
|
3
|
-
sql_blocks-1.25.51999999999.dist-info/LICENSE,sha256=6kbiFSfobTZ7beWiKnHpN902HgBx-Jzgcme0SvKqhKY,1091
|
4
|
-
sql_blocks-1.25.51999999999.dist-info/METADATA,sha256=ZK0V4KW5v8VtqFML82WFBbN_NpDN7iHbGjMo09fiRbc,22241
|
5
|
-
sql_blocks-1.25.51999999999.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
6
|
-
sql_blocks-1.25.51999999999.dist-info/top_level.txt,sha256=57AbUvUjYNy4m1EqDaU3WHeP-uyIAfV0n8GAUp1a1YQ,11
|
7
|
-
sql_blocks-1.25.51999999999.dist-info/RECORD,,
|
File without changes
|
File without changes
|
{sql_blocks-1.25.51999999999.dist-info → sql_blocks-1.25.516999999999.dist-info}/top_level.txt
RENAMED
File without changes
|