sql-metadata 2.8.0__tar.gz → 2.18.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,22 @@
1
- Metadata-Version: 2.1
2
- Name: sql-metadata
3
- Version: 2.8.0
1
+ Metadata-Version: 2.4
2
+ Name: sql_metadata
3
+ Version: 2.18.0
4
4
  Summary: Uses tokenized query returned by python-sqlparse and generates query metadata
5
- Home-page: https://github.com/macbre/sql-metadata
6
5
  License: MIT
6
+ License-File: LICENSE
7
7
  Author: Maciej Brencz
8
8
  Author-email: maciej.brencz@gmail.com
9
- Requires-Python: >=3.7.2,<4.0.0
9
+ Requires-Python: >=3.9,<4.0
10
10
  Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.8
13
12
  Classifier: Programming Language :: Python :: 3.9
14
13
  Classifier: Programming Language :: Python :: 3.10
15
14
  Classifier: Programming Language :: Python :: 3.11
16
- Requires-Dist: sqlparse (>=0.4.1,<0.5.0)
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Requires-Dist: sqlparse (>=0.4.1,<0.6.0)
19
+ Project-URL: Homepage, https://github.com/macbre/sql-metadata
17
20
  Project-URL: Repository, https://github.com/macbre/sql-metadata
18
21
  Description-Content-Type: text/markdown
19
22
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sql_metadata"
3
- version = "2.8.0"
3
+ version = "2.18.0"
4
4
  license="MIT"
5
5
  description = "Uses tokenized query returned by python-sqlparse and generates query metadata"
6
6
  authors = ["Maciej Brencz <maciej.brencz@gmail.com>", "Radosław Drążkiewicz <collerek@gmail.com>"]
@@ -13,17 +13,16 @@ packages = [
13
13
  ]
14
14
 
15
15
  [tool.poetry.dependencies]
16
- python = "^3.7.2"
17
- sqlparse = "^0.4.1"
16
+ python = "^3.9"
17
+ sqlparse = ">=0.4.1,<0.6.0"
18
18
 
19
19
  [tool.poetry.dev-dependencies]
20
- black = "^23.3"
21
- coverage = {extras = ["toml"], version = "^6.5"}
22
- pylint = "^2.17.2"
23
- pytest = "^7.3.1"
24
- pytest-cov = "^4.0.0"
25
- coveralls = "^3.3.1"
26
- flake8 = "^5.0.4"
20
+ black = "^25.9"
21
+ coverage = {extras = ["toml"], version = "^7.10"}
22
+ pylint = "^3.3.9"
23
+ pytest = "^8.4.2"
24
+ pytest-cov = "^7.0.0"
25
+ flake8 = "^7.3.0"
27
26
 
28
27
  [build-system]
29
28
  requires = ["poetry-core>=1.0.0"]
@@ -2,6 +2,7 @@
2
2
  Module for parsing sql queries and returning columns,
3
3
  tables, names of with statements etc.
4
4
  """
5
+
5
6
  # pylint:disable=unsubscriptable-object
6
7
  from sql_metadata.parser import Parser
7
8
  from sql_metadata.keywords_lists import QueryType
@@ -11,6 +11,7 @@ into:
11
11
  from sql_metadata.compat import get_query_columns, get_query_tables
12
12
 
13
13
  """
14
+
14
15
  # pylint:disable=missing-function-docstring
15
16
  from typing import List, Optional, Tuple
16
17
 
@@ -1,6 +1,7 @@
1
1
  """
2
2
  Module used to produce generalized sql out of given query
3
3
  """
4
+
4
5
  import re
5
6
  import sqlparse
6
7
 
@@ -47,7 +48,8 @@ class Generalizator:
47
48
  :rtype: str
48
49
  """
49
50
  sql = sqlparse.format(self._raw_query, strip_comments=True)
50
- sql = re.sub(r"\s{2,}", " ", sql)
51
+ sql = sql.replace("\n", " ")
52
+ sql = re.sub(r"[ \t]+", " ", sql)
51
53
  return sql
52
54
 
53
55
  @property
@@ -9,6 +9,7 @@ from enum import Enum
9
9
  KEYWORDS_BEFORE_COLUMNS = {
10
10
  "SELECT",
11
11
  "WHERE",
12
+ "HAVING",
12
13
  "ORDERBY",
13
14
  "GROUPBY",
14
15
  "ON",
@@ -28,6 +29,7 @@ TABLE_ADJUSTMENT_KEYWORDS = {
28
29
  "RIGHTJOIN",
29
30
  "LEFTOUTERJOIN",
30
31
  "RIGHTOUTERJOIN",
32
+ "NATURALJOIN",
31
33
  "INTO",
32
34
  "UPDATE",
33
35
  "TABLE",
@@ -48,6 +50,7 @@ SUBQUERY_PRECEDING_KEYWORDS = {
48
50
  "RIGHTJOIN",
49
51
  "LEFTOUTERJOIN",
50
52
  "RIGHTOUTERJOIN",
53
+ "NATURALJOIN",
51
54
  }
52
55
 
53
56
  # sections of a query in which a column can exist
@@ -55,12 +58,14 @@ SUBQUERY_PRECEDING_KEYWORDS = {
55
58
  COLUMNS_SECTIONS = {
56
59
  "SELECT": "select",
57
60
  "WHERE": "where",
61
+ "HAVING": "having",
58
62
  "ORDERBY": "order_by",
59
63
  "ON": "join",
60
64
  "USING": "join",
61
65
  "INTO": "insert",
62
66
  "SET": "update",
63
67
  "GROUPBY": "group_by",
68
+ "INNERJOIN": "inner_join",
64
69
  }
65
70
 
66
71
 
@@ -77,6 +82,7 @@ class QueryType(str, Enum):
77
82
  CREATE = "CREATE TABLE"
78
83
  ALTER = "ALTER TABLE"
79
84
  DROP = "DROP TABLE"
85
+ TRUNCATE = "TRUNCATE TABLE"
80
86
 
81
87
 
82
88
  class TokenType(str, Enum):
@@ -102,8 +108,11 @@ SUPPORTED_QUERY_TYPES = {
102
108
  "DELETE": QueryType.DELETE,
103
109
  "WITH": QueryType.SELECT,
104
110
  "CREATETABLE": QueryType.CREATE,
111
+ "CREATETEMPORARY": QueryType.CREATE,
105
112
  "ALTERTABLE": QueryType.ALTER,
106
113
  "DROPTABLE": QueryType.DROP,
114
+ "CREATEFUNCTION": QueryType.CREATE,
115
+ "TRUNCATETABLE": QueryType.TRUNCATE,
107
116
  }
108
117
 
109
118
  # all the keywords we care for - rest is ignored in assigning
@@ -118,6 +127,7 @@ RELEVANT_KEYWORDS = {
118
127
  "RETURNING",
119
128
  "VALUES",
120
129
  "INDEX",
130
+ "KEY",
121
131
  "WITH",
122
132
  "WINDOW",
123
133
  }
@@ -67,6 +67,7 @@ class Parser: # pylint: disable=R0902
67
67
  self._nested_level = 0
68
68
  self._parenthesis_level = 0
69
69
  self._open_parentheses: List[SQLToken] = []
70
+ self._preceded_keywords: List[SQLToken] = []
70
71
  self._aliases_to_check = None
71
72
  self._is_in_nested_function = False
72
73
  self._is_in_with_block = False
@@ -113,25 +114,32 @@ class Parser: # pylint: disable=R0902
113
114
  )
114
115
  .position
115
116
  )
116
- if tokens[index].normalized in ["CREATE", "ALTER", "DROP"]:
117
+ if tokens[index].normalized == "CREATE":
118
+ switch = self._get_switch_by_create_query(tokens, index)
119
+ elif tokens[index].normalized in ("ALTER", "DROP", "TRUNCATE"):
117
120
  switch = tokens[index].normalized + tokens[index + 1].normalized
118
121
  else:
119
122
  switch = tokens[index].normalized
120
123
  self._query_type = SUPPORTED_QUERY_TYPES.get(switch, "UNSUPPORTED")
121
124
  if self._query_type == "UNSUPPORTED":
122
- self._logger.error("Not supported query type: %s", self._raw_query)
125
+ # do not log the full query
126
+ # https://github.com/macbre/sql-metadata/issues/543
127
+ shorten_query = " ".join(self._raw_query.split(" ")[:3])
128
+
129
+ self._logger.error("Not supported query type: %s", shorten_query)
123
130
  raise ValueError("Not supported query type!")
124
131
  return self._query_type
125
132
 
126
133
  @property
127
- def tokens(self) -> List[SQLToken]:
134
+ def tokens(self) -> List[SQLToken]: # noqa: C901
128
135
  """
129
136
  Tokenizes the query
130
137
  """
131
138
  if self._tokens is not None:
132
139
  return self._tokens
133
140
 
134
- parsed = sqlparse.parse(self._query)
141
+ # allow parser to be overridden
142
+ parsed = self._parse(self._query)
135
143
  tokens = []
136
144
  # handle empty queries (#12)
137
145
  if not parsed:
@@ -164,6 +172,8 @@ class Parser: # pylint: disable=R0902
164
172
  elif token.is_right_parenthesis:
165
173
  token.token_type = TokenType.PARENTHESIS
166
174
  self._determine_closing_parenthesis_type(token=token)
175
+ if token.is_subquery_end:
176
+ last_keyword = self._preceded_keywords.pop()
167
177
 
168
178
  last_keyword = self._determine_last_relevant_keyword(
169
179
  token=token, last_keyword=last_keyword
@@ -214,7 +224,7 @@ class Parser: # pylint: disable=R0902
214
224
  self._handle_column_save(token=token, columns=columns)
215
225
 
216
226
  elif token.is_column_name_inside_insert_clause:
217
- column = str(token.value).strip("`")
227
+ column = str(token.value)
218
228
  self._add_to_columns_subsection(
219
229
  keyword=token.last_keyword_normalized, column=column
220
230
  )
@@ -364,10 +374,8 @@ class Parser: # pylint: disable=R0902
364
374
  and self.query_type == "INSERT"
365
375
  ):
366
376
  continue
367
-
368
- table_name = str(token.value.strip("`"))
369
377
  token.token_type = TokenType.TABLE
370
- tables.append(table_name)
378
+ tables.append(str(token.value))
371
379
 
372
380
  self._tables = tables - with_names
373
381
  return self._tables
@@ -460,7 +468,8 @@ class Parser: # pylint: disable=R0902
460
468
  while token.next_token and not token.is_with_query_end:
461
469
  token = token.next_token
462
470
  is_end_of_with_block = (
463
- token.next_token_not_comment.normalized
471
+ token.next_token_not_comment is None
472
+ or token.next_token_not_comment.normalized
464
473
  in WITH_ENDING_KEYWORDS
465
474
  )
466
475
  if is_end_of_with_block:
@@ -501,7 +510,7 @@ class Parser: # pylint: disable=R0902
501
510
  True, value_attribute="is_with_query_end", direction="right"
502
511
  )
503
512
  query_token = with_start.next_token
504
- while query_token != with_end:
513
+ while query_token is not None and query_token != with_end:
505
514
  current_with_query.append(query_token)
506
515
  query_token = query_token.next_token
507
516
  with_query_text = "".join([x.stringified_token for x in current_with_query])
@@ -530,12 +539,16 @@ class Parser: # pylint: disable=R0902
530
539
  ):
531
540
  current_subquery.append(inner_token)
532
541
  inner_token = inner_token.next_token
542
+
543
+ query_name = None
533
544
  if inner_token.next_token.value in self.subqueries_names:
534
545
  query_name = inner_token.next_token.value
535
- else:
546
+ elif inner_token.next_token.is_as_keyword:
536
547
  query_name = inner_token.next_token.next_token.value
548
+
537
549
  subquery_text = "".join([x.stringified_token for x in current_subquery])
538
- subqueries[query_name] = subquery_text
550
+ if query_name is not None:
551
+ subqueries[query_name] = subquery_text
539
552
 
540
553
  token = token.next_token
541
554
 
@@ -619,7 +632,7 @@ class Parser: # pylint: disable=R0902
619
632
  """
620
633
  Removes comments from SQL query
621
634
  """
622
- return Generalizator(self.query).without_comments
635
+ return Generalizator(self._raw_query).without_comments
623
636
 
624
637
  @property
625
638
  def generalize(self) -> str:
@@ -660,6 +673,10 @@ class Parser: # pylint: disable=R0902
660
673
  token.is_with_columns_end = True
661
674
  token.is_nested_function_end = False
662
675
  start_token = token.find_nearest_token("(")
676
+ # a query like: with (col1, col2) as (subquery) as ... would otherwise cause an infinite loop.
677
+ # raise an exception instead
678
+ if start_token.is_with_query_start:
679
+ raise ValueError("This query is wrong")
663
680
  start_token.is_with_columns_start = True
664
681
  start_token.is_nested_function_start = False
665
682
  prev_token = start_token.previous_token
@@ -795,7 +812,8 @@ class Parser: # pylint: disable=R0902
795
812
  return column if isinstance(column, list) else [column]
796
813
 
797
814
  @staticmethod
798
- def _resolve_nested_query(
815
+ # pylint:disable=too-many-return-statements
816
+ def _resolve_nested_query( # noqa: C901
799
817
  subquery_alias: str,
800
818
  nested_queries_names: List[str],
801
819
  nested_queries: Dict,
@@ -831,6 +849,9 @@ class Parser: # pylint: disable=R0902
831
849
  # handle case when column name is used but subquery select all by wildcard
832
850
  if "*" in subparser.columns:
833
851
  return column_name
852
+ for table in subparser.tables:
853
+ if f"{table}.*" in subparser.columns:
854
+ return column_name
834
855
  raise exc # pragma: no cover
835
856
  resolved_column = subparser.columns[column_index]
836
857
  return [resolved_column]
@@ -856,12 +877,13 @@ class Parser: # pylint: disable=R0902
856
877
  # inside subquery / derived table
857
878
  token.is_subquery_start = True
858
879
  self._subquery_level += 1
880
+ self._preceded_keywords.append(token.last_keyword_normalized)
859
881
  token.subquery_level = self._subquery_level
860
882
  elif token.previous_token.normalized in KEYWORDS_BEFORE_COLUMNS.union({","}):
861
883
  # we are in columns and in a column subquery definition
862
884
  token.is_column_definition_start = True
863
885
  elif (
864
- token.previous_token.is_as_keyword
886
+ token.previous_token_not_comment.is_as_keyword
865
887
  and token.last_keyword_normalized != "WINDOW"
866
888
  ):
867
889
  # window clause also contains AS keyword, but it is not a query
@@ -964,12 +986,13 @@ class Parser: # pylint: disable=R0902
964
986
  # as double quotes are not properly handled in sqlparse
965
987
  query = re.sub(r"'.*?'", replace_quotes_in_string, self._raw_query)
966
988
  query = re.sub(r'"([^`]+?)"', r"`\1`", query)
967
- query = re.sub(r'"([^`]+?)"\."([^`]+?)"', r"`\1`.`\2`", query)
968
989
  query = re.sub(r"'.*?'", replace_back_quotes_in_string, query)
969
990
 
970
991
  return query
971
992
 
972
993
  def _determine_last_relevant_keyword(self, token: SQLToken, last_keyword: str):
994
+ if token.value == "," and token.last_keyword_normalized == "ON":
995
+ return "FROM"
973
996
  if token.is_keyword and "".join(token.normalized.split()) in RELEVANT_KEYWORDS:
974
997
  if (
975
998
  not (
@@ -993,6 +1016,8 @@ class Parser: # pylint: disable=R0902
993
1016
  Checks if token is a part of complex identifier like
994
1017
  <schema>.<table>.<column> or <table/sub_query>.<column>
995
1018
  """
1019
+ if token.is_keyword:
1020
+ return False
996
1021
  return str(token) == "." or (
997
1022
  index + 1 < self.tokens_length
998
1023
  and str(self.non_empty_tokens[index + 1]) == "."
@@ -1006,16 +1031,19 @@ class Parser: # pylint: disable=R0902
1006
1031
  is_complex = True
1007
1032
  while is_complex:
1008
1033
  value, is_complex = self._combine_tokens(index=index, value=value)
1009
- index = index - 2
1034
+ index = index - 1
1010
1035
  token.value = value
1011
1036
 
1012
1037
  def _combine_tokens(self, index: int, value: str) -> Tuple[str, bool]:
1013
1038
  """
1014
1039
  Checks if complex identifier is longer and follows back until it's finished
1015
1040
  """
1016
- if index > 1 and str(self.non_empty_tokens[index - 1]) == ".":
1017
- prev_value = self.non_empty_tokens[index - 2].value.strip("`").strip('"')
1018
- value = f"{prev_value}.{value}"
1041
+ if index > 1:
1042
+ prev_value = self.non_empty_tokens[index - 1]
1043
+ if not self._is_token_part_of_complex_identifier(prev_value, index - 1):
1044
+ return value, False
1045
+ prev_value = str(prev_value).strip("`")
1046
+ value = f"{prev_value}{value}"
1019
1047
  return value, True
1020
1048
  return value, False
1021
1049
 
@@ -1065,3 +1093,26 @@ class Parser: # pylint: disable=R0902
1065
1093
  yield tok
1066
1094
  else:
1067
1095
  yield token
1096
+
1097
+ @staticmethod
1098
+ def _get_switch_by_create_query(tokens: List[SQLToken], index: int) -> str:
1099
+ """
1100
+ Return the switch that creates query type.
1101
+ """
1102
+ switch = tokens[index].normalized + tokens[index + 1].normalized
1103
+
1104
+ # Hive CREATE FUNCTION
1105
+ if any(
1106
+ index + i < len(tokens) and tokens[index + i].normalized == "FUNCTION"
1107
+ for i in (1, 2)
1108
+ ):
1109
+ switch = "CREATEFUNCTION"
1110
+
1111
+ return switch
1112
+
1113
+ @staticmethod
1114
+ def _parse(sql: str) -> Tuple[sqlparse.sql.Statement]:
1115
+ """
1116
+ Parse the SQL query using sqlparse library
1117
+ """
1118
+ return sqlparse.parse(sql)
@@ -1,6 +1,7 @@
1
1
  """
2
2
  Module contains internal SQLToken that creates linked list
3
3
  """
4
+
4
5
  from typing import Dict, List, Union
5
6
 
6
7
  import sqlparse.sql
@@ -186,7 +187,7 @@ class SQLToken: # pylint: disable=R0902, R0904
186
187
  """
187
188
  return (
188
189
  self.next_token.normalized in [",", "FROM"]
189
- and self.previous_token.normalized not in [",", ".", "(", "SELECT"]
190
+ and self.previous_token.normalized not in ["*", ",", ".", "(", "SELECT"]
190
191
  and not self.previous_token.is_keyword
191
192
  and (
192
193
  self.last_keyword_normalized == "SELECT"
@@ -369,6 +370,15 @@ class SQLToken: # pylint: disable=R0902, R0904
369
370
  return self.next_token.next_token_not_comment
370
371
  return self.next_token
371
372
 
373
+ @property
374
+ def previous_token_not_comment(self):
375
+ """
376
+ Property returning previous non-comment token
377
+ """
378
+ if self.previous_token and self.previous_token.is_comment:
379
+ return self.previous_token.previous_token_not_comment
380
+ return self.previous_token
381
+
372
382
  def is_constraint_definition_inside_create_table_clause(
373
383
  self, query_type: str
374
384
  ) -> bool:
@@ -1,6 +1,7 @@
1
1
  """
2
2
  Module with various utils
3
3
  """
4
+
4
5
  from typing import Any, List, Sequence
5
6
 
6
7
 
File without changes
File without changes