sql-metadata 2.7.0__tar.gz → 2.18.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,22 @@
1
- Metadata-Version: 2.1
2
- Name: sql-metadata
3
- Version: 2.7.0
1
+ Metadata-Version: 2.4
2
+ Name: sql_metadata
3
+ Version: 2.18.0
4
4
  Summary: Uses tokenized query returned by python-sqlparse and generates query metadata
5
- Home-page: https://github.com/macbre/sql-metadata
6
5
  License: MIT
6
+ License-File: LICENSE
7
7
  Author: Maciej Brencz
8
8
  Author-email: maciej.brencz@gmail.com
9
- Requires-Python: >=3.7.2,<4.0.0
9
+ Requires-Python: >=3.9,<4.0
10
10
  Classifier: License :: OSI Approved :: MIT License
11
11
  Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.8
13
12
  Classifier: Programming Language :: Python :: 3.9
14
13
  Classifier: Programming Language :: Python :: 3.10
15
14
  Classifier: Programming Language :: Python :: 3.11
16
- Requires-Dist: sqlparse (>=0.4.1,<0.5.0)
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Requires-Dist: sqlparse (>=0.4.1,<0.6.0)
19
+ Project-URL: Homepage, https://github.com/macbre/sql-metadata
17
20
  Project-URL: Repository, https://github.com/macbre/sql-metadata
18
21
  Description-Content-Type: text/markdown
19
22
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "sql_metadata"
3
- version = "2.7.0"
3
+ version = "2.18.0"
4
4
  license="MIT"
5
5
  description = "Uses tokenized query returned by python-sqlparse and generates query metadata"
6
6
  authors = ["Maciej Brencz <maciej.brencz@gmail.com>", "Radosław Drążkiewicz <collerek@gmail.com>"]
@@ -13,17 +13,16 @@ packages = [
13
13
  ]
14
14
 
15
15
  [tool.poetry.dependencies]
16
- python = "^3.7.2"
17
- sqlparse = "^0.4.1"
16
+ python = "^3.9"
17
+ sqlparse = ">=0.4.1,<0.6.0"
18
18
 
19
19
  [tool.poetry.dev-dependencies]
20
- black = "^23.1"
21
- coverage = {extras = ["toml"], version = "^6.5"}
22
- pylint = "^2.17.1"
23
- pytest = "^7.2.2"
24
- pytest-cov = "^4.0.0"
25
- coveralls = "^3.3.1"
26
- flake8 = "^5.0.4"
20
+ black = "^25.9"
21
+ coverage = {extras = ["toml"], version = "^7.10"}
22
+ pylint = "^3.3.9"
23
+ pytest = "^8.4.2"
24
+ pytest-cov = "^7.0.0"
25
+ flake8 = "^7.3.0"
27
26
 
28
27
  [build-system]
29
28
  requires = ["poetry-core>=1.0.0"]
@@ -2,6 +2,7 @@
2
2
  Module for parsing sql queries and returning columns,
3
3
  tables, names of with statements etc.
4
4
  """
5
+
5
6
  # pylint:disable=unsubscriptable-object
6
7
  from sql_metadata.parser import Parser
7
8
  from sql_metadata.keywords_lists import QueryType
@@ -11,6 +11,7 @@ into:
11
11
  from sql_metadata.compat import get_query_columns, get_query_tables
12
12
 
13
13
  """
14
+
14
15
  # pylint:disable=missing-function-docstring
15
16
  from typing import List, Optional, Tuple
16
17
 
@@ -1,6 +1,7 @@
1
1
  """
2
2
  Module used to produce generalized sql out of given query
3
3
  """
4
+
4
5
  import re
5
6
  import sqlparse
6
7
 
@@ -47,7 +48,8 @@ class Generalizator:
47
48
  :rtype: str
48
49
  """
49
50
  sql = sqlparse.format(self._raw_query, strip_comments=True)
50
- sql = re.sub(r"\s{2,}", " ", sql)
51
+ sql = sql.replace("\n", " ")
52
+ sql = re.sub(r"[ \t]+", " ", sql)
51
53
  return sql
52
54
 
53
55
  @property
@@ -9,6 +9,7 @@ from enum import Enum
9
9
  KEYWORDS_BEFORE_COLUMNS = {
10
10
  "SELECT",
11
11
  "WHERE",
12
+ "HAVING",
12
13
  "ORDERBY",
13
14
  "GROUPBY",
14
15
  "ON",
@@ -28,6 +29,7 @@ TABLE_ADJUSTMENT_KEYWORDS = {
28
29
  "RIGHTJOIN",
29
30
  "LEFTOUTERJOIN",
30
31
  "RIGHTOUTERJOIN",
32
+ "NATURALJOIN",
31
33
  "INTO",
32
34
  "UPDATE",
33
35
  "TABLE",
@@ -48,6 +50,7 @@ SUBQUERY_PRECEDING_KEYWORDS = {
48
50
  "RIGHTJOIN",
49
51
  "LEFTOUTERJOIN",
50
52
  "RIGHTOUTERJOIN",
53
+ "NATURALJOIN",
51
54
  }
52
55
 
53
56
  # section of a query in which column can exists
@@ -55,12 +58,14 @@ SUBQUERY_PRECEDING_KEYWORDS = {
55
58
  COLUMNS_SECTIONS = {
56
59
  "SELECT": "select",
57
60
  "WHERE": "where",
61
+ "HAVING": "having",
58
62
  "ORDERBY": "order_by",
59
63
  "ON": "join",
60
64
  "USING": "join",
61
65
  "INTO": "insert",
62
66
  "SET": "update",
63
67
  "GROUPBY": "group_by",
68
+ "INNERJOIN": "inner_join",
64
69
  }
65
70
 
66
71
 
@@ -77,6 +82,7 @@ class QueryType(str, Enum):
77
82
  CREATE = "CREATE TABLE"
78
83
  ALTER = "ALTER TABLE"
79
84
  DROP = "DROP TABLE"
85
+ TRUNCATE = "TRUNCATE TABLE"
80
86
 
81
87
 
82
88
  class TokenType(str, Enum):
@@ -102,8 +108,11 @@ SUPPORTED_QUERY_TYPES = {
102
108
  "DELETE": QueryType.DELETE,
103
109
  "WITH": QueryType.SELECT,
104
110
  "CREATETABLE": QueryType.CREATE,
111
+ "CREATETEMPORARY": QueryType.CREATE,
105
112
  "ALTERTABLE": QueryType.ALTER,
106
113
  "DROPTABLE": QueryType.DROP,
114
+ "CREATEFUNCTION": QueryType.CREATE,
115
+ "TRUNCATETABLE": QueryType.TRUNCATE,
107
116
  }
108
117
 
109
118
  # all the keywords we care for - rest is ignored in assigning
@@ -118,6 +127,7 @@ RELEVANT_KEYWORDS = {
118
127
  "RETURNING",
119
128
  "VALUES",
120
129
  "INDEX",
130
+ "KEY",
121
131
  "WITH",
122
132
  "WINDOW",
123
133
  }
@@ -67,6 +67,7 @@ class Parser: # pylint: disable=R0902
67
67
  self._nested_level = 0
68
68
  self._parenthesis_level = 0
69
69
  self._open_parentheses: List[SQLToken] = []
70
+ self._preceded_keywords: List[SQLToken] = []
70
71
  self._aliases_to_check = None
71
72
  self._is_in_nested_function = False
72
73
  self._is_in_with_block = False
@@ -113,25 +114,32 @@ class Parser: # pylint: disable=R0902
113
114
  )
114
115
  .position
115
116
  )
116
- if tokens[index].normalized in ["CREATE", "ALTER", "DROP"]:
117
+ if tokens[index].normalized == "CREATE":
118
+ switch = self._get_switch_by_create_query(tokens, index)
119
+ elif tokens[index].normalized in ("ALTER", "DROP", "TRUNCATE"):
117
120
  switch = tokens[index].normalized + tokens[index + 1].normalized
118
121
  else:
119
122
  switch = tokens[index].normalized
120
123
  self._query_type = SUPPORTED_QUERY_TYPES.get(switch, "UNSUPPORTED")
121
124
  if self._query_type == "UNSUPPORTED":
122
- self._logger.error("Not supported query type: %s", self._raw_query)
125
+ # do not log the full query
126
+ # https://github.com/macbre/sql-metadata/issues/543
127
+ shorten_query = " ".join(self._raw_query.split(" ")[:3])
128
+
129
+ self._logger.error("Not supported query type: %s", shorten_query)
123
130
  raise ValueError("Not supported query type!")
124
131
  return self._query_type
125
132
 
126
133
  @property
127
- def tokens(self) -> List[SQLToken]:
134
+ def tokens(self) -> List[SQLToken]: # noqa: C901
128
135
  """
129
136
  Tokenizes the query
130
137
  """
131
138
  if self._tokens is not None:
132
139
  return self._tokens
133
140
 
134
- parsed = sqlparse.parse(self._query)
141
+ # allow parser to be overriden
142
+ parsed = self._parse(self._query)
135
143
  tokens = []
136
144
  # handle empty queries (#12)
137
145
  if not parsed:
@@ -164,6 +172,8 @@ class Parser: # pylint: disable=R0902
164
172
  elif token.is_right_parenthesis:
165
173
  token.token_type = TokenType.PARENTHESIS
166
174
  self._determine_closing_parenthesis_type(token=token)
175
+ if token.is_subquery_end:
176
+ last_keyword = self._preceded_keywords.pop()
167
177
 
168
178
  last_keyword = self._determine_last_relevant_keyword(
169
179
  token=token, last_keyword=last_keyword
@@ -214,7 +224,7 @@ class Parser: # pylint: disable=R0902
214
224
  self._handle_column_save(token=token, columns=columns)
215
225
 
216
226
  elif token.is_column_name_inside_insert_clause:
217
- column = str(token.value).strip("`")
227
+ column = str(token.value)
218
228
  self._add_to_columns_subsection(
219
229
  keyword=token.last_keyword_normalized, column=column
220
230
  )
@@ -357,9 +367,15 @@ class Parser: # pylint: disable=R0902
357
367
  )
358
368
  ):
359
369
  continue
360
- table_name = str(token.value.strip("`"))
370
+
371
+ # handle INSERT INTO ON DUPLICATE KEY UPDATE queries
372
+ if (
373
+ token.last_keyword_normalized == "UPDATE"
374
+ and self.query_type == "INSERT"
375
+ ):
376
+ continue
361
377
  token.token_type = TokenType.TABLE
362
- tables.append(table_name)
378
+ tables.append(str(token.value))
363
379
 
364
380
  self._tables = tables - with_names
365
381
  return self._tables
@@ -382,8 +398,12 @@ class Parser: # pylint: disable=R0902
382
398
  elif token.last_keyword_normalized == "OFFSET":
383
399
  # OFFSET <offset>
384
400
  offset = int(token.value)
385
- elif token.previous_token.is_punctuation:
401
+ elif (
402
+ token.previous_token.is_punctuation
403
+ and token.last_keyword_normalized == "LIMIT"
404
+ ):
386
405
  # LIMIT <offset>,<limit>
406
+ # enter this condition only when the limit has already been parsed
387
407
  offset = limit
388
408
  limit = int(token.value)
389
409
 
@@ -448,7 +468,8 @@ class Parser: # pylint: disable=R0902
448
468
  while token.next_token and not token.is_with_query_end:
449
469
  token = token.next_token
450
470
  is_end_of_with_block = (
451
- token.next_token_not_comment.normalized
471
+ token.next_token_not_comment is None
472
+ or token.next_token_not_comment.normalized
452
473
  in WITH_ENDING_KEYWORDS
453
474
  )
454
475
  if is_end_of_with_block:
@@ -489,7 +510,7 @@ class Parser: # pylint: disable=R0902
489
510
  True, value_attribute="is_with_query_end", direction="right"
490
511
  )
491
512
  query_token = with_start.next_token
492
- while query_token != with_end:
513
+ while query_token is not None and query_token != with_end:
493
514
  current_with_query.append(query_token)
494
515
  query_token = query_token.next_token
495
516
  with_query_text = "".join([x.stringified_token for x in current_with_query])
@@ -518,12 +539,16 @@ class Parser: # pylint: disable=R0902
518
539
  ):
519
540
  current_subquery.append(inner_token)
520
541
  inner_token = inner_token.next_token
542
+
543
+ query_name = None
521
544
  if inner_token.next_token.value in self.subqueries_names:
522
545
  query_name = inner_token.next_token.value
523
- else:
546
+ elif inner_token.next_token.is_as_keyword:
524
547
  query_name = inner_token.next_token.next_token.value
548
+
525
549
  subquery_text = "".join([x.stringified_token for x in current_subquery])
526
- subqueries[query_name] = subquery_text
550
+ if query_name is not None:
551
+ subqueries[query_name] = subquery_text
527
552
 
528
553
  token = token.next_token
529
554
 
@@ -607,7 +632,7 @@ class Parser: # pylint: disable=R0902
607
632
  """
608
633
  Removes comments from SQL query
609
634
  """
610
- return Generalizator(self.query).without_comments
635
+ return Generalizator(self._raw_query).without_comments
611
636
 
612
637
  @property
613
638
  def generalize(self) -> str:
@@ -648,6 +673,10 @@ class Parser: # pylint: disable=R0902
648
673
  token.is_with_columns_end = True
649
674
  token.is_nested_function_end = False
650
675
  start_token = token.find_nearest_token("(")
676
+ # like: with (col1, col2) as (subquery) as ..., it enters an infinite loop.
677
+ # return exception
678
+ if start_token.is_with_query_start:
679
+ raise ValueError("This query is wrong")
651
680
  start_token.is_with_columns_start = True
652
681
  start_token.is_nested_function_start = False
653
682
  prev_token = start_token.previous_token
@@ -783,7 +812,8 @@ class Parser: # pylint: disable=R0902
783
812
  return column if isinstance(column, list) else [column]
784
813
 
785
814
  @staticmethod
786
- def _resolve_nested_query(
815
+ # pylint:disable=too-many-return-statements
816
+ def _resolve_nested_query( # noqa: C901
787
817
  subquery_alias: str,
788
818
  nested_queries_names: List[str],
789
819
  nested_queries: Dict,
@@ -819,6 +849,9 @@ class Parser: # pylint: disable=R0902
819
849
  # handle case when column name is used but subquery select all by wildcard
820
850
  if "*" in subparser.columns:
821
851
  return column_name
852
+ for table in subparser.tables:
853
+ if f"{table}.*" in subparser.columns:
854
+ return column_name
822
855
  raise exc # pragma: no cover
823
856
  resolved_column = subparser.columns[column_index]
824
857
  return [resolved_column]
@@ -844,12 +877,13 @@ class Parser: # pylint: disable=R0902
844
877
  # inside subquery / derived table
845
878
  token.is_subquery_start = True
846
879
  self._subquery_level += 1
880
+ self._preceded_keywords.append(token.last_keyword_normalized)
847
881
  token.subquery_level = self._subquery_level
848
882
  elif token.previous_token.normalized in KEYWORDS_BEFORE_COLUMNS.union({","}):
849
883
  # we are in columns and in a column subquery definition
850
884
  token.is_column_definition_start = True
851
885
  elif (
852
- token.previous_token.is_as_keyword
886
+ token.previous_token_not_comment.is_as_keyword
853
887
  and token.last_keyword_normalized != "WINDOW"
854
888
  ):
855
889
  # window clause also contains AS keyword, but it is not a query
@@ -952,12 +986,13 @@ class Parser: # pylint: disable=R0902
952
986
  # as double quotes are not properly handled in sqlparse
953
987
  query = re.sub(r"'.*?'", replace_quotes_in_string, self._raw_query)
954
988
  query = re.sub(r'"([^`]+?)"', r"`\1`", query)
955
- query = re.sub(r'"([^`]+?)"\."([^`]+?)"', r"`\1`.`\2`", query)
956
989
  query = re.sub(r"'.*?'", replace_back_quotes_in_string, query)
957
990
 
958
991
  return query
959
992
 
960
993
  def _determine_last_relevant_keyword(self, token: SQLToken, last_keyword: str):
994
+ if token.value == "," and token.last_keyword_normalized == "ON":
995
+ return "FROM"
961
996
  if token.is_keyword and "".join(token.normalized.split()) in RELEVANT_KEYWORDS:
962
997
  if (
963
998
  not (
@@ -981,6 +1016,8 @@ class Parser: # pylint: disable=R0902
981
1016
  Checks if token is a part of complex identifier like
982
1017
  <schema>.<table>.<column> or <table/sub_query>.<column>
983
1018
  """
1019
+ if token.is_keyword:
1020
+ return False
984
1021
  return str(token) == "." or (
985
1022
  index + 1 < self.tokens_length
986
1023
  and str(self.non_empty_tokens[index + 1]) == "."
@@ -994,16 +1031,19 @@ class Parser: # pylint: disable=R0902
994
1031
  is_complex = True
995
1032
  while is_complex:
996
1033
  value, is_complex = self._combine_tokens(index=index, value=value)
997
- index = index - 2
1034
+ index = index - 1
998
1035
  token.value = value
999
1036
 
1000
1037
  def _combine_tokens(self, index: int, value: str) -> Tuple[str, bool]:
1001
1038
  """
1002
1039
  Checks if complex identifier is longer and follows back until it's finished
1003
1040
  """
1004
- if index > 1 and str(self.non_empty_tokens[index - 1]) == ".":
1005
- prev_value = self.non_empty_tokens[index - 2].value.strip("`").strip('"')
1006
- value = f"{prev_value}.{value}"
1041
+ if index > 1:
1042
+ prev_value = self.non_empty_tokens[index - 1]
1043
+ if not self._is_token_part_of_complex_identifier(prev_value, index - 1):
1044
+ return value, False
1045
+ prev_value = str(prev_value).strip("`")
1046
+ value = f"{prev_value}{value}"
1007
1047
  return value, True
1008
1048
  return value, False
1009
1049
 
@@ -1053,3 +1093,26 @@ class Parser: # pylint: disable=R0902
1053
1093
  yield tok
1054
1094
  else:
1055
1095
  yield token
1096
+
1097
+ @staticmethod
1098
+ def _get_switch_by_create_query(tokens: List[SQLToken], index: int) -> str:
1099
+ """
1100
+ Return the switch that creates query type.
1101
+ """
1102
+ switch = tokens[index].normalized + tokens[index + 1].normalized
1103
+
1104
+ # Hive CREATE FUNCTION
1105
+ if any(
1106
+ index + i < len(tokens) and tokens[index + i].normalized == "FUNCTION"
1107
+ for i in (1, 2)
1108
+ ):
1109
+ switch = "CREATEFUNCTION"
1110
+
1111
+ return switch
1112
+
1113
+ @staticmethod
1114
+ def _parse(sql: str) -> Tuple[sqlparse.sql.Statement]:
1115
+ """
1116
+ Parse the SQL query using sqlparse library
1117
+ """
1118
+ return sqlparse.parse(sql)
@@ -1,6 +1,7 @@
1
1
  """
2
2
  Module contains internal SQLToken that creates linked list
3
3
  """
4
+
4
5
  from typing import Dict, List, Union
5
6
 
6
7
  import sqlparse.sql
@@ -186,7 +187,7 @@ class SQLToken: # pylint: disable=R0902, R0904
186
187
  """
187
188
  return (
188
189
  self.next_token.normalized in [",", "FROM"]
189
- and self.previous_token.normalized not in [",", ".", "(", "SELECT"]
190
+ and self.previous_token.normalized not in ["*", ",", ".", "(", "SELECT"]
190
191
  and not self.previous_token.is_keyword
191
192
  and (
192
193
  self.last_keyword_normalized == "SELECT"
@@ -369,6 +370,15 @@ class SQLToken: # pylint: disable=R0902, R0904
369
370
  return self.next_token.next_token_not_comment
370
371
  return self.next_token
371
372
 
373
+ @property
374
+ def previous_token_not_comment(self):
375
+ """
376
+ Property returning previous non-comment token
377
+ """
378
+ if self.previous_token and self.previous_token.is_comment:
379
+ return self.previous_token.previous_token_not_comment
380
+ return self.previous_token
381
+
372
382
  def is_constraint_definition_inside_create_table_clause(
373
383
  self, query_type: str
374
384
  ) -> bool:
@@ -1,6 +1,7 @@
1
1
  """
2
2
  Module with various utils
3
3
  """
4
+
4
5
  from typing import Any, List, Sequence
5
6
 
6
7
 
File without changes
File without changes