ygo 1.0.10__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ygo might be problematic. Click here for more details.
- ygo/__init__.py +7 -4
- ygo/delay.py +89 -0
- ygo/{ygo.py → pool.py} +41 -215
- ygo/utils.py +137 -0
- ygo-1.1.0.dist-info/METADATA +160 -0
- ygo-1.1.0.dist-info/RECORD +12 -0
- {ygo-1.0.10.dist-info → ygo-1.1.0.dist-info}/WHEEL +1 -1
- {ygo-1.0.10.dist-info → ygo-1.1.0.dist-info}/top_level.txt +0 -1
- ylog/__init__.py +2 -0
- ycat/__init__.py +0 -33
- ycat/client.py +0 -172
- ycat/parse.py +0 -64
- ycat/qdf/__init__.py +0 -530
- ycat/qdf/errors.py +0 -65
- ycat/qdf/expr.py +0 -308
- ycat/qdf/qdf.py +0 -180
- ycat/qdf/udf/__init__.py +0 -14
- ycat/qdf/udf/base_udf.py +0 -145
- ycat/qdf/udf/cs_udf.py +0 -97
- ycat/qdf/udf/d_udf.py +0 -176
- ycat/qdf/udf/ind_udf.py +0 -202
- ycat/qdf/udf/ts_udf.py +0 -175
- ycat/updator.py +0 -101
- ygo-1.0.10.dist-info/METADATA +0 -102
- ygo-1.0.10.dist-info/RECORD +0 -24
- {ygo-1.0.10.dist-info → ygo-1.1.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ygo
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Project-URL: homepage, https://github.com/link-yundi/ygo
|
|
5
|
+
Project-URL: repository, https://github.com/link-yundi/ygo
|
|
6
|
+
Requires-Python: >=3.12
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: joblib>=1.5.0
|
|
10
|
+
Requires-Dist: loguru>=0.7.3
|
|
11
|
+
Requires-Dist: tqdm>=4.67.1
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# ygo
|
|
15
|
+
一个轻量级 Python 工具包,底层基于 joblib、tqdm 和 loguru 实现,支持:
|
|
16
|
+
- 并发执行(带进度条)
|
|
17
|
+
- 延迟调用
|
|
18
|
+
- 链式绑定参数
|
|
19
|
+
- 函数信息获取
|
|
20
|
+
- 模块/函数动态加载...
|
|
21
|
+
- 并结合 ylog 提供日志记录能力
|
|
22
|
+
|
|
23
|
+
### 安装
|
|
24
|
+
```shell
|
|
25
|
+
pip install -U ygo
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### 🧰 功能概览
|
|
29
|
+
|
|
30
|
+
| 模块 | 功能 |
|
|
31
|
+
| :----- | :----------------------------------------------------------- |
|
|
32
|
+
| `ygo` | 支持并发执行(带进度条)、延迟调用、函数信息获取以及模块/函数动态加载等功能 |
|
|
33
|
+
| `ylog` | 日志模块,提供统一的日志输出接口 |
|
|
34
|
+
|
|
35
|
+
### 示例
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
├── a
|
|
39
|
+
│ ├── __init__.py
|
|
40
|
+
│ └── b
|
|
41
|
+
│ ├── __init__.py
|
|
42
|
+
│ └── c.py
|
|
43
|
+
└── test.py
|
|
44
|
+
|
|
45
|
+
c.py 中定义了目标函数
|
|
46
|
+
def test_fn(a, b=2):
|
|
47
|
+
return a+b
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
#### 场景1: 并发执行
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import ygo
|
|
54
|
+
import ylog
|
|
55
|
+
from a.b.c import test_fn
|
|
56
|
+
|
|
57
|
+
with ygo.pool(job_name="test parallel", show_progress=True) as go:
|
|
58
|
+
for i in range(10):
|
|
59
|
+
go.submit(test_fn)(a=i, b=2*i)
|
|
60
|
+
for res in go.do():
|
|
61
|
+
ylog.info(res)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
#### ✅ `ygo.pool` 支持的参数
|
|
65
|
+
|
|
66
|
+
| 参数名 | 类型 | 描述 |
|
|
67
|
+
| ------------- | ---- | ------------------------------------------------------------ |
|
|
68
|
+
| n_jobs | int | 并行任务数(<=1 表示串行) |
|
|
69
|
+
| show_progress | bool | 是否显示进度条 |
|
|
70
|
+
| backend | str | 执行后端(默认 'threading',可选 'multiprocessing' 或 'loky') |
|
|
71
|
+
|
|
72
|
+
#### 场景2: 延迟调用
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
>>> fn = delay(test_fn)(a=1, b=2)
|
|
76
|
+
>>> fn()
|
|
77
|
+
3
|
|
78
|
+
>>> # 逐步传递参数
|
|
79
|
+
>>> fn1 = delay(lambda a, b, c: a+b+c)(a=1)
|
|
80
|
+
>>> fn2 = delay(fn1)(b=2)
|
|
81
|
+
>>> fn2(c=3)
|
|
82
|
+
6
|
|
83
|
+
>>> # 参数更改
|
|
84
|
+
>>> fn1 = delay(lambda a, b, c: a+b+c)(a=1, b=2)
|
|
85
|
+
>>> fn2 = delay(fn1)(c=3, b=5)
|
|
86
|
+
>>> fn2()
|
|
87
|
+
9
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
#### 场景3: 获取目标函数信息
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
>>> ygo.fn_info(test_fn)
|
|
94
|
+
=============================================================
|
|
95
|
+
a.b.c.test_fn(a, b=2)
|
|
96
|
+
=============================================================
|
|
97
|
+
def test_fn(a, b=2):
|
|
98
|
+
return a+b
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
#### 🔍 其他函数信息工具
|
|
102
|
+
|
|
103
|
+
| 方法名 | 描述 |
|
|
104
|
+
| ------------------------- | ---------------------------------------- |
|
|
105
|
+
| `fn_params(fn)` | 获取函数实参 |
|
|
106
|
+
| `fn_signature_params(fn)` | 获取函数定义的所有参数名 |
|
|
107
|
+
| `fn_code(fn)` | 获取函数源码字符串 |
|
|
108
|
+
| `fn_path(fn)` | 获取函数所属模块路径 |
|
|
109
|
+
| `fn_from_str(s)` | 根据字符串导入函数(如 "a.b.c.test_fn") |
|
|
110
|
+
| `module_from_str(s)` | 根据字符串导入模块 |
|
|
111
|
+
|
|
112
|
+
#### 场景4: 通过字符串解析函数并执行
|
|
113
|
+
|
|
114
|
+
```
|
|
115
|
+
>>> ygo.fn_from_str("a.b.c.test_fn")(a=1, b=5)
|
|
116
|
+
6
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### 📝 日志记录(ylog)
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
import ylog
|
|
123
|
+
|
|
124
|
+
ylog.info("这是一个信息日志")
|
|
125
|
+
ylog.warning("这是一个警告日志")
|
|
126
|
+
ylog.error("这是一个错误日志", exc_info=True)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# 为不同的模块使用不同的logger
|
|
131
|
+
logger_app1 = ylog.get_logger("app1", )
|
|
132
|
+
logger_app2 = ylog.get_logger("app2", )
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
#### 🔧 配置管理:`update_config`
|
|
136
|
+
|
|
137
|
+
你可以通过 update_config 方法动态修改日志配置,例如设置日志级别、格式、是否启用颜色等。
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
# 开启调试模式
|
|
141
|
+
ylog.update_config(debug_mode=True)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
#### 🧩 获取独立的 Logger 实例:`get_logger`
|
|
145
|
+
|
|
146
|
+
在大型项目中,你可能希望为不同模块或组件创建独立的 logger 实例以区分日志来源。
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
logger1 = ylog.get_logger("moduleA")
|
|
150
|
+
logger2 = ylog.get_logger("moduleB")
|
|
151
|
+
|
|
152
|
+
logger1.info("这是来自 moduleA 的日志")
|
|
153
|
+
logger2.warning("这是来自 moduleB 的警告")
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
#### 📌 使用建议
|
|
157
|
+
|
|
158
|
+
- 生产环境建议关闭 `debug_mode`,避免产生过多调试日志。
|
|
159
|
+
- 对于复杂项目,推荐使用 `get_logger` 创建命名 logger,便于日志分类与分析。
|
|
160
|
+
- 使用 `exc_info=True` 参数时,可自动打印异常堆栈信息,适用于错误捕获场景。
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
ygo/__init__.py,sha256=AlvzcS4Ge94nklq5AhPhCNCIg5D1F8gaZqhdzQpoXH4,679
|
|
2
|
+
ygo/delay.py,sha256=66xtPXqyD630FL7LWL5qJKAIZvyGDwZyM4qPfk8Czlg,2206
|
|
3
|
+
ygo/exceptions.py,sha256=0OYDYt_9KKo8mF2XBG5QkCMr3-ASp69VDSPOEwlIsrI,660
|
|
4
|
+
ygo/pool.py,sha256=bnHm4TtnRoFBv5UvV7WpuObJoK4FdoRf65mvf82yEyI,7052
|
|
5
|
+
ygo/utils.py,sha256=c-g4fJgeZp8diinkJhX4DAJBZEhH2tHYniUzRlt1EgU,3178
|
|
6
|
+
ygo-1.1.0.dist-info/licenses/LICENSE,sha256=6AKUWQ1xe-jwPSFv_H6FMQLNNWb7AYqzuEUTwlP2S8M,1067
|
|
7
|
+
ylog/__init__.py,sha256=aNrUp1n3JJFMUt1JFEsq33bckIhSQwCiTQCmV9rOMYk,452
|
|
8
|
+
ylog/core.py,sha256=d6QCFRDTvlyxgvS6JphUGOgX5Mgx9qPv9wB3g-4YOJw,9225
|
|
9
|
+
ygo-1.1.0.dist-info/METADATA,sha256=Eelk_nhMZZ6nwqCtTtmLJabPuLB3d0PacwbS3rP8uDE,4651
|
|
10
|
+
ygo-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
11
|
+
ygo-1.1.0.dist-info/top_level.txt,sha256=sY7lJBJ2ncfEMAxoNBVay0RVUixpVt9Osuwwy0_uWqU,9
|
|
12
|
+
ygo-1.1.0.dist-info/RECORD,,
|
ylog/__init__.py
CHANGED
ycat/__init__.py
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
---------------------------------------------
|
|
4
|
-
Created on 2025/5/14 18:29
|
|
5
|
-
@author: ZhangYundi
|
|
6
|
-
@email: yundi.xxii@outlook.com
|
|
7
|
-
---------------------------------------------
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
from .client import (
|
|
11
|
-
HOME,
|
|
12
|
-
CATDB,
|
|
13
|
-
get_settings,
|
|
14
|
-
sql,
|
|
15
|
-
put,
|
|
16
|
-
tb_path,
|
|
17
|
-
read_ck,
|
|
18
|
-
read_mysql,
|
|
19
|
-
)
|
|
20
|
-
from .qdf import from_polars
|
|
21
|
-
from .updator import Updator
|
|
22
|
-
|
|
23
|
-
# Public API of the package. `from_polars` is imported above purely for
# re-export, so it is listed here alongside the other public names.
__all__ = [
    "HOME",
    "CATDB",
    "get_settings",
    "sql",
    "put",
    "tb_path",
    "read_ck",
    "read_mysql",
    "from_polars",
    "Updator",
]
|
ycat/client.py
DELETED
|
@@ -1,172 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
---------------------------------------------
|
|
4
|
-
Created on 2024/7/1 09:44
|
|
5
|
-
@author: ZhangYundi
|
|
6
|
-
@email: yundi.xxii@outlook.com
|
|
7
|
-
---------------------------------------------
|
|
8
|
-
"""
|
|
9
|
-
import os
|
|
10
|
-
import re
|
|
11
|
-
import urllib
|
|
12
|
-
from pathlib import Path
|
|
13
|
-
|
|
14
|
-
import clickhouse_df
|
|
15
|
-
import polars as pl
|
|
16
|
-
from dynaconf import Dynaconf
|
|
17
|
-
|
|
18
|
-
import ylog
|
|
19
|
-
from .parse import extract_table_names_from_sql
|
|
20
|
-
|
|
21
|
-
# The configuration file lives at "~/.catdb/settings.toml".
USERHOME = os.path.expanduser('~')  # current user's home directory
NAME = "catdb"
CONFIG_PATH = os.path.join(USERHOME, f".{NAME}", "settings.toml")
if not os.path.exists(CONFIG_PATH):
    # First run: write a commented template the user can edit afterwards.
    # The path is emitted with forward slashes because it is embedded in a
    # TOML basic string, where Windows backslashes would be read as escapes.
    catdb_path = Path(USERHOME, NAME).as_posix()
    template_content = f"""[paths]
{NAME}="{catdb_path}" # 本地数据库,默认家目录

## 数据库配置:
[database]
[database.ck]
# urls=["<host1>:<port1>", "<host2>:<port2>",]
# user="xxx"
# password="xxxxxx"
[database.jy]
# url="<host>:<port>"
# user="xxxx"
# password="xxxxxx"

## 视情况自由增加其他配置
"""
    try:
        os.makedirs(os.path.dirname(CONFIG_PATH), exist_ok=True)
        # The template contains non-ASCII text, so pin the encoding instead of
        # relying on the platform default.
        with open(CONFIG_PATH, "w", encoding="utf-8") as f:
            f.write(template_content)
    except OSError as e:
        # Best effort: a read-only home directory must not break the import.
        ylog.error(f"配置文件生成失败: {e}")
    else:
        ylog.info(f"生成配置文件: {CONFIG_PATH}")
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
def get_settings():
    """
    Build a Dynaconf settings object from the file at CONFIG_PATH.

    Returns
    -------
    settings
        The Dynaconf object, or an empty dict when constructing it raised;
        in that case the error is logged through ylog.
    """
    settings = {}
    try:
        settings = Dynaconf(settings_files=[CONFIG_PATH])
    except Exception as err:
        ylog.error(f"读取配置文件失败: {err}")
    return settings
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
HOME = USERHOME
# Default root of the local database; the config file may override it below.
CATDB = os.path.join(HOME, NAME)
SETTINGS = get_settings()
# get_settings() returns an empty dict (never None) on failure, and a
# hand-edited file may lack the [paths] table, so look the override up
# defensively instead of indexing unconditionally.
try:
    _configured_root = SETTINGS["PATHS"][NAME]
except (KeyError, TypeError, AttributeError):
    _configured_root = None
if _configured_root:
    CATDB = str(_configured_root)
    # NOTE(review): a suffix test also accepts e.g. ".../mycatdb"; kept as-is
    # so existing installations keep resolving to the same directory.
    if not CATDB.endswith(NAME):
        CATDB = os.path.join(CATDB, NAME)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
# ======================== 本地数据库 catdb ========================
|
|
73
|
-
def tb_path(tb_name: str) -> Path:
    """
    Resolve a table name to its path under the local database root.

    Parameters
    ----------
    tb_name: str
        Table name, written as a relative path such as ``a/b/c``.

    Returns
    -------
    full_abs_path: pathlib.Path
        ``CATDB`` joined with ``tb_name``, e.g. ``<CATDB>/a/b/c``.
    """
    full_abs_path = Path(CATDB) / tb_name
    return full_abs_path
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def put(df: pl.DataFrame, tb_name: str, partitions: list[str] | None = None, abs_path: bool = False):
    """
    Write a DataFrame to a table directory as parquet.

    Parameters
    ----------
    df: polars.DataFrame
        Data to write.
    tb_name: str
        Table name relative to the local database root, or a filesystem
        path used as-is when ``abs_path`` is True.
    partitions: list[str] | None
        Columns passed to ``write_parquet(partition_by=...)``. When None a
        single ``data.parquet`` file is written inside the table directory.
    abs_path: bool
        Treat ``tb_name`` as a path instead of resolving it with ``tb_path``.
    """
    # Normalise to a Path in both branches: the `.exists()` call and the `/`
    # join below are Path operations and would fail on a plain string.
    tbpath = Path(tb_name) if abs_path else tb_path(tb_name)
    if not tbpath.exists():
        os.makedirs(tbpath, exist_ok=True)
    if partitions is not None:
        df.write_parquet(tbpath, partition_by=partitions)
    else:
        df.write_parquet(tbpath / "data.parquet")
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def sql(query: str, abs_path: bool = False, lazy: bool = True):
    """
    Run a SQL query whose table names refer to local parquet tables.

    Every table name found by ``extract_table_names_from_sql`` is replaced
    in the query text by ``read_parquet('<path>/**/*.parquet')`` and the
    rewritten query is handed to ``pl.sql``.

    :param query: SQL text
    :param abs_path: when True, table names are used as paths as-is;
        otherwise they are resolved with ``tb_path``
    :param lazy: when True return the result of ``pl.sql`` directly,
        otherwise call ``.collect()`` on it first
    :return: the ``pl.sql`` result, collected when ``lazy`` is False
    """
    convertor = {}
    for tb in extract_table_names_from_sql(query):
        db_path = tb if abs_path else tb_path(tb)
        convertor[tb] = f"read_parquet('{db_path}/**/*.parquet')"

    new_query = query
    # With no tables the alternation would be the empty pattern, which
    # matches everywhere and then fails the lookup with KeyError('').
    if convertor:
        # Longest names first so a table whose name is a prefix of another
        # cannot shadow the longer one in the alternation.
        ordered = sorted(convertor, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(k) for k in ordered))
        new_query = pattern.sub(lambda m: convertor[m.group(0)], query)

    result = pl.sql(new_query)
    return result if lazy else result.collect()
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def read_mysql(query: str, db_conf: str = "database.mysql") -> pl.DataFrame:
    """
    Run a query against MySQL and return a polars.DataFrame.

    :param query: SQL text passed to ``pl.read_database_uri``
    :param db_conf: key of the database section in .catdb/settings.toml;
        the section must provide ``user``, ``password`` and ``url``
    :return: polars.DataFrame
    :raises RuntimeError: when required config keys are missing, or when
        anything else fails while loading the config or running the query
    """
    # A bare `import urllib` does not guarantee that the `urllib.parse`
    # submodule is loaded, so bind the function explicitly.
    from urllib.parse import quote_plus

    try:
        db_setting = get_settings().get(db_conf, {})
        if not isinstance(db_setting, dict):
            raise ValueError(f"Database configuration '{db_conf}' is not a dictionary.")

        required_keys = ['user', 'password', 'url']
        missing_keys = [key for key in required_keys if key not in db_setting]
        if missing_keys:
            raise KeyError(f"Missing required keys in database config: {missing_keys}")

        # str() guards against credentials the config parser typed as
        # numbers (e.g. an all-digit password written without quotes).
        user = quote_plus(str(db_setting['user']))
        password = quote_plus(str(db_setting['password']))
        uri = f"mysql://{user}:{password}@{db_setting['url']}"
        return pl.read_database_uri(query, uri)

    except KeyError as e:
        raise RuntimeError("Database configuration error: missing required fields.") from e
    except Exception as e:
        raise RuntimeError(f"Failed to execute MySQL query: {e}") from e
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
def read_ck(query: str, db_conf: str = "database.ck") -> pl.DataFrame:
    """
    Run a query against a ClickHouse cluster and return a polars.DataFrame.

    :param query: query text passed to ``clickhouse_df.to_polars``
    :param db_conf: key of the database section in .catdb/settings.toml;
        the section must provide ``user``, ``password`` and ``urls``
    :return: polars.DataFrame
    :raises RuntimeError: when required config keys are missing, or when
        anything else fails while loading the config or running the query
    """
    # A bare `import urllib` does not guarantee that the `urllib.parse`
    # submodule is loaded, so bind the function explicitly.
    from urllib.parse import quote_plus

    try:
        db_setting = get_settings().get(db_conf, {})
        if not isinstance(db_setting, dict):
            raise ValueError(f"Database configuration '{db_conf}' is not a dictionary.")

        required_keys = ['user', 'password', 'urls']
        missing_keys = [key for key in required_keys if key not in db_setting]
        if missing_keys:
            raise KeyError(f"Missing required keys in database config: {missing_keys}")

        # str() guards against credentials the config parser typed as numbers.
        # NOTE(review): the credentials are URL-quoted but then passed as
        # keyword arguments rather than embedded in a URL -- confirm that
        # clickhouse_df.connect expects quoted values.
        user = quote_plus(str(db_setting['user']))
        password = quote_plus(str(db_setting['password']))

        with clickhouse_df.connect(db_setting['urls'], user=user, password=password):
            return clickhouse_df.to_polars(query)

    except KeyError as e:
        raise RuntimeError("Database configuration error: missing required fields.") from e
    except Exception as e:
        raise RuntimeError(f"Failed to execute ClickHouse query: {e}") from e
|
ycat/parse.py
DELETED
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
---------------------------------------------
|
|
4
|
-
Created on 2024/11/6 下午7:25
|
|
5
|
-
@author: ZhangYundi
|
|
6
|
-
@email: yundi.xxii@outlook.com
|
|
7
|
-
---------------------------------------------
|
|
8
|
-
"""
|
|
9
|
-
import sqlparse
|
|
10
|
-
import re
|
|
11
|
-
|
|
12
|
-
def format_sql(sql_content):
    """Normalise a SQL string (re-indent, strip comments); takes and returns a str."""
    return sqlparse.format(sql_content, strip_comments=True, reindent=True)
|
|
16
|
-
|
|
17
|
-
def extract_temp_tables(with_clause):
    """
    Extract the temporary (CTE) table names from a WITH clause.

    A name counts when it is followed by whitespace, the keyword ``AS``
    (any case) and an opening parenthesis, e.g. ``t1 as (``.

    :param with_clause: SQL text containing the WITH clause
    :return: list of names in order of appearance
    """
    # Whitespace before AS is mandatory: with `\s*` the name group could
    # backtrack inside an identifier, so `alias(` was reported as `ali`.
    return re.findall(r'\b(\w+)\s+as\s*\(', with_clause, re.IGNORECASE)
|
|
21
|
-
|
|
22
|
-
def extract_table_names_from_sql(sql_query):
    """
    Extract the table names referenced by a SQL string.

    Names are taken from whatever follows ``FROM`` or ``JOIN``. Only the
    part after the last ``.`` is kept, and quotes, backticks and semicolons
    are removed. Names defined by a WITH clause are excluded.

    :param sql_query: SQL text, possibly holding several statements
    :return: sorted list of distinct table names
    """
    # Pattern matching the token that follows FROM or JOIN.
    table_name_pattern = r'\bFROM\s+([^\s\(\)\,]+)|\bJOIN\s+([^\s\(\)\,]+)'
    table_names = set()
    # Temporary names defined by WITH clauses, gathered across statements.
    temp_tables = set()

    for statement in sqlparse.parse(sql_query):
        statement_str = str(statement)

        # Blank out substring(... FROM ...) / extract(... FROM ...) calls so
        # their FROM keyword is not mistaken for a table reference. Matching
        # is case-insensitive so upper-case SQL is handled as well.
        statement_str = re.sub(
            r'(substring|extract)\s*\(.*?\)',
            '',
            statement_str,
            flags=re.IGNORECASE | re.DOTALL,
        )

        for match in re.findall(table_name_pattern, statement_str, re.IGNORECASE):
            # Each match is a (from_name, join_name) pair with one side empty.
            for name in match:
                if name:
                    # Drop any namespace prefix, then quoting characters.
                    table_name = name.split('.')[-1]
                    table_name = re.sub(r'("|`|\'|;)', '', table_name)
                    table_names.add(table_name)

        # Detect WITH in any case; the original lower-case substring test
        # missed `WITH ... AS (` written in upper case.
        if re.search(r'\bwith\b', statement_str, re.IGNORECASE):
            temp_tables.update(extract_temp_tables(statement_str))

    # Always a list (the original returned a set when no CTE was present).
    return sorted(table_names - temp_tables)
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|