ygo 1.0.9__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ygo might be problematic. Click here for more details.
- ycat/__init__.py +7 -8
- ycat/client.py +62 -35
- ycat/parse.py +0 -2
- {qdf → ycat/qdf}/__init__.py +1 -0
- ycat/updator.py +101 -0
- ygo/__init__.py +25 -1
- ygo/exceptions.py +17 -1
- ygo/ygo.py +3 -15
- {ygo-1.0.9.dist-info → ygo-1.0.10.dist-info}/METADATA +7 -2
- ygo-1.0.10.dist-info/RECORD +24 -0
- {ygo-1.0.9.dist-info → ygo-1.0.10.dist-info}/WHEEL +1 -1
- {ygo-1.0.9.dist-info → ygo-1.0.10.dist-info}/top_level.txt +0 -1
- ylog/__init__.py +3 -2
- ylog/core.py +61 -28
- ycat/dtype.py +0 -389
- ycat/yck.py +0 -87
- ygo-1.0.9.dist-info/RECORD +0 -25
- {qdf → ycat/qdf}/errors.py +0 -0
- {qdf → ycat/qdf}/expr.py +0 -0
- {qdf → ycat/qdf}/qdf.py +0 -0
- {qdf → ycat/qdf}/udf/__init__.py +0 -0
- {qdf → ycat/qdf}/udf/base_udf.py +0 -0
- {qdf → ycat/qdf}/udf/cs_udf.py +0 -0
- {qdf → ycat/qdf}/udf/d_udf.py +0 -0
- {qdf → ycat/qdf}/udf/ind_udf.py +0 -0
- {qdf → ycat/qdf}/udf/ts_udf.py +0 -0
- {ygo-1.0.9.dist-info → ygo-1.0.10.dist-info}/licenses/LICENSE +0 -0
ycat/__init__.py
CHANGED
|
@@ -13,12 +13,12 @@ from .client import (
|
|
|
13
13
|
get_settings,
|
|
14
14
|
sql,
|
|
15
15
|
put,
|
|
16
|
-
create_engine_ck,
|
|
17
|
-
create_engine_mysql,
|
|
18
|
-
read_mysql,
|
|
19
|
-
read_ck,
|
|
20
16
|
tb_path,
|
|
17
|
+
read_ck,
|
|
18
|
+
read_mysql,
|
|
21
19
|
)
|
|
20
|
+
from .qdf import from_polars
|
|
21
|
+
from .updator import Updator
|
|
22
22
|
|
|
23
23
|
__all__ = [
|
|
24
24
|
"HOME",
|
|
@@ -26,9 +26,8 @@ __all__ = [
|
|
|
26
26
|
"get_settings",
|
|
27
27
|
"sql",
|
|
28
28
|
"put",
|
|
29
|
-
"create_engine_ck",
|
|
30
|
-
"create_engine_mysql",
|
|
31
|
-
"read_mysql",
|
|
32
|
-
"read_ck",
|
|
33
29
|
"tb_path",
|
|
30
|
+
"read_ck",
|
|
31
|
+
"read_mysql",
|
|
32
|
+
"Updator",
|
|
34
33
|
]
|
ycat/client.py
CHANGED
|
@@ -8,16 +8,15 @@ Created on 2024/7/1 09:44
|
|
|
8
8
|
"""
|
|
9
9
|
import os
|
|
10
10
|
import re
|
|
11
|
-
|
|
12
|
-
from
|
|
11
|
+
import urllib
|
|
12
|
+
from pathlib import Path
|
|
13
13
|
|
|
14
|
+
import clickhouse_df
|
|
14
15
|
import polars as pl
|
|
15
16
|
from dynaconf import Dynaconf
|
|
16
|
-
from sqlalchemy import create_engine
|
|
17
17
|
|
|
18
18
|
import ylog
|
|
19
19
|
from .parse import extract_table_names_from_sql
|
|
20
|
-
from .yck import connect, query_polars
|
|
21
20
|
|
|
22
21
|
# 配置文件在 “~/.catdb/setting.toml”
|
|
23
22
|
USERHOME = os.path.expanduser('~') # 用户家目录
|
|
@@ -54,7 +53,7 @@ if not os.path.exists(CONFIG_PATH):
|
|
|
54
53
|
|
|
55
54
|
def get_settings():
|
|
56
55
|
try:
|
|
57
|
-
return Dynaconf(settings_files=[CONFIG_PATH])
|
|
56
|
+
return Dynaconf(settings_files=[CONFIG_PATH])
|
|
58
57
|
except Exception as e:
|
|
59
58
|
ylog.error(f"读取配置文件失败: {e}")
|
|
60
59
|
return {}
|
|
@@ -71,7 +70,7 @@ if SETTINGS is not None:
|
|
|
71
70
|
|
|
72
71
|
|
|
73
72
|
# ======================== 本地数据库 catdb ========================
|
|
74
|
-
def tb_path(tb_name: str) ->
|
|
73
|
+
def tb_path(tb_name: str) -> Path:
|
|
75
74
|
"""
|
|
76
75
|
返回指定表名 完整的本地路径
|
|
77
76
|
Parameters
|
|
@@ -80,26 +79,23 @@ def tb_path(tb_name: str) -> str:
|
|
|
80
79
|
表名,路径写法: a/b/c
|
|
81
80
|
Returns
|
|
82
81
|
-------
|
|
83
|
-
full_abs_path:
|
|
82
|
+
full_abs_path: pathlib.Path
|
|
84
83
|
完整的本地绝对路径 $HOME/catdb/a/b/c
|
|
85
84
|
"""
|
|
86
|
-
return
|
|
85
|
+
return Path(CATDB, tb_name)
|
|
87
86
|
|
|
88
87
|
|
|
89
|
-
def put(df: pl.DataFrame, tb_name: str, partitions:
|
|
88
|
+
def put(df: pl.DataFrame, tb_name: str, partitions: list[str] | None = None, abs_path: bool = False):
|
|
90
89
|
if not abs_path:
|
|
91
90
|
tbpath = tb_path(tb_name)
|
|
92
91
|
else:
|
|
93
92
|
tbpath = tb_name
|
|
94
|
-
if not
|
|
95
|
-
|
|
96
|
-
os.makedirs(tbpath)
|
|
97
|
-
except FileExistsError as e:
|
|
98
|
-
pass
|
|
93
|
+
if not tbpath.exists():
|
|
94
|
+
os.makedirs(tbpath, exist_ok=True)
|
|
99
95
|
if partitions is not None:
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
96
|
+
df.write_parquet(tbpath, partition_by=partitions)
|
|
97
|
+
else:
|
|
98
|
+
df.write_parquet(tbpath / "data.parquet")
|
|
103
99
|
|
|
104
100
|
|
|
105
101
|
def sql(query: str, abs_path: bool = False, lazy: bool = True):
|
|
@@ -119,27 +115,58 @@ def sql(query: str, abs_path: bool = False, lazy: bool = True):
|
|
|
119
115
|
return pl.sql(new_query)
|
|
120
116
|
|
|
121
117
|
|
|
122
|
-
def
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
118
|
+
def read_mysql(query: str, db_conf: str = "database.mysql") -> pl.DataFrame:
|
|
119
|
+
"""
|
|
120
|
+
读取 mysql 返回 polars.DataFrame
|
|
121
|
+
:param query:
|
|
122
|
+
:param db_conf: .catdb/settings.toml 中的 database 配置
|
|
123
|
+
:return: polars.DataFrame
|
|
124
|
+
"""
|
|
125
|
+
try:
|
|
126
|
+
db_setting = get_settings().get(db_conf, {})
|
|
127
|
+
if not isinstance(db_setting, dict):
|
|
128
|
+
raise ValueError(f"Database configuration '{db_conf}' is not a dictionary.")
|
|
129
|
+
|
|
130
|
+
required_keys = ['user', 'password', 'url']
|
|
131
|
+
missing_keys = [key for key in required_keys if key not in db_setting]
|
|
132
|
+
if missing_keys:
|
|
133
|
+
raise KeyError(f"Missing required keys in database config: {missing_keys}")
|
|
134
|
+
|
|
135
|
+
user = urllib.parse.quote_plus(db_setting['user'])
|
|
136
|
+
password = urllib.parse.quote_plus(db_setting['password'])
|
|
137
|
+
uri = f"mysql://{user}:{password}@{db_setting['url']}"
|
|
138
|
+
return pl.read_database_uri(query, uri)
|
|
139
|
+
|
|
140
|
+
except KeyError as e:
|
|
141
|
+
raise RuntimeError("Database configuration error: missing required fields.") from e
|
|
142
|
+
except Exception as e:
|
|
143
|
+
raise RuntimeError(f"Failed to execute MySQL query: {e}") from e
|
|
129
144
|
|
|
130
145
|
|
|
131
|
-
def
|
|
146
|
+
def read_ck(query: str, db_conf: str = "database.ck") -> pl.DataFrame:
|
|
132
147
|
"""
|
|
133
|
-
|
|
134
|
-
:param
|
|
135
|
-
:param
|
|
136
|
-
:
|
|
137
|
-
:return:
|
|
148
|
+
读取 clickhouse 集群 返回 polars.DataFrame
|
|
149
|
+
:param query:
|
|
150
|
+
:param db_conf: .catdb/settings.toml 中的 database 配置
|
|
151
|
+
:return: polars.DataFrame
|
|
138
152
|
"""
|
|
139
|
-
|
|
140
|
-
|
|
153
|
+
try:
|
|
154
|
+
db_setting = get_settings().get(db_conf, {})
|
|
155
|
+
if not isinstance(db_setting, dict):
|
|
156
|
+
raise ValueError(f"Database configuration '{db_conf}' is not a dictionary.")
|
|
157
|
+
|
|
158
|
+
required_keys = ['user', 'password', 'urls']
|
|
159
|
+
missing_keys = [key for key in required_keys if key not in db_setting]
|
|
160
|
+
if missing_keys:
|
|
161
|
+
raise KeyError(f"Missing required keys in database config: {missing_keys}")
|
|
141
162
|
|
|
163
|
+
user = urllib.parse.quote_plus(db_setting['user'])
|
|
164
|
+
password = urllib.parse.quote_plus(db_setting['password'])
|
|
142
165
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
166
|
+
with clickhouse_df.connect(db_setting['urls'], user=user, password=password):
|
|
167
|
+
return clickhouse_df.to_polars(query)
|
|
168
|
+
|
|
169
|
+
except KeyError as e:
|
|
170
|
+
raise RuntimeError("Database configuration error: missing required fields.") from e
|
|
171
|
+
except Exception as e:
|
|
172
|
+
raise RuntimeError(f"Failed to execute ClickHouse query: {e}") from e
|
ycat/parse.py
CHANGED
{qdf → ycat/qdf}/__init__.py
RENAMED
ycat/updator.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
---------------------------------------------
|
|
4
|
+
Created on 2025/5/23 01:34
|
|
5
|
+
@author: ZhangYundi
|
|
6
|
+
@email: yundi.xxii@outlook.com
|
|
7
|
+
---------------------------------------------
|
|
8
|
+
"""
|
|
9
|
+
import os
|
|
10
|
+
from datetime import datetime, timedelta
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import ygo
|
|
14
|
+
import ylog
|
|
15
|
+
from .client import CATDB
|
|
16
|
+
|
|
17
|
+
DATE_FORMAT = "%Y-%m-%d"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Updator:
|
|
21
|
+
"""
|
|
22
|
+
数据更新器
|
|
23
|
+
路径:{ycat.CATDB}/updator/{name}
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
def __init__(self, name: str, update_time="16:30"):
|
|
27
|
+
"""
|
|
28
|
+
数据更新器
|
|
29
|
+
:param name: 数据更新器名称
|
|
30
|
+
:param update_time: 数据更新时间,默认16:30
|
|
31
|
+
"""
|
|
32
|
+
self.name = name
|
|
33
|
+
self._tb_path = Path(CATDB) / "updator" / name
|
|
34
|
+
os.makedirs(self._tb_path, exist_ok=True)
|
|
35
|
+
self._update_time = update_time
|
|
36
|
+
self.present = datetime.now().today()
|
|
37
|
+
|
|
38
|
+
if self.present.strftime("%H:%M") >= self._update_time:
|
|
39
|
+
self.last_date = self.present.strftime(DATE_FORMAT)
|
|
40
|
+
else:
|
|
41
|
+
self.last_date = (self.present - timedelta(days=1)).strftime(DATE_FORMAT)
|
|
42
|
+
|
|
43
|
+
self._tasks = list()
|
|
44
|
+
self._last_run_file = self._tb_path / f".last_run"
|
|
45
|
+
self.logger = ylog.get_logger("updator")
|
|
46
|
+
|
|
47
|
+
@property
|
|
48
|
+
def last_update_date(self):
|
|
49
|
+
return self._read_last_run_date()
|
|
50
|
+
|
|
51
|
+
def _read_last_run_date(self):
|
|
52
|
+
if self._last_run_file.exists():
|
|
53
|
+
with open(self._last_run_file, "r") as f:
|
|
54
|
+
return f.read().strip()
|
|
55
|
+
return
|
|
56
|
+
|
|
57
|
+
def _write_last_run_date(self, date_str: str):
|
|
58
|
+
with open(self._last_run_file, "w") as f:
|
|
59
|
+
f.write(date_str)
|
|
60
|
+
|
|
61
|
+
def wrap_fn(self, task_name: str, update_fn: callable):
|
|
62
|
+
"""包装函数,添加异常处理"""
|
|
63
|
+
try:
|
|
64
|
+
update_fn()
|
|
65
|
+
return 0
|
|
66
|
+
except Exception as e:
|
|
67
|
+
self.logger.error(ygo.FailTaskError(task_name=task_name, error=e))
|
|
68
|
+
return 1
|
|
69
|
+
|
|
70
|
+
def add_task(self, task_name: str, update_fn: callable):
|
|
71
|
+
"""添加任务"""
|
|
72
|
+
self._tasks.append((task_name, ygo.delay(self.wrap_fn)(task_name=task_name, update_fn=update_fn)))
|
|
73
|
+
|
|
74
|
+
def do(self,
|
|
75
|
+
overwrite: bool = False,
|
|
76
|
+
n_jobs: int = 10,
|
|
77
|
+
backend: str = "threading"):
|
|
78
|
+
"""
|
|
79
|
+
执行任务
|
|
80
|
+
:param overwrite: 是否覆盖现有数据
|
|
81
|
+
:param n_jobs: 并发数
|
|
82
|
+
:param backend: loky/threading/multiprocessing
|
|
83
|
+
:return:
|
|
84
|
+
"""
|
|
85
|
+
if not overwrite:
|
|
86
|
+
local_last_date = self._read_last_run_date()
|
|
87
|
+
if local_last_date is not None:
|
|
88
|
+
if local_last_date >= self.last_date:
|
|
89
|
+
self.logger.info(f"[{self.name}] 已是最新数据,跳过更新")
|
|
90
|
+
return
|
|
91
|
+
self.logger.info(f"[{self.name}] 更新数据")
|
|
92
|
+
failed_num = 0
|
|
93
|
+
with ygo.pool(n_jobs=n_jobs, backend=backend) as go:
|
|
94
|
+
for task_name, task in self._tasks:
|
|
95
|
+
go.submit(task, job_name=task_name)()
|
|
96
|
+
for status in go.do():
|
|
97
|
+
failed_num += status
|
|
98
|
+
if failed_num < 1:
|
|
99
|
+
self._write_last_run_date(self.last_date)
|
|
100
|
+
self.logger.info(f"[{self.name}] 更新成功,最新数据日期:{self.last_date}")
|
|
101
|
+
self.logger.info(f"[{self.name}] 更新完成,失败任务数:{str(failed_num).zfill(2)}/{str(len(self._tasks)).zfill(2)}")
|
ygo/__init__.py
CHANGED
|
@@ -7,4 +7,28 @@ Created on 2025/4/28 15:25
|
|
|
7
7
|
---------------------------------------------
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
from .
|
|
10
|
+
from .exceptions import FailTaskError
|
|
11
|
+
from .ygo import (
|
|
12
|
+
delay,
|
|
13
|
+
fn_params,
|
|
14
|
+
fn_signature_params,
|
|
15
|
+
fn_path,
|
|
16
|
+
fn_code,
|
|
17
|
+
fn_info,
|
|
18
|
+
module_from_str,
|
|
19
|
+
fn_from_str,
|
|
20
|
+
pool,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"FailTaskError",
|
|
25
|
+
"delay",
|
|
26
|
+
"fn_params",
|
|
27
|
+
"fn_signature_params",
|
|
28
|
+
"fn_path",
|
|
29
|
+
"fn_code",
|
|
30
|
+
"fn_info",
|
|
31
|
+
"fn_from_str",
|
|
32
|
+
"module_from_str",
|
|
33
|
+
"pool"
|
|
34
|
+
]
|
ygo/exceptions.py
CHANGED
|
@@ -7,7 +7,23 @@ Created on 2024/12/18 下午7:01
|
|
|
7
7
|
---------------------------------------------
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
|
|
10
12
|
class WarnException(Exception):
|
|
11
13
|
"""自定义异常类,仅用于警告"""
|
|
12
14
|
def __init__(self, message):
|
|
13
|
-
super().__init__(message) # 调用父类的构造函数
|
|
15
|
+
super().__init__(message) # 调用父类的构造函数
|
|
16
|
+
|
|
17
|
+
@dataclass
|
|
18
|
+
class FailTaskError:
|
|
19
|
+
task_name: str
|
|
20
|
+
error: Exception
|
|
21
|
+
|
|
22
|
+
def __str__(self):
|
|
23
|
+
return f"""
|
|
24
|
+
[失败任务]: {self.task_name}
|
|
25
|
+
[错误信息]: \n{self.error}
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
def __repr__(self):
|
|
29
|
+
return self.__str__()
|
ygo/ygo.py
CHANGED
|
@@ -18,7 +18,7 @@ from pathlib import Path
|
|
|
18
18
|
from joblib import Parallel, delayed
|
|
19
19
|
|
|
20
20
|
import ylog
|
|
21
|
-
from .exceptions import WarnException
|
|
21
|
+
from .exceptions import WarnException, FailTaskError
|
|
22
22
|
|
|
23
23
|
with warnings.catch_warnings():
|
|
24
24
|
warnings.simplefilter("ignore")
|
|
@@ -228,22 +228,10 @@ def run_job(job, task_id, queue):
|
|
|
228
228
|
try:
|
|
229
229
|
result = job()
|
|
230
230
|
except WarnException as e:
|
|
231
|
-
|
|
232
|
-
=============================================================
|
|
233
|
-
{job.task_name}: {job.task_id}
|
|
234
|
-
{e}
|
|
235
|
-
=============================================================
|
|
236
|
-
"""
|
|
237
|
-
ylog.warning(warn_msg)
|
|
231
|
+
ylog.warning(FailTaskError(task_name=job.task_name, error=e))
|
|
238
232
|
result = None
|
|
239
233
|
except Exception as e:
|
|
240
|
-
|
|
241
|
-
=============================================================
|
|
242
|
-
{job.task_name}: {job.task_id}
|
|
243
|
-
{e}
|
|
244
|
-
=============================================================
|
|
245
|
-
"""
|
|
246
|
-
ylog.error(error_msg)
|
|
234
|
+
ylog.error(FailTaskError(task_name=job.task_name, error=e))
|
|
247
235
|
result = None
|
|
248
236
|
queue.put((task_id, 1))
|
|
249
237
|
return result
|
|
@@ -1,17 +1,21 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ygo
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.10
|
|
4
4
|
Project-URL: homepage, https://github.com/link-yundi/ygo
|
|
5
5
|
Project-URL: repository, https://github.com/link-yundi/ygo
|
|
6
|
-
Requires-Python: >=3.
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
8
|
License-File: LICENSE
|
|
9
|
+
Requires-Dist: clickhouse-df>=0.1.5
|
|
9
10
|
Requires-Dist: clickhouse-driver>=0.2.9
|
|
11
|
+
Requires-Dist: connectorx>=0.3.3
|
|
10
12
|
Requires-Dist: dynaconf>=3.2.11
|
|
11
13
|
Requires-Dist: exchange-calendars>=4.2.8
|
|
12
14
|
Requires-Dist: joblib>=1.4.2
|
|
13
15
|
Requires-Dist: lark>=1.2.2
|
|
16
|
+
Requires-Dist: lightgbm>=4.6.0
|
|
14
17
|
Requires-Dist: loguru>=0.7.3
|
|
18
|
+
Requires-Dist: mlflow>=2.17.2
|
|
15
19
|
Requires-Dist: pandas>=2.0.3
|
|
16
20
|
Requires-Dist: polars>=1.8.2
|
|
17
21
|
Requires-Dist: pyarrow>=17.0.0
|
|
@@ -19,6 +23,7 @@ Requires-Dist: pymysql>=1.1.1
|
|
|
19
23
|
Requires-Dist: sqlalchemy>=2.0.40
|
|
20
24
|
Requires-Dist: sqlparse>=0.5.3
|
|
21
25
|
Requires-Dist: toolz>=1.0.0
|
|
26
|
+
Requires-Dist: torch>=2.5.1
|
|
22
27
|
Requires-Dist: tqdm>=4.67.1
|
|
23
28
|
Dynamic: license-file
|
|
24
29
|
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
ycat/__init__.py,sha256=8cVsQXL476hDIU_yDxfoAC9ILC4_Tnp1Y1dDTs6vkXA,529
|
|
2
|
+
ycat/client.py,sha256=Z5lwybzXv6MADbbooDT-rhdr9JMI_t2TDAXt0ghongQ,5451
|
|
3
|
+
ycat/parse.py,sha256=piz_iciF7JFnn8v_qtUEHP6NZ_MWJidsA5gBpLtuZzw,2195
|
|
4
|
+
ycat/updator.py,sha256=dV2vhYVlaoK8eRQggAkNkZuu9zbOYFajicyggMt-r78,3393
|
|
5
|
+
ycat/qdf/__init__.py,sha256=8fIBr0FUAYGc33CYnWILY0Ur2DXdctWjw28S5qDWhD4,7572
|
|
6
|
+
ycat/qdf/errors.py,sha256=lJhhjDRdQOOKUFGlLQ9ELK4AexXBwYQSYus_V-kc5K8,1180
|
|
7
|
+
ycat/qdf/expr.py,sha256=ck_BHMCV29Q8-szci1_v4ud964QI7JoRRcmA0ppupsc,8454
|
|
8
|
+
ycat/qdf/qdf.py,sha256=XcnGyyfuRY1HqaG56kC5tB6psrIXqo9QVQtgH3mhips,7322
|
|
9
|
+
ycat/qdf/udf/__init__.py,sha256=DdrSGaCB__5C1YL0vd_5rjIB3KLrAKn3h3k9k50L0jA,313
|
|
10
|
+
ycat/qdf/udf/base_udf.py,sha256=6VDaCIGNLJxZ7UsoIDWtTH6PzUDj89b8FiwN-TEat2g,3437
|
|
11
|
+
ycat/qdf/udf/cs_udf.py,sha256=HT3EKBwAhOxOFDQnpfwb4YcMTT3-lqFXkdysdn5_FI4,3179
|
|
12
|
+
ycat/qdf/udf/d_udf.py,sha256=L9mkX6yDpQPwXvQTAebDepjEkirCqur1DfV2Fnl8KA0,5352
|
|
13
|
+
ycat/qdf/udf/ind_udf.py,sha256=hDCKfcLFCgIhdC9dQ5GYxLemZaOE6K7kQyAnjUrwePM,6482
|
|
14
|
+
ycat/qdf/udf/ts_udf.py,sha256=uUuZnKMY-V_uInP0nsBMblDpxY3ld3EwvrXTwWMqeig,5410
|
|
15
|
+
ygo/__init__.py,sha256=kQK7CwVCz8NJTj5eS9Xrt_G1kPHvDIbe2sTzHgWITxI,590
|
|
16
|
+
ygo/exceptions.py,sha256=0OYDYt_9KKo8mF2XBG5QkCMr3-ASp69VDSPOEwlIsrI,660
|
|
17
|
+
ygo/ygo.py,sha256=kcXI5vzndNOJqEEEZOeWbn61O47gW72UDiUWN1v9AYc,11290
|
|
18
|
+
ygo-1.0.10.dist-info/licenses/LICENSE,sha256=6AKUWQ1xe-jwPSFv_H6FMQLNNWb7AYqzuEUTwlP2S8M,1067
|
|
19
|
+
ylog/__init__.py,sha256=AoRCQ-o4gWAcJ8svw30wM5UJyccx45WhYIndrrkNv8o,428
|
|
20
|
+
ylog/core.py,sha256=d6QCFRDTvlyxgvS6JphUGOgX5Mgx9qPv9wB3g-4YOJw,9225
|
|
21
|
+
ygo-1.0.10.dist-info/METADATA,sha256=J0M_naeBSfTGAcep-85CtjOz-Tz0z2l8h_VKd7RLIq4,2235
|
|
22
|
+
ygo-1.0.10.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
23
|
+
ygo-1.0.10.dist-info/top_level.txt,sha256=jEbfiz5fX4iSzDg8_Npdv5SIC_Kphmb1m3vuyD9ZC1E,14
|
|
24
|
+
ygo-1.0.10.dist-info/RECORD,,
|
ylog/__init__.py
CHANGED
|
@@ -7,7 +7,7 @@ Created on 2025/5/14 15:37
|
|
|
7
7
|
---------------------------------------------
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
from .core import trace, debug, info, warning, error, critical, update_config
|
|
10
|
+
from .core import trace, debug, info, warning, error, critical, update_config, get_logger
|
|
11
11
|
|
|
12
12
|
__all__ = [
|
|
13
13
|
"trace",
|
|
@@ -16,5 +16,6 @@ __all__ = [
|
|
|
16
16
|
"warning",
|
|
17
17
|
"error",
|
|
18
18
|
"critical",
|
|
19
|
-
"update_config"
|
|
19
|
+
"update_config",
|
|
20
|
+
"get_logger",
|
|
20
21
|
]
|
ylog/core.py
CHANGED
|
@@ -27,21 +27,14 @@ class _Logger:
|
|
|
27
27
|
5. 全局异常捕获:自动记录未处理异常
|
|
28
28
|
"""
|
|
29
29
|
|
|
30
|
-
_instance = None
|
|
31
|
-
_lock = Lock()
|
|
32
|
-
|
|
33
30
|
def __new__(cls, *args, **kwargs):
|
|
34
|
-
"""
|
|
35
|
-
|
|
36
|
-
with cls._lock:
|
|
37
|
-
if not cls._instance:
|
|
38
|
-
cls._instance = super().__new__(cls)
|
|
39
|
-
return cls._instance
|
|
31
|
+
"""移除单例限制,允许自由创建多个实例"""
|
|
32
|
+
return super().__new__(cls)
|
|
40
33
|
|
|
41
34
|
def __init__(
|
|
42
35
|
self,
|
|
43
36
|
log_dir: str = "logs",
|
|
44
|
-
app_name: str =
|
|
37
|
+
app_name: str|None = None,
|
|
45
38
|
retention_days: int = 7,
|
|
46
39
|
error_retention_days: int = 30,
|
|
47
40
|
enable_console: bool = True,
|
|
@@ -111,27 +104,48 @@ class _Logger:
|
|
|
111
104
|
"TRACE": {"level": "TRACE", "retention": f"{retention_days} days", "rotation": "daily"},
|
|
112
105
|
"DEBUG": {"level": "DEBUG", "retention": f"{retention_days} days", "rotation": "daily"},
|
|
113
106
|
"INFO": {"level": "INFO", "retention": f"{retention_days} days", "rotation": "daily"},
|
|
114
|
-
"WARNING": {"level": "WARNING", "retention": f"{
|
|
107
|
+
"WARNING": {"level": "WARNING", "retention": f"{error_retention_days} days", "rotation": "daily"},
|
|
115
108
|
"ERROR": {"level": "ERROR", "retention": f"{error_retention_days} days", "rotation": "daily"},
|
|
116
109
|
"CRITICAL": {"level": "CRITICAL", "retention": f"{error_retention_days} days", "rotation": "daily"}
|
|
117
110
|
}
|
|
118
111
|
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
112
|
+
# 错误级别以上的日志
|
|
113
|
+
error_levels = ("WARNING", "ERROR", "CRITICAL")
|
|
114
|
+
_error_log_file = f"{self.app_name}_{{time:YYYY-MM-DD}}.err.log" if self.app_name is not None else f"{{time:YYYY-MM-DD}}.err.log"
|
|
115
|
+
error_log_file = self.log_dir / _error_log_file
|
|
116
|
+
info_levels = ("INFO", "DEBUG", "TRACE")
|
|
117
|
+
_info_log_file = f"{self.app_name}_{{time:YYYY-MM-DD}}.log" if self.app_name is not None else f"{{time:YYYY-MM-DD}}.log"
|
|
118
|
+
info_log_file = self.log_dir / _info_log_file
|
|
119
|
+
# 错误级别以上的日志
|
|
120
|
+
logger.add(
|
|
121
|
+
str(error_log_file),
|
|
122
|
+
level="ERROR",
|
|
123
|
+
format=common_format,
|
|
124
|
+
rotation=levels["ERROR"]["rotation"],
|
|
125
|
+
retention=levels["ERROR"]["retention"],
|
|
126
|
+
compression="zip",
|
|
127
|
+
backtrace=True,
|
|
128
|
+
diagnose=self.debug_mode,
|
|
129
|
+
# enqueue=True, # 异步写入
|
|
130
|
+
filter=lambda record: record["level"].name in error_levels,
|
|
131
|
+
catch=True # 捕获格式化异常
|
|
132
|
+
)
|
|
133
|
+
# 错误级别以下的日志
|
|
134
|
+
logger.add(
|
|
135
|
+
str(info_log_file),
|
|
136
|
+
level="INFO",
|
|
137
|
+
format=common_format,
|
|
138
|
+
rotation=levels["INFO"]["rotation"],
|
|
139
|
+
retention=levels["INFO"]["retention"],
|
|
140
|
+
compression="zip",
|
|
141
|
+
backtrace=True,
|
|
142
|
+
diagnose=self.debug_mode,
|
|
143
|
+
# enqueue=True, # 异步写入
|
|
144
|
+
filter=lambda record: record["level"].name in info_levels,
|
|
145
|
+
catch=True # 捕获格式化异常
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
|
|
135
149
|
|
|
136
150
|
def _setup_global_exception_handling(self):
|
|
137
151
|
"""配置全局异常捕获"""
|
|
@@ -217,7 +231,7 @@ class _Logger:
|
|
|
217
231
|
self._setup_file_logging(self._retention_days, self._error_retention_days)
|
|
218
232
|
|
|
219
233
|
# 初始化默认实例
|
|
220
|
-
_default_logger = _Logger(
|
|
234
|
+
_default_logger = _Logger()
|
|
221
235
|
|
|
222
236
|
# 将日志方法绑定到模块级别
|
|
223
237
|
trace = _default_logger.trace
|
|
@@ -227,3 +241,22 @@ warning = _default_logger.warning
|
|
|
227
241
|
error = _default_logger.error
|
|
228
242
|
critical = _default_logger.critical
|
|
229
243
|
update_config = _default_logger.update_config
|
|
244
|
+
|
|
245
|
+
def get_logger(app_name: str,
|
|
246
|
+
log_dir: str = "logs",
|
|
247
|
+
retention_days: int = 7,
|
|
248
|
+
error_retention_days: int = 30,
|
|
249
|
+
enable_console: bool = True,
|
|
250
|
+
enable_file: bool = True,
|
|
251
|
+
debug_mode: bool = False):
|
|
252
|
+
"""获取指定应用的日志实例"""
|
|
253
|
+
return _Logger(
|
|
254
|
+
app_name=app_name,
|
|
255
|
+
log_dir=log_dir,
|
|
256
|
+
retention_days=retention_days,
|
|
257
|
+
error_retention_days=error_retention_days,
|
|
258
|
+
enable_console=enable_console,
|
|
259
|
+
enable_file=enable_file,
|
|
260
|
+
debug_mode=debug_mode
|
|
261
|
+
)
|
|
262
|
+
|
ycat/dtype.py
DELETED
|
@@ -1,389 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
---------------------------------------------
|
|
4
|
-
Created on 2024/11/4 下午1:20
|
|
5
|
-
@author: ZhangYundi
|
|
6
|
-
@email: yundi.xxii@outlook.com
|
|
7
|
-
---------------------------------------------
|
|
8
|
-
"""
|
|
9
|
-
import functools
|
|
10
|
-
import re
|
|
11
|
-
from typing import Any
|
|
12
|
-
import pyarrow as pa
|
|
13
|
-
import re # 正则解析 Decimal 类型
|
|
14
|
-
|
|
15
|
-
from polars._typing import PolarsDataType
|
|
16
|
-
from polars.datatypes import (
|
|
17
|
-
Binary,
|
|
18
|
-
Boolean,
|
|
19
|
-
Date,
|
|
20
|
-
Datetime,
|
|
21
|
-
Decimal,
|
|
22
|
-
Duration,
|
|
23
|
-
Float32,
|
|
24
|
-
Float64,
|
|
25
|
-
Int8,
|
|
26
|
-
Int16,
|
|
27
|
-
Int32,
|
|
28
|
-
Int64,
|
|
29
|
-
List,
|
|
30
|
-
Null,
|
|
31
|
-
String,
|
|
32
|
-
Time,
|
|
33
|
-
UInt8,
|
|
34
|
-
UInt16,
|
|
35
|
-
UInt32,
|
|
36
|
-
UInt64,
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
@functools.lru_cache(8)
|
|
41
|
-
def integer_dtype_from_nbits(
|
|
42
|
-
bits: int,
|
|
43
|
-
*,
|
|
44
|
-
unsigned: bool,
|
|
45
|
-
default: PolarsDataType | None = None,
|
|
46
|
-
) -> PolarsDataType | None:
|
|
47
|
-
"""
|
|
48
|
-
Return matching Polars integer dtype from num bits and signed/unsigned flag.
|
|
49
|
-
|
|
50
|
-
Examples
|
|
51
|
-
--------
|
|
52
|
-
>>> integer_dtype_from_nbits(8, unsigned=False)
|
|
53
|
-
Int8
|
|
54
|
-
>>> integer_dtype_from_nbits(32, unsigned=True)
|
|
55
|
-
UInt32
|
|
56
|
-
"""
|
|
57
|
-
dtype = {
|
|
58
|
-
(8, False): Int8,
|
|
59
|
-
(8, True): UInt8,
|
|
60
|
-
(16, False): Int16,
|
|
61
|
-
(16, True): UInt16,
|
|
62
|
-
(32, False): Int32,
|
|
63
|
-
(32, True): UInt32,
|
|
64
|
-
(64, False): Int64,
|
|
65
|
-
(64, True): UInt64,
|
|
66
|
-
}.get((bits, unsigned), None)
|
|
67
|
-
|
|
68
|
-
if dtype is None and default is not None:
|
|
69
|
-
return default
|
|
70
|
-
return dtype
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
def timeunit_from_precision(precision: int | str | None) -> str | None:
|
|
74
|
-
"""
|
|
75
|
-
Return `time_unit` from integer precision value.
|
|
76
|
-
|
|
77
|
-
Examples
|
|
78
|
-
--------
|
|
79
|
-
>>> timeunit_from_precision(3)
|
|
80
|
-
'ms'
|
|
81
|
-
>>> timeunit_from_precision(5)
|
|
82
|
-
'us'
|
|
83
|
-
>>> timeunit_from_precision(7)
|
|
84
|
-
'ns'
|
|
85
|
-
"""
|
|
86
|
-
from math import ceil
|
|
87
|
-
|
|
88
|
-
if not precision:
|
|
89
|
-
return None
|
|
90
|
-
elif isinstance(precision, str):
|
|
91
|
-
if precision.isdigit():
|
|
92
|
-
precision = int(precision)
|
|
93
|
-
elif (precision := precision.lower()) in ("s", "ms", "us", "ns"):
|
|
94
|
-
return "ms" if precision == "s" else precision
|
|
95
|
-
try:
|
|
96
|
-
n = min(max(3, int(ceil(precision / 3)) * 3), 9) # type: ignore[operator]
|
|
97
|
-
return {3: "ms", 6: "us", 9: "ns"}.get(n)
|
|
98
|
-
except TypeError:
|
|
99
|
-
return None
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def infer_dtype_from_database_typename(
|
|
103
|
-
value: str,
|
|
104
|
-
*,
|
|
105
|
-
raise_unmatched: bool = True,
|
|
106
|
-
) -> PolarsDataType | None:
|
|
107
|
-
"""
|
|
108
|
-
Attempt to infer Polars dtype from database cursor `type_code` string value.
|
|
109
|
-
|
|
110
|
-
Examples
|
|
111
|
-
--------
|
|
112
|
-
>>> infer_dtype_from_database_typename("INT2")
|
|
113
|
-
Int16
|
|
114
|
-
>>> infer_dtype_from_database_typename("NVARCHAR")
|
|
115
|
-
String
|
|
116
|
-
>>> infer_dtype_from_database_typename("NUMERIC(10,2)")
|
|
117
|
-
Decimal(precision=10, scale=2)
|
|
118
|
-
>>> infer_dtype_from_database_typename("TIMESTAMP WITHOUT TZ")
|
|
119
|
-
Datetime(time_unit='us', time_zone=None)
|
|
120
|
-
"""
|
|
121
|
-
dtype: PolarsDataType | None = None
|
|
122
|
-
|
|
123
|
-
# normalise string name/case (eg: 'IntegerType' -> 'INTEGER')
|
|
124
|
-
original_value = value
|
|
125
|
-
value = value.upper().replace("TYPE", "")
|
|
126
|
-
|
|
127
|
-
# extract optional type modifier (eg: 'VARCHAR(64)' -> '64')
|
|
128
|
-
if re.search(r"\([\w,: ]+\)$", value):
|
|
129
|
-
modifier = value[value.find("(") + 1: -1]
|
|
130
|
-
value = value.split("(")[0]
|
|
131
|
-
# Nullable type
|
|
132
|
-
if value.upper() == "NULLABLE":
|
|
133
|
-
return infer_dtype_from_database_typename(modifier)
|
|
134
|
-
elif (
|
|
135
|
-
not value.startswith(("<", ">")) and re.search(r"\[[\w,\]\[: ]+]$", value)
|
|
136
|
-
) or value.endswith(("[S]", "[MS]", "[US]", "[NS]")):
|
|
137
|
-
modifier = value[value.find("[") + 1: -1]
|
|
138
|
-
value = value.split("[")[0]
|
|
139
|
-
else:
|
|
140
|
-
modifier = ""
|
|
141
|
-
|
|
142
|
-
# array dtypes
|
|
143
|
-
array_aliases = ("ARRAY", "LIST", "[]")
|
|
144
|
-
if value.endswith(array_aliases) or value.startswith(array_aliases):
|
|
145
|
-
for a in array_aliases:
|
|
146
|
-
value = value.replace(a, "", 1) if value else ""
|
|
147
|
-
|
|
148
|
-
nested: PolarsDataType | None = None
|
|
149
|
-
if not value and modifier:
|
|
150
|
-
nested = infer_dtype_from_database_typename(
|
|
151
|
-
value=modifier,
|
|
152
|
-
raise_unmatched=False,
|
|
153
|
-
)
|
|
154
|
-
else:
|
|
155
|
-
if inner_value := infer_dtype_from_database_typename(
|
|
156
|
-
value[1:-1]
|
|
157
|
-
if (value[0], value[-1]) == ("<", ">")
|
|
158
|
-
else re.sub(r"\W", "", re.sub(r"\WOF\W", "", value)),
|
|
159
|
-
raise_unmatched=False,
|
|
160
|
-
):
|
|
161
|
-
nested = inner_value
|
|
162
|
-
elif modifier:
|
|
163
|
-
nested = infer_dtype_from_database_typename(
|
|
164
|
-
value=modifier,
|
|
165
|
-
raise_unmatched=False,
|
|
166
|
-
)
|
|
167
|
-
if nested:
|
|
168
|
-
dtype = List(nested)
|
|
169
|
-
|
|
170
|
-
# float dtypes
|
|
171
|
-
elif value.startswith("FLOAT") or ("DOUBLE" in value) or (value == "REAL"):
|
|
172
|
-
dtype = (
|
|
173
|
-
Float32
|
|
174
|
-
if value == "FLOAT4"
|
|
175
|
-
or (value.endswith(("16", "32")) or (modifier in ("16", "32")))
|
|
176
|
-
else Float64
|
|
177
|
-
)
|
|
178
|
-
|
|
179
|
-
# integer dtypes
|
|
180
|
-
elif ("INTERVAL" not in value) and (
|
|
181
|
-
value.startswith(("INT", "UINT", "UNSIGNED"))
|
|
182
|
-
or value.endswith(("INT", "SERIAL"))
|
|
183
|
-
or ("INTEGER" in value)
|
|
184
|
-
or value == "ROWID"
|
|
185
|
-
):
|
|
186
|
-
sz: Any
|
|
187
|
-
if "LARGE" in value or value.startswith("BIG") or value == "INT8":
|
|
188
|
-
sz = 64
|
|
189
|
-
elif "MEDIUM" in value or value in ("INT4", "SERIAL"):
|
|
190
|
-
sz = 32
|
|
191
|
-
elif "SMALL" in value or value == "INT2":
|
|
192
|
-
sz = 16
|
|
193
|
-
elif "TINY" in value:
|
|
194
|
-
sz = 8
|
|
195
|
-
else:
|
|
196
|
-
sz = None
|
|
197
|
-
|
|
198
|
-
sz = modifier if (not sz and modifier) else sz
|
|
199
|
-
if not isinstance(sz, int):
|
|
200
|
-
sz = int(sz) if isinstance(sz, str) and sz.isdigit() else None
|
|
201
|
-
if (
|
|
202
|
-
("U" in value and "MEDIUM" not in value)
|
|
203
|
-
or ("UNSIGNED" in value)
|
|
204
|
-
or value == "ROWID"
|
|
205
|
-
):
|
|
206
|
-
dtype = integer_dtype_from_nbits(sz, unsigned=True, default=UInt64)
|
|
207
|
-
else:
|
|
208
|
-
dtype = integer_dtype_from_nbits(sz, unsigned=False, default=Int64)
|
|
209
|
-
|
|
210
|
-
# number types (note: 'number' alone is not that helpful and requires refinement)
|
|
211
|
-
elif "NUMBER" in value and "CARDINAL" in value:
|
|
212
|
-
dtype = UInt64
|
|
213
|
-
|
|
214
|
-
# decimal dtypes
|
|
215
|
-
elif (is_dec := ("DECIMAL" in value)) or ("NUMERIC" in value):
|
|
216
|
-
if "," in modifier:
|
|
217
|
-
prec, scale = modifier.split(",")
|
|
218
|
-
dtype = Decimal(int(prec), int(scale))
|
|
219
|
-
else:
|
|
220
|
-
dtype = Decimal if is_dec else Float64
|
|
221
|
-
|
|
222
|
-
# string dtypes
|
|
223
|
-
elif (
|
|
224
|
-
any(tp in value for tp in ("VARCHAR", "STRING", "TEXT", "UNICODE"))
|
|
225
|
-
or value.startswith(("STR", "CHAR", "BPCHAR", "NCHAR", "UTF"))
|
|
226
|
-
or value.endswith(("_UTF8", "_UTF16", "_UTF32"))
|
|
227
|
-
):
|
|
228
|
-
dtype = String
|
|
229
|
-
|
|
230
|
-
# binary dtypes
|
|
231
|
-
elif value in ("BYTEA", "BYTES", "BLOB", "CLOB", "BINARY"):
|
|
232
|
-
dtype = Binary
|
|
233
|
-
|
|
234
|
-
# boolean dtypes
|
|
235
|
-
elif value.startswith("BOOL"):
|
|
236
|
-
dtype = Boolean
|
|
237
|
-
|
|
238
|
-
# null dtype; odd, but valid
|
|
239
|
-
elif value == "NULL":
|
|
240
|
-
dtype = Null
|
|
241
|
-
|
|
242
|
-
# temporal dtypes
|
|
243
|
-
elif value.startswith(("DATETIME", "TIMESTAMP")) and not (value.endswith("[D]")):
|
|
244
|
-
if any((tz in value.replace(" ", "")) for tz in ("TZ", "TIMEZONE")):
|
|
245
|
-
if "WITHOUT" not in value:
|
|
246
|
-
return None # there's a timezone, but we don't know what it is
|
|
247
|
-
unit = timeunit_from_precision(modifier) if modifier else "us"
|
|
248
|
-
dtype = Datetime(time_unit=(unit or "us")) # type: ignore[arg-type]
|
|
249
|
-
else:
|
|
250
|
-
value = re.sub(r"\d", "", value)
|
|
251
|
-
if value in ("INTERVAL", "TIMEDELTA", "DURATION"):
|
|
252
|
-
dtype = Duration
|
|
253
|
-
elif value == "DATE":
|
|
254
|
-
dtype = Date
|
|
255
|
-
elif value == "TIME":
|
|
256
|
-
dtype = Time
|
|
257
|
-
|
|
258
|
-
if not dtype and raise_unmatched:
|
|
259
|
-
msg = f"cannot infer dtype from {original_value!r} string value"
|
|
260
|
-
raise ValueError(msg)
|
|
261
|
-
|
|
262
|
-
return dtype
|
|
263
|
-
|
|
264
|
-
CLICKHOUSE_TO_ARROW_TYPE = {
|
|
265
|
-
# 整数类型
|
|
266
|
-
'Int8': pa.int8(),
|
|
267
|
-
'Int16': pa.int16(),
|
|
268
|
-
'Int32': pa.int32(),
|
|
269
|
-
'Int64': pa.int64(),
|
|
270
|
-
'UInt8': pa.uint8(),
|
|
271
|
-
'UInt16': pa.uint16(),
|
|
272
|
-
'UInt32': pa.uint32(),
|
|
273
|
-
'UInt64': pa.uint64(),
|
|
274
|
-
|
|
275
|
-
# 浮点类型
|
|
276
|
-
'Float32': pa.float32(),
|
|
277
|
-
'Float64': pa.float64(),
|
|
278
|
-
|
|
279
|
-
# 字符串类型
|
|
280
|
-
'String': pa.string(),
|
|
281
|
-
'FixedString': pa.string(), # Arrow 不区分固定长度和动态长度字符串
|
|
282
|
-
|
|
283
|
-
# 日期和时间类型
|
|
284
|
-
'Date': pa.date32(), # ClickHouse 的 Date 是 32 位(天)
|
|
285
|
-
'Date32': pa.date32(),
|
|
286
|
-
'DateTime': pa.timestamp('s'), # ClickHouse DateTime 精度为秒
|
|
287
|
-
'DateTime64': pa.timestamp('ms'), # 默认映射为毫秒精度(可根据需求调整)
|
|
288
|
-
'UUID': pa.binary(16), # UUID 是 16 字节的二进制
|
|
289
|
-
|
|
290
|
-
# 布尔类型
|
|
291
|
-
'Boolean': pa.bool_(),
|
|
292
|
-
|
|
293
|
-
# 数组类型(嵌套类型)
|
|
294
|
-
'Array(Int8)': pa.list_(pa.int8()),
|
|
295
|
-
'Array(Int16)': pa.list_(pa.int16()),
|
|
296
|
-
'Array(Int32)': pa.list_(pa.int32()),
|
|
297
|
-
'Array(Int64)': pa.list_(pa.int64()),
|
|
298
|
-
'Array(UInt8)': pa.list_(pa.uint8()),
|
|
299
|
-
'Array(UInt16)': pa.list_(pa.uint16()),
|
|
300
|
-
'Array(UInt32)': pa.list_(pa.uint32()),
|
|
301
|
-
'Array(UInt64)': pa.list_(pa.uint64()),
|
|
302
|
-
'Array(Float32)': pa.list_(pa.float32()),
|
|
303
|
-
'Array(Float64)': pa.list_(pa.float64()),
|
|
304
|
-
'Array(String)': pa.list_(pa.string()),
|
|
305
|
-
'Array(Date)': pa.list_(pa.date32()),
|
|
306
|
-
'Array(DateTime)': pa.list_(pa.timestamp('s')),
|
|
307
|
-
|
|
308
|
-
# 嵌套类型(元组、枚举等)
|
|
309
|
-
# 注意:Arrow 不直接支持 Tuple,通常需要转换为 Struct
|
|
310
|
-
'Tuple': pa.struct([]), # 需要动态定义每个字段的类型
|
|
311
|
-
# 枚举类型
|
|
312
|
-
'Enum8': pa.string(), # 通常映射为字符串
|
|
313
|
-
'Enum16': pa.string(),
|
|
314
|
-
|
|
315
|
-
# Map 类型
|
|
316
|
-
'Map': pa.map_(pa.string(), pa.string()), # 默认键值对是字符串(可根据需求调整)
|
|
317
|
-
|
|
318
|
-
# Nullable 类型(ClickHouse 的 Nullable 包装类型)
|
|
319
|
-
'Nullable(Int8)': pa.int8(),
|
|
320
|
-
'Nullable(Int16)': pa.int16(),
|
|
321
|
-
'Nullable(Int32)': pa.int32(),
|
|
322
|
-
'Nullable(Int64)': pa.int64(),
|
|
323
|
-
'Nullable(UInt8)': pa.uint8(),
|
|
324
|
-
'Nullable(UInt16)': pa.uint16(),
|
|
325
|
-
'Nullable(UInt32)': pa.uint32(),
|
|
326
|
-
'Nullable(UInt64)': pa.uint64(),
|
|
327
|
-
'Nullable(Float32)': pa.float32(),
|
|
328
|
-
'Nullable(Float64)': pa.float64(),
|
|
329
|
-
'Nullable(String)': pa.string(),
|
|
330
|
-
'Nullable(Date)': pa.date32(),
|
|
331
|
-
'Nullable(DateTime)': pa.timestamp('s'),
|
|
332
|
-
'Nullable(UUID)': pa.binary(16),
|
|
333
|
-
}
|
|
334
|
-
|
|
335
|
-
def map_clickhouse_decimal(ch_type: str) -> pa.DataType:
    """
    Map a ClickHouse Decimal type description to an Arrow decimal type.

    Supported spellings:

    * ``Decimal(P, S)``  -> precision ``P``, scale ``S``
    * ``Decimal(P)``     -> precision ``P``, scale ``0``
    * ``Decimal32(S)`` / ``Decimal64(S)`` / ``Decimal128(S)`` / ``Decimal256(S)``
      -> fixed precision 9 / 18 / 38 / 76, scale ``S``

    A sized alias written with two arguments (e.g. ``Decimal64(10, 2)``) is
    still read as ``(precision, scale)``, as earlier versions of this
    function did.

    :param ch_type: ClickHouse Decimal type description, e.g. 'Decimal(10, 2)'
        or 'Decimal128(4)'
    :return: ``pa.decimal128`` for precision up to 38, ``pa.decimal256`` above
    :raises ValueError: if ``ch_type`` is not a recognised Decimal description
    """
    # Fixed precision implied by each sized alias; its single argument is the scale.
    alias_precision = {"32": 9, "64": 18, "128": 38, "256": 76}

    # fullmatch so trailing garbage after the closing parenthesis is rejected.
    match = re.fullmatch(
        r"Decimal(32|64|128|256)?\(\s*(\d+)\s*(?:,\s*(\d+)\s*)?\)",
        ch_type.strip(),
    )
    if match is None:
        raise ValueError(f"Unsupported ClickHouse Decimal type: {ch_type}")

    width, first, second = match.groups()
    if second is not None:
        # Explicit (precision, scale) pair.
        precision, scale = int(first), int(second)
    elif width is not None:
        # Sized alias: the only argument is the scale, precision comes from the width.
        precision, scale = alias_precision[width], int(first)
    else:
        # Decimal(P): scale defaults to 0.
        precision, scale = int(first), 0

    # Arrow's 128-bit decimal holds at most 38 digits; wider values need decimal256.
    if precision > 38:
        return pa.decimal256(precision, scale)
    return pa.decimal128(precision, scale)
|
|
355
|
-
|
|
356
|
-
def _split_top_level(text: str) -> list:
    """Split ``text`` on commas that are outside parentheses and single quotes."""
    parts = []
    depth = 0
    in_quote = False
    escaped = False
    start = 0
    for pos, char in enumerate(text):
        if in_quote:
            # Inside a quoted literal nothing is structural; honour backslash escapes.
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == "'":
                in_quote = False
        elif char == "'":
            in_quote = True
        elif char == "(":
            depth += 1
        elif char == ")":
            depth -= 1
        elif char == "," and depth == 0:
            parts.append(text[start:pos].strip())
            start = pos + 1
    parts.append(text[start:].strip())
    return parts


def _split_tuple_element(element: str, index: int) -> tuple:
    """Return ``(field_name, type_text)`` for one Tuple element, named or positional."""
    if element.startswith("`"):
        # Back-quoted field name, which may itself contain spaces.
        closing = element.find("`", 1)
        if closing != -1 and element[closing + 1:].strip():
            return element[1:closing], element[closing + 1:].strip()
    head, sep, tail = element.partition(" ")
    # A leading word followed by a space is a field name only when it comes
    # before any parenthesis; otherwise the space belongs to the type's own
    # argument list, as in 'Decimal(10, 2)'.
    if sep and "(" not in head and tail.strip():
        return head, tail.strip()
    return "field" + str(index), element


def map_clickhouse_to_arrow(ch_type: str) -> pa.DataType:
    """
    Map a ClickHouse type description to an Arrow data type.

    Exact names are looked up in ``CLICKHOUSE_TO_ARROW_TYPE`` first. Decimal
    descriptions go to ``map_clickhouse_decimal``. ``Array``, ``Nullable``,
    ``LowCardinality``, ``Tuple`` and ``Map`` are resolved recursively;
    Tuple and Map arguments are split only on top-level commas, so nested
    parametrised types such as ``Tuple(Decimal(10, 2), String)`` work.
    Parametrised ``Enum8(...)`` / ``Enum16(...)`` use the table entry of the
    bare enum name.

    :param ch_type: ClickHouse type description, e.g. 'Array(Nullable(Int32))'
    :return: the corresponding Arrow data type
    :raises ValueError: if the type is not supported
    """
    # Plain names map directly through the lookup table.
    if ch_type in CLICKHOUSE_TO_ARROW_TYPE:
        return CLICKHOUSE_TO_ARROW_TYPE[ch_type]

    # Decimal types
    if ch_type.startswith("Decimal"):
        return map_clickhouse_decimal(ch_type)

    # Array: map the element type recursively.
    if ch_type.startswith('Array('):
        return pa.list_(map_clickhouse_to_arrow(ch_type[6:-1]))

    # Nullable: Arrow types are nullable already, so just unwrap.
    if ch_type.startswith('Nullable('):
        return map_clickhouse_to_arrow(ch_type[9:-1])

    # LowCardinality is a storage wrapper; map the wrapped type.
    if ch_type.startswith('LowCardinality('):
        return map_clickhouse_to_arrow(ch_type[15:-1])

    # Parametrised enums: fall back to the table entry of the bare name.
    if ch_type.startswith(('Enum8(', 'Enum16(')):
        return CLICKHOUSE_TO_ARROW_TYPE[ch_type.partition('(')[0]]

    # Tuple: becomes an Arrow struct; positional fields are named 'field<i>'.
    if ch_type.startswith('Tuple('):
        fields = []
        for index, element in enumerate(_split_top_level(ch_type[6:-1])):
            field_name, inner_type = _split_tuple_element(element, index)
            fields.append((field_name, map_clickhouse_to_arrow(inner_type)))
        return pa.struct(fields)

    # Map: exactly two top-level arguments, key type then value type.
    if ch_type.startswith('Map('):
        parts = _split_top_level(ch_type[4:-1])
        if len(parts) != 2:
            raise ValueError(f"Unsupported ClickHouse type: {ch_type}")
        key_type, value_type = parts
        return pa.map_(map_clickhouse_to_arrow(key_type), map_clickhouse_to_arrow(value_type))

    raise ValueError(f"Unsupported ClickHouse type: {ch_type}")
|
|
389
|
-
|
ycat/yck.py
DELETED
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
"""
|
|
3
|
-
---------------------------------------------
|
|
4
|
-
Created on 2024/11/4 上午9:01
|
|
5
|
-
@author: ZhangYundi
|
|
6
|
-
@email: yundi.xxii@outlook.com
|
|
7
|
-
---------------------------------------------
|
|
8
|
-
"""
|
|
9
|
-
from random import randint
|
|
10
|
-
|
|
11
|
-
import pandas as pd
|
|
12
|
-
import polars
|
|
13
|
-
import polars as pl
|
|
14
|
-
import pyarrow as pa
|
|
15
|
-
from clickhouse_driver import Client
|
|
16
|
-
|
|
17
|
-
from . import dtype
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def connect(urls: list[str], user: str, password: str) -> Client:
    """
    Connect to a ClickHouse server, with cluster support.

    One entry of ``urls`` is picked at random as the primary host. The whole
    list is also passed to the client as ``alt_hosts`` with
    ``round_robin=True``.

    Parameters
    ----------
    urls: List[str]
        ["host1:port1", "host2:port2", "host3:port3"...]
    user: str
        User name
    password: str
        Password

    Returns
    -------
    client: Client
        A ``clickhouse_driver.Client`` instance

    Raises
    ------
    ValueError
        If ``urls`` is empty, or the chosen entry is not of the form
        "host:port".
    """
    if not urls:
        # randint(0, -1) would otherwise fail with an unrelated "empty range" message.
        raise ValueError("urls must contain at least one 'host:port' entry")

    url_ini = urls[randint(0, len(urls) - 1)]
    # Split on the LAST colon so hosts that contain colons (bracketed IPv6
    # literals such as "[::1]:9000") still yield a host and a port.
    host, sep, port = url_ini.rpartition(":")
    if not sep or not host or not port:
        raise ValueError(f"Invalid ClickHouse url {url_ini!r}; expected 'host:port'")
    if host.startswith("[") and host.endswith("]"):
        host = host[1:-1]
    return Client(host, port=port, round_robin=True, alt_hosts=",".join(urls), user=user, password=password)
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def query_pandas(sql, conn) -> pd.DataFrame:
    """
    Run a query against ClickHouse and return the result as a pandas DataFrame.

    Parameters
    ----------
    sql: str
        Query statement
    conn: Client
        ClickHouse connection; must provide ``query_dataframe``
        (a ``clickhouse_driver.Client`` instance)

    Returns
    -------
    pandas.DataFrame
        Whatever ``conn.query_dataframe(sql)`` returns for the query.
    """
    # Pure delegation: the driver builds the DataFrame itself.
    frame = conn.query_dataframe(sql)
    return frame
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
def query_polars(sql, conn) -> pl.DataFrame:
    """
    Run a query against ClickHouse and return the result as a polars DataFrame.

    Parameters
    ----------
    sql: str
        Query statement
    conn: Client
        ClickHouse connection; must provide ``execute``
        (a ``clickhouse_driver.Client`` instance)

    Returns
    -------
    polars.DataFrame
        The query result. When the driver returns no column data, an empty
        DataFrame is returned whose schema is built from the reported column
        types via ``dtype.infer_dtype_from_database_typename``.
    """
    # columnar=True yields one sequence per column; with_column_types=True
    # adds the (name, ClickHouse type) pairs as the second element.
    column_values, column_types = conn.execute(sql, columnar=True, with_column_types=True)

    if len(column_values) < 1:
        # No data came back: build an empty frame from the reported column types.
        empty_schema = {}
        for name, type_ in column_types:
            empty_schema[name] = dtype.infer_dtype_from_database_typename(type_)
        return pl.DataFrame(schema=empty_schema)

    # Translate each ClickHouse column type into its Arrow counterpart.
    arrow_types = {}
    for name, type_ in column_types:
        arrow_types[name] = dtype.map_clickhouse_to_arrow(type_)

    # Build the Arrow table column by column, pairing each column with its type.
    arrays = []
    for values, arrow_type in zip(column_values, arrow_types.values()):
        arrays.append(pa.array(values, type=arrow_type))
    arrow_table = pa.Table.from_arrays(arrays, schema=pa.schema(arrow_types))

    # Convert the Arrow table into a polars DataFrame.
    return pl.from_arrow(arrow_table)
|
ygo-1.0.9.dist-info/RECORD
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
qdf/__init__.py,sha256=XwH17ae6kX5Grhb_odgDqkNf6N-ambTVi3LoGzq1Fmc,7519
|
|
2
|
-
qdf/errors.py,sha256=lJhhjDRdQOOKUFGlLQ9ELK4AexXBwYQSYus_V-kc5K8,1180
|
|
3
|
-
qdf/expr.py,sha256=ck_BHMCV29Q8-szci1_v4ud964QI7JoRRcmA0ppupsc,8454
|
|
4
|
-
qdf/qdf.py,sha256=XcnGyyfuRY1HqaG56kC5tB6psrIXqo9QVQtgH3mhips,7322
|
|
5
|
-
qdf/udf/__init__.py,sha256=DdrSGaCB__5C1YL0vd_5rjIB3KLrAKn3h3k9k50L0jA,313
|
|
6
|
-
qdf/udf/base_udf.py,sha256=6VDaCIGNLJxZ7UsoIDWtTH6PzUDj89b8FiwN-TEat2g,3437
|
|
7
|
-
qdf/udf/cs_udf.py,sha256=HT3EKBwAhOxOFDQnpfwb4YcMTT3-lqFXkdysdn5_FI4,3179
|
|
8
|
-
qdf/udf/d_udf.py,sha256=L9mkX6yDpQPwXvQTAebDepjEkirCqur1DfV2Fnl8KA0,5352
|
|
9
|
-
qdf/udf/ind_udf.py,sha256=hDCKfcLFCgIhdC9dQ5GYxLemZaOE6K7kQyAnjUrwePM,6482
|
|
10
|
-
qdf/udf/ts_udf.py,sha256=uUuZnKMY-V_uInP0nsBMblDpxY3ld3EwvrXTwWMqeig,5410
|
|
11
|
-
ycat/__init__.py,sha256=zBMOFStzKSt_5jw4af6YFtPD5Svr8fJlZkP_AdUjCoA,554
|
|
12
|
-
ycat/client.py,sha256=0zD9BP1TH0f0D_N7sgQ8bgYVKb9hZZRxDU0dkun-G9w,3933
|
|
13
|
-
ycat/dtype.py,sha256=mRGLDe_Ho6-tDsoj5wwrAzozEoIYCAHGKdpRqgBfUcI,12577
|
|
14
|
-
ycat/parse.py,sha256=9Kgr33nHYC96TGpZs98PAu0cbK-FrR0pfuf8lfD647I,2289
|
|
15
|
-
ycat/yck.py,sha256=FlGMBuKEngB4TwFXMp4P3dLg9IfFmUg3eDqXzQ0kQoI,2738
|
|
16
|
-
ygo/__init__.py,sha256=FMN06Tfa8_oV26eklBZCtGTyHZ6MghHxHj4PS_FSXCA,222
|
|
17
|
-
ygo/exceptions.py,sha256=4Kd92kpwpsXHJJkSv4OqcN--PEEvIGGvDDgOOsk68gg,385
|
|
18
|
-
ygo/ygo.py,sha256=vCMUur_41yY0QB4gj8K5wBZHql_cbmANhI8QwPRCTmo,11613
|
|
19
|
-
ygo-1.0.9.dist-info/licenses/LICENSE,sha256=6AKUWQ1xe-jwPSFv_H6FMQLNNWb7AYqzuEUTwlP2S8M,1067
|
|
20
|
-
ylog/__init__.py,sha256=2sIp4PHNoQMCi0QtIarTI4raACd7SdRHNY7fY5hKYwc,397
|
|
21
|
-
ylog/core.py,sha256=jmz9JhklbVCQz-zahEXV6P-LEHnqU6opnY4CUEyo8Ss,7924
|
|
22
|
-
ygo-1.0.9.dist-info/METADATA,sha256=yXziZsrlRLbXbsLKEfo6qL6g99lT2udyAElw6T6_rAY,2076
|
|
23
|
-
ygo-1.0.9.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
|
24
|
-
ygo-1.0.9.dist-info/top_level.txt,sha256=FGbsOtsHgqWzZ9mGRSTCg0pLZEErR1lq5TFQSy2TL1w,18
|
|
25
|
-
ygo-1.0.9.dist-info/RECORD,,
|
{qdf → ycat/qdf}/errors.py
RENAMED
|
File without changes
|
{qdf → ycat/qdf}/expr.py
RENAMED
|
File without changes
|
{qdf → ycat/qdf}/qdf.py
RENAMED
|
File without changes
|
{qdf → ycat/qdf}/udf/__init__.py
RENAMED
|
File without changes
|
{qdf → ycat/qdf}/udf/base_udf.py
RENAMED
|
File without changes
|
{qdf → ycat/qdf}/udf/cs_udf.py
RENAMED
|
File without changes
|
{qdf → ycat/qdf}/udf/d_udf.py
RENAMED
|
File without changes
|
{qdf → ycat/qdf}/udf/ind_udf.py
RENAMED
|
File without changes
|
{qdf → ycat/qdf}/udf/ts_udf.py
RENAMED
|
File without changes
|
|
File without changes
|