ygo 1.0.11__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ygo might be problematic. Click here for more details.

@@ -0,0 +1,160 @@
1
+ Metadata-Version: 2.4
2
+ Name: ygo
3
+ Version: 1.1.0
4
+ Project-URL: homepage, https://github.com/link-yundi/ygo
5
+ Project-URL: repository, https://github.com/link-yundi/ygo
6
+ Requires-Python: >=3.12
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: joblib>=1.5.0
10
+ Requires-Dist: loguru>=0.7.3
11
+ Requires-Dist: tqdm>=4.67.1
12
+ Dynamic: license-file
13
+
14
+ # ygo
15
+ 一个轻量级 Python 工具包,底层基于 joblib、tqdm 和 loguru 实现,支持:
16
+ - 并发执行(带进度条)
17
+ - 延迟调用
18
+ - 链式绑定参数
19
+ - 函数信息获取
20
+ - 模块/函数动态加载...
21
+ - 并结合 ylog 提供日志记录能力
22
+
23
+ ### 安装
24
+ ```shell
25
+ pip install -U ygo
26
+ ```
27
+
28
+ ### 🧰 功能概览
29
+
30
+ | 模块 | 功能 |
31
+ | :----- | :----------------------------------------------------------- |
32
+ | `ygo` | 支持并发执行(带进度条)、延迟调用、函数信息获取以及模块/函数动态加载等功能 |
33
+ | `ylog` | 日志模块,提供统一的日志输出接口 |
34
+
35
+ ### 示例
36
+
37
+ ```
38
+ ├── a
39
+ │   ├── __init__.py
40
+ │   └── b
41
+ │   ├── __init__.py
42
+ │   └── c.py
43
+ └── test.py
44
+
45
+ c.py 中定义了目标函数
46
+ def test_fn(a, b=2):
47
+ return a+b
48
+ ```
49
+
50
+ #### 场景1: 并发执行
51
+
52
+ ```python
53
+ import ygo
54
+ import ylog
55
+ from a.b.c import test_fn
56
+
57
+ with ygo.pool(job_name="test parallel", show_progress=True) as go:
58
+ for i in range(10):
59
+ go.submit(test_fn)(a=i, b=2*i)
60
+ for res in go.do():
61
+ ylog.info(res)
62
+ ```
63
+
64
+ #### ✅ `ygo.pool` 支持的参数
65
+
66
+ | 参数名 | 类型 | 描述 |
67
+ | ------------- | ---- | ------------------------------------------------------------ |
68
+ | n_jobs | int | 并行任务数(<=1 表示串行) |
69
+ | show_progress | bool | 是否显示进度条 |
70
+ | backend | str | 执行后端(默认 'threading',可选 'multiprocessing' 或 'loky') |
71
+
72
+ #### 场景2: 延迟调用
73
+
74
+ ```
75
+ >>> fn = delay(test_fn)(a=1, b=2)
76
+ >>> fn()
77
+ 3
78
+ >>> # 逐步传递参数
79
+ >>> fn1 = delay(lambda a, b, c: a+b+c)(a=1)
80
+ >>> fn2 = delay(fn1)(b=2)
81
+ >>> fn2(c=3)
82
+ 6
83
+ >>> # 参数更改
84
+ >>> fn1 = delay(lambda a, b, c: a+b+c)(a=1, b=2)
85
+ >>> fn2 = delay(fn1)(c=3, b=5)
86
+ >>> fn2()
87
+ 9
88
+ ```
89
+
90
+ #### 场景3: 获取目标函数信息
91
+
92
+ ```
93
+ >>> ygo.fn_info(test_fn)
94
+ =============================================================
95
+ a.b.c.test_fn(a, b=2)
96
+ =============================================================
97
+ def test_fn(a, b=2):
98
+ return a+b
99
+ ```
100
+
101
+ #### 🔍 其他函数信息工具
102
+
103
+ | 方法名 | 描述 |
104
+ | ------------------------- | ---------------------------------------- |
105
+ | `fn_params(fn)` | 获取函数实参 |
106
+ | `fn_signature_params(fn)` | 获取函数定义的所有参数名 |
107
+ | `fn_code(fn)` | 获取函数源码字符串 |
108
+ | `fn_path(fn)` | 获取函数所属模块路径 |
109
+ | `fn_from_str(s)` | 根据字符串导入函数(如 "a.b.c.test_fn") |
110
+ | `module_from_str(s)` | 根据字符串导入模块 |
111
+
112
+ #### 场景4: 通过字符串解析函数并执行
113
+
114
+ ```
115
+ >>> ygo.fn_from_str("a.b.c.test_fn")(a=1, b=5)
116
+ 6
117
+ ```
118
+
119
+ ### 📝 日志记录(ylog)
120
+
121
+ ```python
122
+ import ylog
123
+
124
+ ylog.info("这是一个信息日志")
125
+ ylog.warning("这是一个警告日志")
126
+ ylog.error("这是一个错误日志", exc_info=True)
127
+
128
+
129
+
130
+ # 为不同的模块使用不同的logger
131
+ logger_app1 = ylog.get_logger("app1", )
132
+ logger_app2 = ylog.get_logger("app2", )
133
+ ```
134
+
135
+ #### 🔧 配置管理:`update_config`
136
+
137
+ 你可以通过 update_config 方法动态修改日志配置,例如设置日志级别、格式、是否启用颜色等。
138
+
139
+ ```python
140
+ # 开启调试模式
141
+ ylog.update_config(debug_mode=True)
142
+ ```
143
+
144
+ #### 🧩 获取独立的 Logger 实例:`get_logger`
145
+
146
+ 在大型项目中,你可能希望为不同模块或组件创建独立的 logger 实例以区分日志来源。
147
+
148
+ ```python
149
+ logger1 = ylog.get_logger("moduleA")
150
+ logger2 = ylog.get_logger("moduleB")
151
+
152
+ logger1.info("这是来自 moduleA 的日志")
153
+ logger2.warning("这是来自 moduleB 的警告")
154
+ ```
155
+
156
+ #### 📌 使用建议
157
+
158
+ - 生产环境建议关闭 `debug_mode`,避免产生过多调试日志。
159
+ - 对于复杂项目,推荐使用 `get_logger` 创建命名 logger,便于日志分类与分析。
160
+ - 使用 `exc_info=True` 参数时,可自动打印异常堆栈信息,适用于错误捕获场景。
@@ -0,0 +1,12 @@
1
+ ygo/__init__.py,sha256=AlvzcS4Ge94nklq5AhPhCNCIg5D1F8gaZqhdzQpoXH4,679
2
+ ygo/delay.py,sha256=66xtPXqyD630FL7LWL5qJKAIZvyGDwZyM4qPfk8Czlg,2206
3
+ ygo/exceptions.py,sha256=0OYDYt_9KKo8mF2XBG5QkCMr3-ASp69VDSPOEwlIsrI,660
4
+ ygo/pool.py,sha256=bnHm4TtnRoFBv5UvV7WpuObJoK4FdoRf65mvf82yEyI,7052
5
+ ygo/utils.py,sha256=c-g4fJgeZp8diinkJhX4DAJBZEhH2tHYniUzRlt1EgU,3178
6
+ ygo-1.1.0.dist-info/licenses/LICENSE,sha256=6AKUWQ1xe-jwPSFv_H6FMQLNNWb7AYqzuEUTwlP2S8M,1067
7
+ ylog/__init__.py,sha256=aNrUp1n3JJFMUt1JFEsq33bckIhSQwCiTQCmV9rOMYk,452
8
+ ylog/core.py,sha256=d6QCFRDTvlyxgvS6JphUGOgX5Mgx9qPv9wB3g-4YOJw,9225
9
+ ygo-1.1.0.dist-info/METADATA,sha256=Eelk_nhMZZ6nwqCtTtmLJabPuLB3d0PacwbS3rP8uDE,4651
10
+ ygo-1.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
11
+ ygo-1.1.0.dist-info/top_level.txt,sha256=sY7lJBJ2ncfEMAxoNBVay0RVUixpVt9Osuwwy0_uWqU,9
12
+ ygo-1.1.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.8.0)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,3 +1,2 @@
1
- ycat
2
1
  ygo
3
2
  ylog
ylog/__init__.py CHANGED
@@ -9,6 +9,8 @@ Created on 2025/5/14 15:37
9
9
 
10
10
  from .core import trace, debug, info, warning, error, critical, update_config, get_logger
11
11
 
12
+ __version__ = "v0.0.1"
13
+
12
14
  __all__ = [
13
15
  "trace",
14
16
  "debug",
ycat/__init__.py DELETED
@@ -1,33 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- ---------------------------------------------
4
- Created on 2025/5/14 18:29
5
- @author: ZhangYundi
6
- @email: yundi.xxii@outlook.com
7
- ---------------------------------------------
8
- """
9
-
10
- from .client import (
11
- HOME,
12
- CATDB,
13
- get_settings,
14
- sql,
15
- put,
16
- tb_path,
17
- read_ck,
18
- read_mysql,
19
- )
20
- from .qdf import from_polars
21
- from .provider import Provider
22
-
23
- __all__ = [
24
- "HOME",
25
- "CATDB",
26
- "get_settings",
27
- "sql",
28
- "put",
29
- "tb_path",
30
- "read_ck",
31
- "read_mysql",
32
- "Provider",
33
- ]
ycat/client.py DELETED
@@ -1,172 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- ---------------------------------------------
4
- Created on 2024/7/1 09:44
5
- @author: ZhangYundi
6
- @email: yundi.xxii@outlook.com
7
- ---------------------------------------------
8
- """
9
- import os
10
- import re
11
- import urllib
12
- from pathlib import Path
13
-
14
- import clickhouse_df
15
- import polars as pl
16
- from dynaconf import Dynaconf
17
-
18
- import ylog
19
- from .parse import extract_table_names_from_sql
20
-
21
# The configuration file lives at "~/.catdb/settings.toml".
USERHOME = os.path.expanduser('~')  # the user's home directory
NAME = "catdb"
CONFIG_PATH = os.path.join(USERHOME, f".{NAME}", "settings.toml")
if not os.path.exists(CONFIG_PATH):
    # First run: create the config directory and write a commented template.
    try:
        # exist_ok=True replaces the previous "except FileExistsError: ..." swallow.
        os.makedirs(os.path.dirname(CONFIG_PATH), exist_ok=True)
    except Exception as e:
        # Best effort: log and fall through; the open() below will raise if
        # the directory really is unusable.
        ylog.error(f"配置文件生成失败: {e}")
    # Use forward slashes so the value is a valid TOML basic string on
    # Windows as well (a backslash would start an escape sequence).
    catdb_path = Path(USERHOME, NAME).as_posix()
    template_content = f"""[paths]
{NAME}="{catdb_path}" # 本地数据库,默认家目录

## 数据库配置:
[database]
[database.ck]
# urls=["<host1>:<port1>", "<host2>:<port2>",]
# user="xxx"
# password="xxxxxx"
[database.jy]
# url="<host>:<port>"
# user="xxxx"
# password="xxxxxx"

## 视情况自由增加其他配置
"""
    # The template contains non-ASCII text, so the encoding is pinned instead
    # of depending on the locale default.
    with open(CONFIG_PATH, "w", encoding="utf-8") as f:
        f.write(template_content)
    ylog.info(f"生成配置文件: {CONFIG_PATH}")
52
-
53
-
54
- def get_settings():
55
- try:
56
- return Dynaconf(settings_files=[CONFIG_PATH])
57
- except Exception as e:
58
- ylog.error(f"读取配置文件失败: {e}")
59
- return {}
60
-
61
-
62
HOME = USERHOME
# Default location of the local database: <home>/catdb.
CATDB = os.path.join(HOME, NAME)
# Override the default with the path from the settings file, when present.
SETTINGS = get_settings()
try:
    _configured_catdb = SETTINGS["PATHS"][NAME]
except (KeyError, TypeError):
    # get_settings() returns {} on failure, and the file may lack a [paths]
    # entry; in both cases keep the default instead of failing at import.
    _configured_catdb = None
if _configured_catdb:
    CATDB = _configured_catdb
    # The configured value may be a parent directory; make sure the final
    # path ends with the database directory name.
    if not CATDB.endswith(NAME):
        CATDB = os.path.join(CATDB, NAME)
70
-
71
-
72
- # ======================== 本地数据库 catdb ========================
73
def tb_path(tb_name: str) -> Path:
    """
    Return the full local path of the given table.

    Parameters
    ----------
    tb_name: str
        Table name written as a relative path, e.g. ``a/b/c``.

    Returns
    -------
    full_abs_path: pathlib.Path
        ``CATDB`` joined with ``tb_name``, e.g. ``$HOME/catdb/a/b/c``.
    """
    return Path(CATDB) / tb_name
86
-
87
-
88
def put(df: pl.DataFrame, tb_name: str, partitions: list[str] | None = None, abs_path: bool = False):
    """
    Write ``df`` as parquet into the table directory.

    :param df: frame to persist
    :param tb_name: table name relative to CATDB, or a filesystem path when
        ``abs_path`` is True
    :param partitions: column names to partition by; when None a single
        ``data.parquet`` file is written inside the table directory
    :param abs_path: treat ``tb_name`` as a path instead of resolving it
        through ``tb_path``
    """
    # Always work with a Path: previously abs_path=True left a plain str here,
    # so ``.exists()`` and the ``/`` join below raised.
    tbpath = Path(tb_name) if abs_path else tb_path(tb_name)
    if not tbpath.exists():
        os.makedirs(tbpath, exist_ok=True)
    if partitions is not None:
        df.write_parquet(tbpath, partition_by=partitions)
    else:
        df.write_parquet(tbpath / "data.parquet")
99
-
100
-
101
def sql(query: str, abs_path: bool = False, lazy: bool = True):
    """
    Run a SQL query against local parquet tables through ``pl.sql``.

    Every table name found in ``query`` is rewritten to
    ``read_parquet('<path>/**/*.parquet')`` before execution.

    :param query: SQL text whose table names refer to local tables
    :param abs_path: when True the table names are used as paths verbatim,
        otherwise they are resolved through ``tb_path``
    :param lazy: when False the result of ``pl.sql`` is collected before
        being returned
    :return: the result of ``pl.sql(...)``, collected when ``lazy`` is False
    """
    convertor = {}
    for tb in extract_table_names_from_sql(query):
        if not tb:
            # An empty name would produce a pattern that matches everywhere.
            continue
        db_path = tb if abs_path else tb_path(tb)
        convertor[tb] = f"read_parquet('{db_path}/**/*.parquet')"

    new_query = query
    if convertor:
        # Longest names first so that a table whose name is a prefix of
        # another (e.g. "a/b" vs "a/b/c") cannot shadow the longer one.
        ordered = sorted(convertor, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(k) for k in ordered))
        new_query = pattern.sub(lambda m: convertor[m.group(0)], query)
    # With no table names the query is passed through unchanged; previously
    # the empty pattern matched at every position and raised KeyError.

    if not lazy:
        return pl.sql(new_query).collect()
    return pl.sql(new_query)
116
-
117
-
118
def read_mysql(query: str, db_conf: str = "database.mysql") -> pl.DataFrame:
    """
    Run a query against MySQL and return a polars.DataFrame.

    :param query: SQL text to execute
    :param db_conf: key of the database section in .catdb/settings.toml;
        the section must provide ``user``, ``password`` and ``url``
    :return: polars.DataFrame
    :raises RuntimeError: if the configuration is incomplete or the query fails
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import quote_plus

    try:
        db_setting = get_settings().get(db_conf, {})
        if not isinstance(db_setting, dict):
            raise ValueError(f"Database configuration '{db_conf}' is not a dictionary.")

        required_keys = ['user', 'password', 'url']
        missing_keys = [key for key in required_keys if key not in db_setting]
        if missing_keys:
            raise KeyError(f"Missing required keys in database config: {missing_keys}")

        # Credentials are percent-encoded because they are embedded in a URI.
        user = quote_plus(db_setting['user'])
        password = quote_plus(db_setting['password'])
        uri = f"mysql://{user}:{password}@{db_setting['url']}"
        return pl.read_database_uri(query, uri)

    except KeyError as e:
        raise RuntimeError("Database configuration error: missing required fields.") from e
    except Exception as e:
        raise RuntimeError(f"Failed to execute MySQL query: {e}") from e
144
-
145
-
146
def read_ck(query: str, db_conf: str = "database.ck") -> pl.DataFrame:
    """
    Run a query against a ClickHouse cluster and return a polars.DataFrame.

    :param query: SQL text to execute
    :param db_conf: key of the database section in .catdb/settings.toml;
        the section must provide ``user``, ``password`` and ``urls``
    :return: polars.DataFrame
    :raises RuntimeError: if the configuration is incomplete or the query fails
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import quote_plus

    try:
        db_setting = get_settings().get(db_conf, {})
        if not isinstance(db_setting, dict):
            raise ValueError(f"Database configuration '{db_conf}' is not a dictionary.")

        required_keys = ['user', 'password', 'urls']
        missing_keys = [key for key in required_keys if key not in db_setting]
        if missing_keys:
            raise KeyError(f"Missing required keys in database config: {missing_keys}")

        # NOTE(review): the credentials are percent-encoded even though they
        # are passed as keyword arguments rather than inside a URI; confirm
        # that clickhouse_df.connect expects encoded values.
        user = quote_plus(db_setting['user'])
        password = quote_plus(db_setting['password'])

        with clickhouse_df.connect(db_setting['urls'], user=user, password=password):
            return clickhouse_df.to_polars(query)

    except KeyError as e:
        raise RuntimeError("Database configuration error: missing required fields.") from e
    except Exception as e:
        raise RuntimeError(f"Failed to execute ClickHouse query: {e}") from e
ycat/parse.py DELETED
@@ -1,64 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- ---------------------------------------------
4
- Created on 2024/11/6 下午7:25
5
- @author: ZhangYundi
6
- @email: yundi.xxii@outlook.com
7
- ---------------------------------------------
8
- """
9
- import sqlparse
10
- import re
11
-
12
def format_sql(sql_content):
    """Normalize a SQL string (re-indented, comments stripped) and return it as a string."""
    return sqlparse.format(sql_content, reindent=True, strip_comments=True)
16
-
17
def extract_temp_tables(with_clause):
    """Return, as a list, the temporary table names defined in a WITH clause.

    A name is any word immediately followed by ``as (`` (case-insensitive,
    optional whitespace).
    """
    cte_pattern = re.compile(r'\b(\w+)\s*as\s*\(', re.IGNORECASE)
    return cte_pattern.findall(with_clause)
21
-
22
def extract_table_names_from_sql(sql_query):
    """
    Extract the table names referenced by a SQL string and return them as a list.

    Names are taken from what follows ``FROM`` / ``JOIN``. For a dotted name
    only the last component is kept, and quote characters and semicolons are
    removed. Names defined as temporary tables in a WITH clause are excluded.
    The result is sorted so the order is deterministic.
    """
    # Matches the token following FROM or JOIN.
    table_name_pattern = r'\bFROM\s+([^\s\(\)\,]+)|\bJOIN\s+([^\s\(\)\,]+)'

    table_names = set()
    # Temporary table names defined in WITH clauses, across all statements.
    temp_tables = set()

    for statement in sqlparse.parse(sql_query):
        statement_str = str(statement)

        # Drop substring(...)/extract(...) calls, whose "FROM" keyword would
        # otherwise be mistaken for a table reference. Case-insensitive like
        # the table-name match below.
        statement_str = re.sub(
            r'(substring|extract)\s*\([\s\S]*?\)', '', statement_str, flags=re.IGNORECASE
        )

        for match in re.findall(table_name_pattern, statement_str, re.IGNORECASE):
            # Each match is a (from_name, join_name) pair; one side is empty.
            for name in match:
                if name:
                    # Keep only the last part of a namespaced name.
                    table_name = name.split('.')[-1]
                    # Strip quoting characters and a trailing semicolon.
                    table_name = re.sub(r'("|`|\'|;)', '', table_name)
                    table_names.add(table_name)

        # Match WITH as a whole word and in any letter case; the previous
        # substring test missed upper-case "WITH".
        if re.search(r'\bwith\b', statement_str, re.IGNORECASE):
            temp_tables.update(extract_temp_tables(statement_str))

    # Always return a list (previously a set or a list depending on the path).
    return sorted(table_names - temp_tables)
62
-
63
-
64
-
ycat/provider.py DELETED
@@ -1,101 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- ---------------------------------------------
4
- Created on 2025/5/23 01:34
5
- @author: ZhangYundi
6
- @email: yundi.xxii@outlook.com
7
- ---------------------------------------------
8
- """
9
- import os
10
- from datetime import datetime, timedelta
11
- from pathlib import Path
12
-
13
- import ygo
14
- import ylog
15
- from .client import CATDB
16
-
17
- DATE_FORMAT = "%Y-%m-%d"
18
-
19
-
20
class Provider:
    """
    Data updater: runs registered update tasks and records the last successful run.

    State is kept under ``{ycat.CATDB}/{name}``; the date of the last fully
    successful run is stored there in a ``.last_run`` file.
    """

    def __init__(self, name: str, update_time="16:30"):
        """
        Create the updater and its state directory.

        :param name: updater name, also used as the sub-directory of CATDB
        :param update_time: daily update time as a zero-padded "HH:MM"
            string, default 16:30
        """
        self.name = name
        self._tb_path = Path(CATDB) / name
        os.makedirs(self._tb_path, exist_ok=True)
        self._update_time = update_time
        self.present = datetime.now()

        # "HH:MM" strings are compared lexicographically, which matches
        # chronological order only when both sides are zero-padded.
        if self.present.strftime("%H:%M") >= self._update_time:
            self.last_date = self.present.strftime(DATE_FORMAT)
        else:
            # Before the update time the newest expected data is yesterday's.
            self.last_date = (self.present - timedelta(days=1)).strftime(DATE_FORMAT)

        # List of (task_name, delayed callable) pairs filled by add_task().
        self._tasks = []
        self._last_run_file = self._tb_path / ".last_run"
        self.logger = ylog.get_logger("provider")

    @property
    def last_update_date(self):
        """Date string stored by the last successful run, or None if there is none."""
        return self._read_last_run_date()

    def _read_last_run_date(self):
        """Return the stripped content of the ``.last_run`` file, or None if it does not exist."""
        if self._last_run_file.exists():
            with open(self._last_run_file, "r", encoding="utf-8") as f:
                return f.read().strip()
        return None

    def _write_last_run_date(self, date_str: str):
        """Overwrite the ``.last_run`` file with ``date_str``."""
        with open(self._last_run_file, "w", encoding="utf-8") as f:
            f.write(date_str)

    def wrap_fn(self, task_name: str, update_fn: callable):
        """
        Run ``update_fn`` and turn its outcome into a status code.

        :return: 0 on success; 1 if ``update_fn`` raised (the error is logged)
        """
        try:
            update_fn()
            return 0
        except Exception as e:
            self.logger.error(ygo.FailTaskError(task_name=task_name, error=e))
            return 1

    def add_task(self, task_name: str, update_fn: callable):
        """Register ``update_fn`` under ``task_name``, wrapped by wrap_fn for error handling."""
        self._tasks.append((task_name, ygo.delay(self.wrap_fn)(task_name=task_name, update_fn=update_fn)))

    def do(self,
           overwrite: bool = False,
           n_jobs: int = 10,
           backend: str = "threading"):
        """
        Run all registered tasks.

        :param overwrite: run even if the recorded last-run date is up to date
        :param n_jobs: number of concurrent jobs
        :param backend: loky/threading/multiprocessing
        :return: None
        """
        if not overwrite:
            local_last_date = self._read_last_run_date()
            # Dates use DATE_FORMAT ("%Y-%m-%d"), so string comparison is
            # chronological.
            if local_last_date is not None and local_last_date >= self.last_date:
                self.logger.info(f"[{self.name}] 已是最新数据,跳过更新")
                return
        self.logger.info(f"[{self.name}] 更新数据")
        failed_num = 0
        with ygo.pool(n_jobs=n_jobs, backend=backend) as go:
            for task_name, task in self._tasks:
                go.submit(task, job_name=task_name)()
            # Each task yields 0 (success) or 1 (failure); see wrap_fn.
            for status in go.do():
                failed_num += status
        # Only record the run date when every task succeeded.
        if failed_num < 1:
            self._write_last_run_date(self.last_date)
            self.logger.info(f"[{self.name}] 更新成功,最新数据日期:{self.last_date}")
        self.logger.info(f"[{self.name}] 更新完成,失败任务数:{str(failed_num).zfill(2)}/{str(len(self._tasks)).zfill(2)}")