tablemaster 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ import warnings
2
+ import importlib
3
+
4
+ from .config import load_cfg
5
+
6
+
7
# Lazy attribute loading (PEP 562): map each public symbol to the submodule
# that defines it, so `import tablemaster` stays cheap until a symbol is used.
# Built once at import time as a flat dict — the original rebuilt a
# tuple-keyed map and scanned it linearly on every attribute access.
_SYMBOL_TO_MODULE = {
    name: module
    for names, module in {
        ('query', 'opt', 'ManageTable', 'Manage_table'): 'database',
        ('fs_read_df', 'fs_read_base', 'fs_write_df', 'fs_write_base'): 'feishu',
        ('gs_read_df', 'gs_write_df'): 'gspread',
        ('read', 'batch_read', 'read_dfs'): 'local',
        ('sync',): 'sync',
        ('utils',): 'utils',
        ('DBConfig', 'FeishuConfig', 'GoogleConfig', 'ConfigNamespace', 'read_cfg'): 'config',
    }.items()
    for name in names
}


def __getattr__(name: str):
    """Resolve the deprecated ``cfg`` attribute and lazily import public symbols.

    Raises:
        AttributeError: if *name* is not a known tablemaster symbol.
    """
    if name == 'cfg':
        warnings.warn(
            'Implicit loading via tm.cfg is deprecated and will be removed in a future release; use tm.load_cfg() and pass config objects explicitly.',
            FutureWarning,
            stacklevel=2,
        )
        return load_cfg()
    module = _SYMBOL_TO_MODULE.get(name)
    if module is not None:
        # Import the owning submodule on first use, then hand back the symbol.
        mod = importlib.import_module(f'.{module}', __name__)
        return getattr(mod, name)
    raise AttributeError(f"module 'tablemaster' has no attribute {name!r}")
@@ -0,0 +1,3 @@
1
# Package entry point for ``python -m tablemaster``: delegate straight to the
# Typer CLI application defined in tablemaster/cli.py.
from .cli import app

app()
tablemaster/cli.py ADDED
@@ -0,0 +1,97 @@
1
+ import json
2
+ from dataclasses import asdict, is_dataclass
3
+ from importlib.metadata import PackageNotFoundError, version
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ import typer
8
+
9
+ from .config import load_cfg
10
+
11
# Root CLI application plus one sub-application per functional area; the
# sub-apps are attached to ``app`` at the bottom of this module.
app = typer.Typer(help='CLI for tablemaster data operations.')
config_app = typer.Typer(help='Inspect config entries.')
db_app = typer.Typer(help='Run database operations.')
local_app = typer.Typer(help='Read local files.')
15
+
16
+
17
def _to_plain(value):
    """Recursively convert *value* into JSON-serializable primitives.

    Dataclass instances become dicts, mappings and sequences are converted
    element-wise, and other objects fall back to their ``__dict__``.
    """
    # Guard with isinstance(value, type): is_dataclass() is also True for a
    # dataclass *class* itself, on which asdict() would raise TypeError.
    if is_dataclass(value) and not isinstance(value, type):
        return asdict(value)
    if isinstance(value, dict):
        return {k: _to_plain(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        # Tuples are flattened to lists so the result round-trips through JSON.
        return [_to_plain(v) for v in value]
    if hasattr(value, '__dict__'):
        return {k: _to_plain(v) for k, v in vars(value).items()}
    return value
27
+
28
+
29
def _load_named_cfg(cfg_path: Optional[str], cfg_key: str):
    """Load the config file and return the top-level entry named *cfg_key*.

    Raises:
        typer.BadParameter: when *cfg_key* is not present in the config.
    """
    loaded = load_cfg(cfg_path)
    # Guard clause: fail fast with a CLI-friendly error for unknown keys.
    if not hasattr(loaded, cfg_key):
        raise typer.BadParameter(f'Config key not found: {cfg_key}')
    return getattr(loaded, cfg_key)
34
+
35
+
36
@app.command()
def version_info():
    """Print the installed tablemaster version, or 'dev' when not installed."""
    try:
        typer.echo(version('tablemaster'))
    except PackageNotFoundError:
        # Running from a source checkout without installed metadata.
        typer.echo('dev')
43
+
44
+
45
@config_app.command('list')
def config_list(
    cfg_path: Optional[str] = typer.Option(None, '--cfg-path', help='Config file path or directory.'),
):
    """Print every top-level config key, sorted, one per line."""
    loaded = load_cfg(cfg_path)
    typer.echo('\n'.join(sorted(vars(loaded))))
52
+
53
+
54
@config_app.command('show')
def config_show(
    cfg_key: str = typer.Argument(..., help='Top-level config key.'),
    cfg_path: Optional[str] = typer.Option(None, '--cfg-path', help='Config file path or directory.'),
):
    """Dump a single config entry as pretty-printed JSON."""
    plain = _to_plain(_load_named_cfg(cfg_path, cfg_key))
    typer.echo(json.dumps(plain, ensure_ascii=False, indent=2))
61
+
62
+
63
@db_app.command('query')
def db_query(
    sql: str = typer.Argument(..., help='SQL to execute.'),
    cfg_key: str = typer.Option(..., '--cfg-key', help='Database config key in cfg.yaml.'),
    cfg_path: Optional[str] = typer.Option(None, '--cfg-path', help='Config file path or directory.'),
    output: Optional[Path] = typer.Option(None, '--output', help='Optional CSV output path.'),
    limit: int = typer.Option(100, '--limit', min=1, help='Max rows to print to stdout.'),
):
    """Run *sql* against the configured database; optionally save to CSV."""
    # Imported lazily so the CLI can start without database dependencies.
    from .database import query

    db_cfg = _load_named_cfg(cfg_path, cfg_key)
    result = query(sql, db_cfg)

    if output is not None:
        output.parent.mkdir(parents=True, exist_ok=True)
        result.to_csv(output, index=False)
        typer.echo(f'Wrote {len(result)} rows to {output}')

    # Always echo a bounded preview so large results do not flood stdout.
    typer.echo(result.head(limit).to_csv(index=False))
81
+
82
+
83
@local_app.command('read')
def local_read(
    pattern: str = typer.Argument(..., help='Glob pattern to read, e.g. "*orders_2026*".'),
    det_header: bool = typer.Option(True, '--det-header/--no-det-header', help='Enable header detection.'),
    limit: int = typer.Option(20, '--limit', min=1, help='Max rows to print.'),
):
    """Read local files matching *pattern* and print a CSV preview."""
    # Imported lazily to keep CLI startup light.
    from .local import read

    frame = read(pattern, det_header=det_header)
    preview = frame.head(limit)
    typer.echo(preview.to_csv(index=False))
93
+
94
+
95
+ app.add_typer(config_app, name='config')
96
+ app.add_typer(db_app, name='db')
97
+ app.add_typer(local_app, name='local')
tablemaster/config.py ADDED
@@ -0,0 +1,107 @@
1
+ import os
2
+ import warnings
3
+ from dataclasses import dataclass
4
+ from typing import Optional
5
+
6
+ from yaml import load
7
+
8
+ try:
9
+ from yaml import CLoader as Loader
10
+ except ImportError:
11
+ from yaml import Loader
12
+
13
+
14
@dataclass
class DBConfig:
    """Connection settings for one database entry in cfg.yaml."""

    host: str
    user: str
    password: str
    database: str
    name: str = ''  # config key this entry was loaded under; used in log messages
    port: int = 3306  # MySQL default; override for PostgreSQL (5432)
    db_type: str = 'mysql'  # 'mysql', 'tidb' or 'postgresql'
    use_ssl: bool = False
    ssl_ca: Optional[str] = None  # CA bundle path when SSL is in effect
25
+
26
+
27
@dataclass
class FeishuConfig:
    """Feishu (Lark) open-platform application credentials."""

    feishu_app_id: str
    feishu_app_secret: str
31
+
32
+
33
@dataclass
class GoogleConfig:
    """Location of the Google service-account JSON used for spreadsheet access."""

    service_account_path: str
36
+
37
+
38
class ConfigNamespace:
    """Attribute-style view over a raw config dict.

    Each value is run through _parse_entry, so nested dicts become typed
    config objects or further ConfigNamespace instances.
    """

    def __init__(self, raw: dict):
        parsed = {key: _parse_entry(key, val) for key, val in raw.items()}
        self.__dict__.update(parsed)
42
+
43
+
44
def _resolve_cfg_path(path: Optional[str] = None) -> str:
    """Locate the cfg.yaml file and return its absolute path.

    An explicit *path* (file or directory) is honored exclusively; otherwise
    TM_CFG_PATH, ./cfg.yaml and ~/.tablemaster/cfg.yaml are tried in order.

    Raises:
        FileNotFoundError: when no candidate points at an existing file.
    """

    def first_existing(candidates):
        # Return the first candidate that is an existing regular file.
        for candidate in candidates:
            if candidate and os.path.isfile(candidate):
                return os.path.abspath(candidate)
        return None

    def expand(p):
        # A directory means "the cfg.yaml inside it"; try that before p itself.
        if os.path.isdir(p):
            return [os.path.join(p, 'cfg.yaml'), p]
        return [p]

    if path:
        resolved = first_existing(expand(path))
        if resolved is None:
            raise FileNotFoundError(f'Config file not found: {path}')
        return resolved

    fallbacks = []
    env_path = os.getenv('TM_CFG_PATH')
    if env_path:
        fallbacks.extend(expand(env_path))
    fallbacks.append(os.path.join(os.getcwd(), 'cfg.yaml'))
    fallbacks.append(os.path.expanduser('~/.tablemaster/cfg.yaml'))

    resolved = first_existing(fallbacks)
    if resolved is None:
        raise FileNotFoundError(
            'Config file not found. Checked: TM_CFG_PATH, ./cfg.yaml, ~/.tablemaster/cfg.yaml'
        )
    return resolved
70
+
71
+
72
def _parse_entry(key: str, val):
    """Convert one raw config value into its typed representation.

    Dicts that look like known credential shapes become dataclasses; other
    dicts become nested ConfigNamespace objects; scalars pass through.
    """
    if not isinstance(val, dict):
        return val

    def pick_fields(cls):
        # Keep only keys the dataclass declares; extras are silently dropped.
        return {k: v for k, v in val.items() if k in cls.__dataclass_fields__}

    if 'host' in val and 'database' in val:
        kwargs = pick_fields(DBConfig)
        kwargs['name'] = key  # remember the config key for logging
        return DBConfig(**kwargs)

    if 'feishu_app_id' in val and 'feishu_app_secret' in val:
        return FeishuConfig(**pick_fields(FeishuConfig))

    if 'service_account_path' in val:
        return GoogleConfig(**pick_fields(GoogleConfig))

    return ConfigNamespace(val)
90
+
91
+
92
def load_cfg(path: str = None) -> ConfigNamespace:
    """Parse the resolved cfg.yaml into a ConfigNamespace.

    Raises:
        FileNotFoundError: when no config file can be located.
        ValueError: when the YAML root is not a mapping.
    """
    resolved = _resolve_cfg_path(path)
    with open(resolved, 'r', encoding='utf-8') as handle:
        # An empty file parses to None; normalize to an empty config.
        parsed = load(handle, Loader=Loader) or {}
    if not isinstance(parsed, dict):
        raise ValueError(f'Config root must be a dict, got: {type(parsed).__name__}')
    return ConfigNamespace(parsed)
99
+
100
+
101
def read_cfg(file_path: str):
    """Deprecated alias for load_cfg(); kept for backward compatibility."""
    warnings.warn(
        'read_cfg is deprecated and will be removed in a future release; use load_cfg(path) instead.',
        FutureWarning,
        stacklevel=2,
    )
    return load_cfg(file_path)
@@ -0,0 +1,286 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ import warnings
5
+ from functools import lru_cache
6
+
7
+ from sqlalchemy import create_engine, pool, text
8
+ import pandas as pd
9
+ from datetime import datetime
10
+ from tqdm import tqdm
11
+ from urllib.parse import quote_plus
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def get_connect_args(configs):
    """Build driver-level connect_args for the database engine.

    Args:
        configs: config object; may expose any of:
            - connect_args (dict): explicit connection arguments, used verbatim
            - use_ssl (bool): force an SSL block
            - ssl_ca (str): CA certificate path for SSL
            - db_type (str): 'tidb' implies SSL by default

    Returns:
        dict: keyword arguments for the DB-API connect call.
    """
    explicit = getattr(configs, 'connect_args', None)
    if explicit:
        # Caller supplied the full argument dict; copy so we never mutate it.
        return explicit.copy()

    use_ssl = getattr(configs, 'use_ssl', False)
    db_type = getattr(configs, 'db_type', 'mysql').lower()
    if not (use_ssl or db_type == 'tidb'):
        return {}

    # NOTE(review): a config object whose ssl_ca attribute exists but is None
    # (e.g. DBConfig's default) yields ca=None here — confirm that is intended.
    ca_path = getattr(configs, 'ssl_ca', '/etc/ssl/cert.pem')
    return {
        'ssl': {
            'ca': ca_path,
            'check_hostname': False,
            'verify_identity': False,
        }
    }
49
+
50
+
51
def _build_conn_str(configs):
    """Compose the SQLAlchemy connection URL for the configured database.

    Raises:
        ValueError: for db_type values other than mysql/tidb/postgresql.
    """
    db_type = getattr(configs, 'db_type', 'mysql').lower()
    encoded_password = quote_plus(configs.password)

    if db_type in ('mysql', 'tidb'):
        driver, default_port = 'mysql+pymysql', 3306
    elif db_type == 'postgresql':
        driver, default_port = 'postgresql+psycopg2', 5432
    else:
        raise ValueError(f'Unsupported db_type: {configs.db_type}')

    port = getattr(configs, 'port', default_port)
    return (
        f'{driver}://{configs.user}:{encoded_password}'
        f'@{configs.host}:{port}/{configs.database}'
    )
63
+
64
+
65
@lru_cache(maxsize=16)
def _get_engine(conn_str, connect_args_json='{}', autocommit=False):
    """Create (or return a cached) pooled SQLAlchemy engine.

    connect_args travel as a JSON string so they participate in the
    lru_cache key (dicts are unhashable).
    """
    parsed_args = json.loads(connect_args_json) if connect_args_json else {}
    engine_options = dict(
        connect_args=parsed_args,
        poolclass=pool.QueuePool,
        pool_size=5,
        max_overflow=10,
        pool_pre_ping=True,  # recycle stale pooled connections transparently
    )
    if autocommit:
        engine_options['isolation_level'] = 'AUTOCOMMIT'
    return create_engine(conn_str, **engine_options)
78
+
79
+
80
def _resolve_engine(configs, autocommit=False):
    """Translate a config object into a (cached) engine instance."""
    url = _build_conn_str(configs)
    args = get_connect_args(configs)
    # Serialize deterministically so equal configs hit the same cache slot.
    args_key = json.dumps(args, sort_keys=True, default=str)
    return _get_engine(url, args_key, autocommit)
85
+
86
+
87
def _safe_identifier(identifier):
    """Validate that *identifier* is a plain SQL identifier (injection guard).

    Raises:
        ValueError: when *identifier* contains anything beyond [A-Za-z0-9_]
            or starts with a digit.
    """
    # fullmatch instead of match with '$': '$' also matches just before a
    # trailing newline, so the old check accepted e.g. 'users\n'.
    if re.fullmatch(r'[A-Za-z_][A-Za-z0-9_]*', identifier) is None:
        raise ValueError(f'Invalid identifier: {identifier}')
    return identifier
91
+
92
+
93
def _safe_mysql_type(data_type):
    """Validate a column type expression like ``VARCHAR(255)`` or ``DECIMAL(10, 2)``.

    Raises:
        ValueError: when the stripped expression contains characters outside
            letters, digits, underscore, comma, parentheses and whitespace.
    """
    candidate = data_type.strip()
    if not re.fullmatch(r'[A-Za-z0-9_,()\s]+', candidate):
        raise ValueError(f'Invalid data type expression: {data_type}')
    return candidate
98
+
99
+
100
def query(sql, configs, params=None):
    """Execute *sql* and return the result as a pandas DataFrame.

    Args:
        sql: SQL string or SQLAlchemy executable object.
        configs: database config (DBConfig or compatible object).
        params: optional bind parameters forwarded to pandas.read_sql.
    """
    logger.info('try to connect to %s...', getattr(configs, 'name', 'database'))
    engine = _resolve_engine(configs, autocommit=False)
    statement = text(sql) if isinstance(sql, str) else sql
    with engine.connect() as conn:
        frame = pd.read_sql(statement, conn, params=params)
    logger.debug('query preview: %s', frame.head())
    return frame
108
+
109
+
110
def opt(sql, configs, params=None):
    """Execute a DML/DDL statement with autocommit; returns nothing.

    Args:
        sql: SQL string or SQLAlchemy executable object.
        configs: database config (DBConfig or compatible object).
        params: optional bind parameters for the statement.
    """
    logger.info('try to connect to %s...', getattr(configs, 'name', 'database'))
    engine = _resolve_engine(configs, autocommit=True)
    statement = text(sql) if isinstance(sql, str) else sql
    with engine.connect() as conn:
        conn.execute(statement, params or {})
    logger.info('database execute success')
117
+
118
+
119
class ManageTable:
    """Helper for operating on a single database table.

    The instance copies host/user/password/database/port/name from *configs*
    onto itself, so it can itself be passed where a config object is expected
    (exists/delete_table/par_del call opt(..., self)).
    """

    def __init__(self, table, configs, verify=False):
        # Mirror connection attributes locally so `self` is config-compatible.
        self.port = getattr(configs, 'port', 3306)
        self.table = table
        self.name = configs.name
        self.user = configs.user
        self.password = configs.password
        self.host = configs.host
        self.database = configs.database
        self.configs = configs
        if verify:
            self._check_exists()

    def _check_exists(self):
        # Raise early if the table cannot be queried; used by verify=True.
        if not self.exists():
            raise ValueError(f'table not found: {self.table}')
        logger.info('table exists: %s', self.table)

    def exists(self):
        """Return True when `SELECT 1 ... LIMIT 1` succeeds against the table.

        NOTE(review): any exception (including connectivity errors) is
        reported as "does not exist" — confirm that is acceptable.
        """
        safe_table = _safe_identifier(self.table)
        check_sql = text(f'SELECT 1 FROM `{safe_table}` LIMIT 1')
        try:
            opt(check_sql, self)
            return True
        except Exception:
            return False

    def delete_table(self):
        """DROP the table; failures are logged, not raised."""
        safe_table = _safe_identifier(self.table)
        try:
            opt(text(f'DROP TABLE `{safe_table}`'), self)
            logger.info('%s deleted', self.table)
        except Exception:
            logger.exception('table was not deleted')

    def par_del(self, clause, params=None):
        """DELETE rows matching *clause* (a raw WHERE fragment).

        NOTE(review): *clause* is interpolated into the SQL unescaped — only
        values bound via *params* are safe; callers must pass trusted clauses.
        """
        safe_table = _safe_identifier(self.table)
        del_clause = text(f'DELETE FROM `{safe_table}` WHERE {clause}')
        opt(del_clause, self, params=params)
        logger.info('records deleted by clause: %s', clause)

    def change_data_type(self, cols_name, data_type):
        """ALTER a column's type; identifiers and the type expression are validated."""
        safe_table = _safe_identifier(self.table)
        safe_col = _safe_identifier(cols_name)
        safe_type = _safe_mysql_type(data_type)
        change_clause = text(f'ALTER TABLE `{safe_table}` MODIFY COLUMN `{safe_col}` {safe_type}')
        opt(change_clause, self)
        logger.info('%s changed to %s successfully', cols_name, data_type)

    def upload_data(self, df, chunk_size=10000, add_date=False):
        """Append *df* to the table in chunks inside one transaction.

        Args:
            df: pandas DataFrame to write.
            chunk_size: rows per to_sql batch.
            add_date: when True, add a 'rundate' column with today's date.
        """
        # self.configs is always set in __init__; the fallback to self keeps
        # compatibility with instances constructed another way.
        engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)

        with engine.begin() as connection:
            if add_date:
                # Copy so the caller's frame is not mutated.
                df_copy = df.copy()
                df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
            else:
                df_copy = df
            # Ceiling division: count a partial final chunk.
            total_chunks = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
            logger.info('try to upload data now, chunk_size is %s', chunk_size)
            with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
                try:
                    for start in range(0, len(df_copy), chunk_size):
                        end = min(start + chunk_size, len(df_copy))
                        chunk = df_copy.iloc[start:end]
                        chunk.to_sql(name=self.table, con=connection, if_exists='append', index=False)
                        pbar.update(1)
                except Exception as e:
                    # NOTE(review): the exception is swallowed after logging,
                    # so engine.begin() will still commit the chunks written
                    # before the failure — confirm this is intended.
                    logger.exception('an error occurred during upload: %s', e)

    def upsert_data(self, df, chunk_size=10000, add_date=False, ignore=False, key=None):
        """Insert *df* in chunks with upsert semantics.

        Args:
            df: pandas DataFrame to write.
            chunk_size: rows per INSERT batch.
            add_date: when True, add a 'rundate' column with today's date.
            ignore: when True, emit INSERT IGNORE instead of an upsert.
            key: conflict column, required for postgresql upserts.

        NOTE(review): unlike the other methods, self.table is interpolated
        into the SQL without _safe_identifier; the `ignore` branch uses the
        MySQL-only INSERT IGNORE syntax regardless of db_type.
        """
        engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
        db_type = getattr(self.configs if hasattr(self, 'configs') else self, 'db_type', 'mysql').lower()

        with engine.begin() as connection:
            if add_date:
                df_copy = df.copy()
                df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
            else:
                df_copy = df

            total_chunks = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
            logger.info('trying to upload data now, chunk_size is %s', chunk_size)

            with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
                for start in range(0, len(df_copy), chunk_size):
                    end = min(start + chunk_size, len(df_copy))
                    chunk = df_copy.iloc[start:end]
                    columns = chunk.columns.tolist()
                    # Named bind parameters, one per column.
                    value_placeholders = ', '.join([f':{col}' for col in columns])

                    try:
                        if ignore == False:
                            if db_type in ('mysql', 'tidb'):
                                update_columns = ', '.join([f"`{col}`=VALUES(`{col}`)" for col in columns])
                                insert_sql = f"""
                                INSERT INTO {self.table} ({', '.join([f'`{col}`' for col in columns])})
                                VALUES ({value_placeholders})
                                ON DUPLICATE KEY UPDATE {update_columns}
                                """
                            elif db_type == 'postgresql':
                                if not key:
                                    raise ValueError('key is required for postgresql upsert')
                                safe_key = _safe_identifier(key)
                                safe_columns = [_safe_identifier(col) for col in columns]
                                quoted_columns = ', '.join([f'"{col}"' for col in safe_columns])
                                update_columns = ', '.join(
                                    [f'"{col}"=EXCLUDED."{col}"' for col in safe_columns if col != safe_key]
                                )
                                if update_columns:
                                    insert_sql = f"""
                                    INSERT INTO {self.table} ({quoted_columns})
                                    VALUES ({value_placeholders})
                                    ON CONFLICT ("{safe_key}") DO UPDATE SET {update_columns}
                                    """
                                else:
                                    # Only the key column exists: nothing to update.
                                    insert_sql = f"""
                                    INSERT INTO {self.table} ({quoted_columns})
                                    VALUES ({value_placeholders})
                                    ON CONFLICT ("{safe_key}") DO NOTHING
                                    """
                            else:
                                raise ValueError(f'Unsupported db_type for upsert: {db_type}')
                        else:
                            insert_sql = f"""
                            INSERT IGNORE INTO {self.table} ({', '.join([f'`{col}`' for col in columns])})
                            VALUES ({value_placeholders})
                            """

                        # NaN -> None so missing values become SQL NULL.
                        data = chunk.where(pd.notna(chunk), None).to_dict(orient='records')
                        connection.execute(text(insert_sql), data)
                        pbar.update(1)
                    except Exception as e:
                        # NOTE(review): per-chunk errors are logged and skipped;
                        # the surrounding transaction still commits.
                        logger.exception('an error occurred during upsert: %s', e)
254
+
255
class Manage_table(ManageTable):
    """Deprecated snake_case alias of ManageTable, kept for old callers.

    NOTE(review): the warning text says "removed in v2.0.0" while this release
    appears to be 2.0.0 — the message likely needs updating.
    """

    def __init__(self, table, configs, verify=False):
        warnings.warn(
            'Manage_table is deprecated and will be removed in v2.0.0; use ManageTable instead.',
            DeprecationWarning,
            stacklevel=2,
        )
        super().__init__(table, configs, verify=verify)

    def delete_table(self):
        # Pure passthrough; kept so the legacy class mirrors the old surface.
        super().delete_table()

    def upload_data(self, df, chunk_size=10000, add_date=True):
        """Append *df* in chunks; legacy default add_date=True.

        NOTE(review): differs from ManageTable.upload_data only in the
        add_date default (True here, False on the parent); the body is
        otherwise a duplicate of the parent implementation.
        """
        engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)

        with engine.begin() as connection:
            if add_date:
                # Copy so the caller's frame is not mutated.
                df_copy = df.copy()
                df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
            else:
                df_copy = df
            # Ceiling division: count a partial final chunk.
            total_chunks = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
            logger.info('try to upload data now, chunk_size is %s', chunk_size)
            with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
                try:
                    for start in range(0, len(df_copy), chunk_size):
                        end = min(start + chunk_size, len(df_copy))
                        chunk = df_copy.iloc[start:end]
                        chunk.to_sql(name=self.table, con=connection, if_exists='append', index=False)
                        pbar.update(1)
                except Exception as e:
                    logger.exception('an error occurred during upload: %s', e)