tablemaster 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tablemaster/local.py ADDED
@@ -0,0 +1,90 @@
1
+ import pandas as pd
2
+ import pathlib
3
+ from pathlib import Path
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
def detect_header_read_csv(path, det_rows=10):
    """Read a CSV file, skipping junk rows that sit above the real header.

    The file is first read with the default header row. If more than one
    column comes back auto-named by pandas ("Unnamed: N"), rows 1..det_rows
    are tried in turn as the header and the first parse without any
    auto-named column is returned.

    Args:
        path: Anything ``pandas.read_csv`` accepts as a file location.
        det_rows: Maximum number of alternative header rows to try.

    Returns:
        pandas.DataFrame: The parse using the detected header row, or the
        default parse when no cleaner header row is found.
    """
    def _unnamed_count(frame):
        # Labels are not guaranteed to be strings; str() keeps the
        # membership test from raising on numeric labels.
        return sum('Unnamed' in str(col) for col in frame.columns)

    default_parse = pd.read_csv(path)
    # A single auto-named column is tolerated; two or more trigger the scan.
    if _unnamed_count(default_parse) <= 1:
        return default_parse

    for header_row in range(1, det_rows + 1):
        try:
            candidate = pd.read_csv(path, header=header_row)
        except ValueError:
            # pandas' ParserError / EmptyDataError (both ValueError
            # subclasses) signal that header_row is past the end of the file.
            break
        if _unnamed_count(candidate) == 0:
            return candidate

    # Detection failed: the default parse is more predictable than
    # whichever candidate happened to be tried last.
    return default_parse
17
+
18
def detect_header_read_excel(path, det_rows=10):
    """Read an Excel file, skipping junk rows that sit above the real header.

    The workbook is first read with the default header row. If more than
    one column comes back auto-named by pandas ("Unnamed: N"), rows
    1..det_rows are tried in turn as the header and the first parse without
    any auto-named column is returned.

    Args:
        path: Anything ``pandas.read_excel`` accepts as a file location.
        det_rows: Maximum number of alternative header rows to try.

    Returns:
        pandas.DataFrame: The parse using the detected header row, or the
        default parse when no cleaner header row is found.
    """
    def _unnamed_count(frame):
        # Excel header cells may be numbers or dates, so labels are not
        # always strings; str() keeps the membership test from raising.
        return sum('Unnamed' in str(col) for col in frame.columns)

    default_parse = pd.read_excel(path)
    # A single auto-named column is tolerated; two or more trigger the scan.
    if _unnamed_count(default_parse) <= 1:
        return default_parse

    for header_row in range(1, det_rows + 1):
        try:
            candidate = pd.read_excel(path, header=header_row)
        except (ValueError, IndexError):
            # header_row is past the last row of the sheet; stop scanning.
            break
        if _unnamed_count(candidate) == 0:
            return candidate

    # Detection failed: the default parse is more predictable than
    # whichever candidate happened to be tried last.
    return default_parse
27
+
28
def equal_table(df1, df2, det_col='nan'):
    """Decide whether two DataFrames should be treated as the same table.

    Frames of different length are never equal and frames for which
    ``DataFrame.equals`` holds always are. Otherwise, when ``det_col`` is
    given, the frames count as equal if that column holds the same values
    in both, ignoring row order.

    Args:
        df1: First DataFrame.
        df2: Second DataFrame.
        det_col: Column used for the order-insensitive comparison. The
            default sentinel ``'nan'`` disables that fallback.

    Returns:
        bool: True when the tables are considered equal.
    """
    if len(df1) != len(df2):
        return False
    if df1.equals(df2):
        return True
    if det_col == 'nan':
        return False

    def _sorted_values(series):
        # Text columns: a missing value and an empty string are treated
        # alike. Other dtypes are left untouched, because filling them with
        # "" would create a mixed-type column that cannot be sorted.
        is_text = (
            pd.api.types.is_object_dtype(series.dtype)
            or pd.api.types.is_string_dtype(series.dtype)
        )
        if is_text:
            series = series.fillna("")
        return series.sort_values().reset_index(drop=True)

    left = _sorted_values(df1[det_col])
    right = _sorted_values(df2[det_col])
    # NaN != NaN under ==, so positions missing on both sides are matched
    # explicitly (sort_values places missing values last on both sides).
    matches = (left == right) | (left.isna() & right.isna())
    return bool(matches.all())
38
+
39
def read(file, det_header=True):
    """Read the single CSV or Excel file matched by a glob pattern.

    Args:
        file: Glob pattern (or concrete path) given as a string or a
            ``pathlib`` path. Relative patterns are resolved against the
            current working directory; absolute patterns are accepted too.
        det_header: When truthy, use the header-detecting readers
            (``detect_header_read_excel`` / ``detect_header_read_csv``)
            instead of calling pandas directly.

    Returns:
        pandas.DataFrame: The parsed table.

    Raises:
        ValueError: If more than one file matches, or the matched file's
            suffix starts with neither ``.xl`` nor ``.csv``.
        FileNotFoundError: If no file matches.
    """
    # Accept every pathlib flavour (WindowsPath as well as PosixPath).
    if isinstance(file, pathlib.PurePath):
        file = str(file)

    pattern_path = Path(file)
    if pattern_path.is_absolute():
        # Path.glob() rejects absolute patterns, so glob from the anchor
        # with the remainder of the pattern.
        anchor = Path(pattern_path.anchor)
        candidates = anchor.glob(str(pattern_path.relative_to(anchor)))
    else:
        candidates = Path().glob(file)

    # Drop matches whose path starts with "." (hidden entries), but keep
    # paths that climb out of the working directory via "../".
    file_detect = []
    for candidate in candidates:
        text = candidate.as_posix()
        if not text.startswith('.') or text.startswith('../'):
            file_detect.append(candidate)

    if len(file_detect) > 1:
        raise ValueError(f'There are more than 1 files detected, please specify file name: {file_detect}')
    if not file_detect:
        raise FileNotFoundError(f'No file matched: {file}')

    file_path = file_detect[0]
    # Compare case-insensitively so "DATA.CSV" / "Book.XLSX" are accepted.
    suffix = file_path.suffix.lower()

    if suffix.startswith('.xl'):
        if det_header:
            return detect_header_read_excel(file_path)
        return pd.read_excel(file_path)

    if suffix.startswith('.csv'):
        if det_header:
            return detect_header_read_csv(file_path)
        return pd.read_csv(file_path)

    raise ValueError(f'unsupported file type: {file_path.suffix}')
61
+
62
def batch_read(file, det_col='nan'):
    """Read every file matching a glob pattern into one DataFrame.

    Loading and de-duplication are delegated to ``read_dfs`` so the two
    public helpers cannot drift apart; the unique tables are then stacked
    and re-indexed from zero.

    Args:
        file: Glob pattern, relative to the current working directory.
        det_col: Column handed to the table-equality check used for
            de-duplication; the default ``'nan'`` keeps that check strict.

    Returns:
        pandas.DataFrame: Concatenation of the unique tables.

    Raises:
        ValueError: If no table was loaded for the pattern.
    """
    unique_dataframes = read_dfs(file, det_col)
    if not unique_dataframes:
        # pd.concat([]) raises ValueError("No objects to concatenate");
        # keep the exception type but say what actually went wrong.
        raise ValueError(f'No file matched: {file}')
    return pd.concat(unique_dataframes).reset_index(drop=True)
76
+
77
+
78
def read_dfs(file, det_col='nan'):
    """Read every file matching a glob pattern and drop duplicate tables.

    Args:
        file: Glob pattern, relative to the current working directory.
        det_col: Column handed to ``equal_table`` for the order-insensitive
            duplicate check; the default ``'nan'`` keeps the check strict.

    Returns:
        list[pandas.DataFrame]: The unique tables, in glob order.
    """
    # read() refuses paths starting with "." (other than "../"), so skip
    # them here instead of failing on the first hidden file the glob hits.
    path_list = []
    for candidate in Path().glob(file):
        text = candidate.as_posix()
        if not text.startswith('.') or text.startswith('../'):
            path_list.append(candidate)
    logger.info('below %s files found: %s', len(path_list), path_list)

    # Pass plain strings so the call does not depend on the pathlib flavour.
    dataframes = [read(str(path)) for path in path_list]

    unique_dataframes = []
    for df in dataframes:
        if not any(equal_table(df, existing_df, det_col) for existing_df in unique_dataframes):
            unique_dataframes.append(df)
    logger.info('%s unique files found', len(unique_dataframes))
    return unique_dataframes
tablemaster/sync.py ADDED
@@ -0,0 +1,139 @@
1
+ import logging
2
+ import re
3
+ from collections import OrderedDict
4
+
5
+ import pandas as pd
6
+
7
+ from .database import ManageTable, query
8
+ from .feishu import fs_read_df, fs_write_df
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ def _safe_identifier(identifier):
14
+ if not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', identifier):
15
+ raise ValueError(f'Invalid identifier: {identifier}')
16
+ return identifier
17
+
18
+
19
+ def _is_blank(value):
20
+ if pd.isna(value):
21
+ return True
22
+ if isinstance(value, str) and value.strip() == '':
23
+ return True
24
+ return False
25
+
26
+
27
+ def _coerce_key(df, key):
28
+ copied = df.copy()
29
+ copied[key] = copied[key].astype(str)
30
+ copied = copied[copied[key].str.strip() != '']
31
+ copied = copied.drop_duplicates(subset=[key], keep='last')
32
+ return copied
33
+
34
+
35
def _auto_feishu_cfg():
    """Pick the Feishu config entry from the loaded configuration.

    An entry qualifies when it exposes both ``feishu_app_id`` and
    ``feishu_app_secret`` attributes.

    Returns:
        The single qualifying config entry.

    Raises:
        ValueError: If no entry, or more than one entry, qualifies.
    """
    # Imported lazily from the package, as in the rest of this module.
    from . import load_cfg

    candidates = [
        entry
        for entry in vars(load_cfg()).values()
        if hasattr(entry, 'feishu_app_id') and hasattr(entry, 'feishu_app_secret')
    ]

    if not candidates:
        raise ValueError('Feishu config is required. Pass it in endpoint tuple or keep one feishu config in cfg.')
    if len(candidates) > 1:
        raise ValueError('Multiple feishu configs found. Please pass feishu config in endpoint tuple.')
    return candidates[0]
49
+
50
+
51
+ def _read_endpoint(endpoint):
52
+ if not isinstance(endpoint, tuple) or len(endpoint) < 2:
53
+ raise ValueError('endpoint must be tuple like ("feishu", sheet, cfg?) or ("db", cfg, table)')
54
+
55
+ kind = endpoint[0]
56
+ if kind == 'feishu':
57
+ sheet = endpoint[1]
58
+ feishu_cfg = endpoint[2] if len(endpoint) >= 3 else _auto_feishu_cfg()
59
+ return fs_read_df(sheet, feishu_cfg), {'kind': kind, 'sheet': sheet, 'feishu_cfg': feishu_cfg}
60
+
61
+ if kind == 'db':
62
+ if len(endpoint) < 3:
63
+ raise ValueError('db endpoint requires ("db", db_cfg, table)')
64
+ db_cfg = endpoint[1]
65
+ table = _safe_identifier(endpoint[2])
66
+ df = query(f'SELECT * FROM {table}', db_cfg)
67
+ return df, {'kind': kind, 'db_cfg': db_cfg, 'table': table}
68
+
69
+ raise ValueError(f'Unsupported endpoint kind: {kind}')
70
+
71
+
72
+ def _write_endpoint(endpoint_state, df, key, on_conflict):
73
+ kind = endpoint_state['kind']
74
+ if kind == 'feishu':
75
+ fs_write_df(endpoint_state['sheet'], df, endpoint_state['feishu_cfg'], clear_sheet=True)
76
+ return
77
+
78
+ if kind == 'db':
79
+ tb = ManageTable(endpoint_state['table'], endpoint_state['db_cfg'])
80
+ tb.upsert_data(df, ignore=False, key=key)
81
+ return
82
+
83
+ raise ValueError(f'Unsupported endpoint kind: {kind}')
84
+
85
+
86
+ def _merge_bidirectional(source_df, target_df, key, on_conflict):
87
+ if on_conflict != 'upsert':
88
+ raise ValueError('on_conflict currently only supports "upsert"')
89
+ if key not in source_df.columns:
90
+ raise ValueError(f'key "{key}" not found in source columns')
91
+ if key not in target_df.columns:
92
+ raise ValueError(f'key "{key}" not found in target columns')
93
+
94
+ left = _coerce_key(source_df, key)
95
+ right = _coerce_key(target_df, key)
96
+
97
+ ordered_cols = OrderedDict()
98
+ for col in left.columns:
99
+ ordered_cols[col] = True
100
+ for col in right.columns:
101
+ ordered_cols[col] = True
102
+
103
+ left_map = left.set_index(key).to_dict(orient='index')
104
+ right_map = right.set_index(key).to_dict(orient='index')
105
+ all_keys = list(OrderedDict.fromkeys(list(left_map.keys()) + list(right_map.keys())))
106
+
107
+ merged_rows = []
108
+ for k in all_keys:
109
+ src_row = left_map.get(k, {})
110
+ tgt_row = right_map.get(k, {})
111
+ row = {}
112
+ for col in ordered_cols.keys():
113
+ if col == key:
114
+ row[col] = k
115
+ continue
116
+ src_val = src_row.get(col)
117
+ tgt_val = tgt_row.get(col)
118
+ if not _is_blank(src_val):
119
+ row[col] = src_val
120
+ elif not _is_blank(tgt_val):
121
+ row[col] = tgt_val
122
+ else:
123
+ row[col] = None
124
+ merged_rows.append(row)
125
+
126
+ merged_df = pd.DataFrame(merged_rows)
127
+ if key in merged_df.columns:
128
+ merged_df = merged_df[[key] + [c for c in merged_df.columns if c != key]]
129
+ return merged_df
130
+
131
+
132
def sync(source, target, on_conflict='upsert', key='id'):
    """Merge two endpoints on a key and write the result back to both.

    Args:
        source: Endpoint descriptor whose non-blank values take priority.
        target: Endpoint descriptor used to fill gaps.
        on_conflict: Conflict strategy forwarded to the merge step.
        key: Name of the key column shared by both endpoints.

    Returns:
        pandas.DataFrame: The merged table that was written to both sides.
    """
    src_frame, src_state = _read_endpoint(source)
    tgt_frame, tgt_state = _read_endpoint(target)

    result = _merge_bidirectional(src_frame, tgt_frame, key=key, on_conflict=on_conflict)

    # Source is written first, then target.
    for state in (src_state, tgt_state):
        _write_endpoint(state, result, key=key, on_conflict=on_conflict)

    logger.info('sync completed, merged rows: %s', len(result))
    return result
tablemaster/utils.py ADDED
@@ -0,0 +1,19 @@
1
+ from datetime import datetime
2
+ from datetime import timedelta
3
+ from dateutil.relativedelta import relativedelta
4
+
5
def gen_month_list(month_start, month_end):
    """List every month from ``month_start`` to ``month_end`` inclusive.

    Args:
        month_start: First month as a ``'%Y-%m'`` string.
        month_end: Last month as a ``'%Y-%m'`` string.

    Returns:
        list[str]: Month strings in ascending order; empty when
        ``month_start`` is later than ``month_end``. The first entry is
        ``month_start`` exactly as passed in; later entries are formatted
        as ``'%Y-%m'``.
    """
    fmt = '%Y-%m'
    current = datetime.strptime(month_start, fmt)
    final = datetime.strptime(month_end, fmt)

    months = []
    label = month_start
    while current <= final:
        months.append(label)
        # Step one calendar month forward; December rolls into next January.
        carry, month_index = divmod(current.month, 12)
        current = current.replace(year=current.year + carry, month=month_index + 1)
        label = current.strftime(fmt)
    return months
11
+
12
def gen_day_list(day_start, day_end='now'):
    """List every day from ``day_start`` to ``day_end`` inclusive.

    Args:
        day_start: First day as a ``'%Y-%m-%d'`` string.
        day_end: Last day as a ``'%Y-%m-%d'`` string, or ``'now'`` for the
            current local date.

    Returns:
        list[str]: Day strings in ascending order; empty when ``day_start``
        is later than ``day_end``. The first entry is ``day_start`` exactly
        as passed in; later entries are formatted as ``'%Y-%m-%d'``.
    """
    fmt = '%Y-%m-%d'
    if day_end == 'now':
        day_end = datetime.now().strftime(fmt)

    current = datetime.strptime(day_start, fmt)
    final = datetime.strptime(day_end, fmt)

    days = []
    label = day_start
    while current <= final:
        days.append(label)
        current += timedelta(days=1)
        label = current.strftime(fmt)
    return days
@@ -0,0 +1,243 @@
1
+ Metadata-Version: 2.4
2
+ Name: tablemaster
3
+ Version: 2.0.0
4
+ Summary: tablemaster is a Python toolkit for moving and managing tabular data across databases, Feishu/Lark, Google Sheets, and local files with one consistent API.
5
+ Author-email: Livid <livid.su@gmail.com>
6
+ Project-URL: Homepage, https://github.com/ilivid/tablemaster
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: pandas<3,>=1.5
11
+ Requires-Dist: pyyaml>=6
12
+ Requires-Dist: python-dateutil>=2.8
13
+ Requires-Dist: tqdm>=4.60
14
+ Requires-Dist: typer<1,>=0.12
15
+ Provides-Extra: mysql
16
+ Requires-Dist: PyMySQL>=1.1; extra == "mysql"
17
+ Requires-Dist: SQLAlchemy>=2.0; extra == "mysql"
18
+ Provides-Extra: feishu
19
+ Requires-Dist: requests>=2.28; extra == "feishu"
20
+ Provides-Extra: gspread
21
+ Requires-Dist: gspread>=6; extra == "gspread"
22
+ Provides-Extra: local
23
+ Requires-Dist: openpyxl>=3.1; extra == "local"
24
+ Requires-Dist: pyarrow>=14; extra == "local"
25
+ Provides-Extra: all
26
+ Requires-Dist: tablemaster[feishu,gspread,local,mysql]; extra == "all"
27
+ Dynamic: license-file
28
+
29
+ # tablemaster
30
+
31
+ `tablemaster` is a Python toolkit for moving and managing tabular data across databases, Feishu/Lark, Google Sheets, and local files with one consistent API.
32
+
33
+ ## Why tablemaster
34
+
35
+ - Unified DataFrame-first API across multiple data backends
36
+ - Production-friendly DB helpers (query, execute, chunked upload, upsert)
37
+ - Built-in Feishu and Google Sheets connectors
38
+ - Local CSV/Excel ingestion utilities
39
+ - Declarative two-way sync between Feishu Sheet and database table
40
+ - Configuration-first design for reproducible automation
41
+
42
+ ## Installation
43
+
44
+ Install core package:
45
+
46
+ ```bash
47
+ pip install -U tablemaster
48
+ ```
49
+
50
+ Install backend-specific extras as needed:
51
+
52
+ ```bash
53
+ pip install -U "tablemaster[mysql]" # MySQL/TiDB database support
54
+ pip install -U "tablemaster[feishu]" # Feishu/Lark connectors
55
+ pip install -U "tablemaster[gspread]" # Google Sheets connectors
56
+ pip install -U "tablemaster[local]" # Local CSV/Excel helpers
57
+ pip install -U "tablemaster[all]" # Everything above
58
+ ```
59
+
60
+ ## Configuration
61
+
62
+ Load configuration with:
63
+
64
+ ```python
65
+ import tablemaster as tm
66
+
67
+ cfg = tm.load_cfg()
68
+ ```
69
+
70
+ Load config from another path:
71
+
72
+ ```python
73
+ import os
74
+ import tablemaster as tm
75
+
76
+ cfg = tm.load_cfg(path="C:/configs/tablemaster/prod.yaml")
77
+ cfg = tm.load_cfg(path="C:/configs/tablemaster")
78
+
79
+ os.environ["TM_CFG_PATH"] = "D:/ops/tablemaster/cfg.yaml"
80
+ cfg = tm.load_cfg()
81
+ ```
82
+
83
+ `load_cfg()` resolves the config file in this order:
84
+
85
+ 1. Explicit `path` argument
86
+ 2. `TM_CFG_PATH` environment variable
87
+ 3. `./cfg.yaml`
88
+ 4. `~/.tablemaster/cfg.yaml`
89
+
90
+ Example `cfg.yaml`:
91
+
92
+ ```yaml
93
+ mydb:
94
+ host: 10.0.0.1
95
+ user: admin
96
+ password: secret
97
+ database: bake_prod
98
+ port: 3306
99
+ db_type: mysql
100
+
101
+ db_tidb:
102
+ host: sh.internal
103
+ user: reader
104
+ password: xxx
105
+ database: analytics
106
+ db_type: tidb
107
+ use_ssl: true
108
+ ssl_ca: /path/to/ca.pem
109
+
110
+ feishu_prod:
111
+ feishu_app_id: cli_xxx
112
+ feishu_app_secret: yyy
113
+
114
+ gsheet:
115
+ service_account_path: /absolute/path/to/service_account.json
116
+ ```
117
+
118
+ For Google Sheets authentication setup, see:
119
+ <https://docs.gspread.org/en/latest/oauth2.html>
120
+
121
+ ## Quick Start
122
+
123
+ ### Query and execute SQL
124
+
125
+ ```python
126
+ import tablemaster as tm
127
+
128
+ cfg = tm.load_cfg()
129
+ df = tm.query("SELECT * FROM orders LIMIT 20", cfg.mydb)
130
+ tm.opt("ALTER TABLE orders RENAME COLUMN old_col TO new_col", cfg.mydb)
131
+ ```
132
+
133
+ ### Manage database tables
134
+
135
+ ```python
136
+ import tablemaster as tm
137
+
138
+ cfg = tm.load_cfg()
139
+ tb = tm.ManageTable("orders", cfg.mydb)
140
+ tb.upload_data(df, add_date=True)
141
+ tb.upsert_data(df, key="order_id")
142
+ tb.par_del("order_date > '2023-01-01'")
143
+ ```
144
+
145
+ ### Google Sheets
146
+
147
+ ```python
148
+ import tablemaster as tm
149
+
150
+ cfg = tm.load_cfg()
151
+ sheet = ("spreadsheet_id_or_name", "worksheet_name")
152
+ df = tm.gs_read_df(sheet, cfg.gsheet)
153
+ tm.gs_write_df(sheet, df, cfg.gsheet)
154
+ ```
155
+
156
+ ### Feishu / Lark
157
+
158
+ ```python
159
+ import tablemaster as tm
160
+
161
+ cfg = tm.load_cfg()
162
+ feishu_sheet = ("spreadsheet_token", "sheet_id")
163
+ feishu_base = ("app_token", "table_id")
164
+
165
+ sheet_df = tm.fs_read_df(feishu_sheet, cfg.feishu_prod)
166
+ base_df = tm.fs_read_base(feishu_base, cfg.feishu_prod)
167
+ tm.fs_write_df(feishu_sheet, sheet_df, cfg.feishu_prod, loc="A1", clear_sheet=False)
168
+ tm.fs_write_base(feishu_base, base_df, cfg.feishu_prod, clear_table=False)
169
+ ```
170
+
171
+ ### Local files
172
+
173
+ ```python
174
+ import tablemaster as tm
175
+
176
+ single_df = tm.read("*orders_2026*")
177
+ merged_df = tm.batch_read("*orders_2026*")
178
+ df_list = tm.read_dfs("*orders_2026*")
179
+ ```
180
+
181
+ ### Declarative two-way sync
182
+
183
+ ```python
184
+ import tablemaster as tm
185
+
186
+ cfg = tm.load_cfg()
187
+ feishu_sheet = ("spreadsheet_token", "sheet_id")
188
+
189
+ merged = tm.sync(
190
+ source=("feishu", feishu_sheet, cfg.feishu_prod),
191
+ target=("db", cfg.mydb, "orders"),
192
+ on_conflict="upsert",
193
+ key="order_id",
194
+ )
195
+ ```
196
+
197
+ ## CLI
198
+
199
+ `tablemaster` now ships with a built-in CLI:
200
+
201
+ ```bash
202
+ tablemaster --help
203
+ python -m tablemaster --help
204
+ ```
205
+
206
+ Commands:
207
+
208
+ ```bash
209
+ tablemaster version-info
210
+ tablemaster config list --cfg-path ./cfg.yaml
211
+ tablemaster config show mydb --cfg-path ./cfg.yaml
212
+ tablemaster db query "SELECT * FROM orders LIMIT 20" --cfg-key mydb --cfg-path ./cfg.yaml
213
+ tablemaster db query "SELECT * FROM orders" --cfg-key mydb --output ./out/orders.csv
214
+ tablemaster local read "*orders_2026*" --limit 10
215
+ tablemaster local read "*orders_2026*" --no-det-header
216
+ ```
217
+
218
+ CLI command groups:
219
+
220
+ - `version-info`: Print installed package version.
221
+ - `config list`: List top-level keys from config.
222
+ - `config show <cfg_key>`: Print one config entry as JSON.
223
+ - `db query <sql>`: Run SQL with `--cfg-key`; use `--limit` to control stdout preview and `--output` to export full result as CSV.
224
+ - `local read <pattern>`: Read one local CSV/Excel match and print preview; use `--det-header/--no-det-header` to control header detection.
225
+
226
+
227
+ `--cfg-path` accepts either a config file path or a directory containing `cfg.yaml`.
228
+
229
+ ## Public API
230
+
231
+ - Database: `query`, `opt`, `ManageTable`
232
+ - Feishu/Lark: `fs_read_df`, `fs_write_df`, `fs_read_base`, `fs_write_base`
233
+ - Google Sheets: `gs_read_df`, `gs_write_df`
234
+ - Local files: `read`, `batch_read`, `read_dfs`
235
+ - Sync: `sync`
236
+ - Config: `load_cfg`
237
+
238
+ ## Notes
239
+
240
+ - Python 3.9+ is required.
241
+ - CLI entrypoint is `tablemaster`; use `tablemaster --help` for command details.
242
+ - `tm.cfg` and `read_cfg()` are backward-compatible but deprecated in favor of `load_cfg()`.
243
+ - PostgreSQL upsert is supported in code, but no extra installs a PostgreSQL driver; install the driver dependencies separately when needed.
@@ -0,0 +1,16 @@
1
+ tablemaster/__init__.py,sha256=Op9bcDT0JbSWfurVwaECmWa1ZuZYY93w957lSuwO26Q,1074
2
+ tablemaster/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
3
+ tablemaster/cli.py,sha256=1JWDlVuYE-NOqdgtVHMqn6rZBBDGx3kTszLglzypF54,3220
4
+ tablemaster/config.py,sha256=AfEG48FKgGklSdVMIQny5XufJu6niPT6901sq_TcLcM,3082
5
+ tablemaster/database.py,sha256=cK7qrhXXKwMlCLohD7ATexIl-hBH2BNUsyrJwZ-7q2Q,12232
6
+ tablemaster/feishu.py,sha256=R_cG0xISB0lqrJ56nNguGnmOHsybAkxQ66JD7DMmqsA,19171
7
+ tablemaster/gspread.py,sha256=QaqAXxcjtVr4qV82GCrA_gEsPT7ZjaI7TNeKgrvpsm8,4754
8
+ tablemaster/local.py,sha256=6m2MszuKODHgQmwIhNrSxNfRhZ3waK-PDgG9QBxBolk,3278
9
+ tablemaster/sync.py,sha256=TMG5aXSM0_xWIhNa11yANwwmVQkMsDbqiNQX1iDu53o,4686
10
+ tablemaster/utils.py,sha256=2Em7AdXr0jidjbKw40YVuRBhrqFpXLDNu5Ea8RJ0tFc,828
11
+ tablemaster-2.0.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
12
+ tablemaster-2.0.0.dist-info/METADATA,sha256=nnJEHSphXoByTOaWTi67qej3sDY7QKJnrjruTcFN_K4,6604
13
+ tablemaster-2.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
14
+ tablemaster-2.0.0.dist-info/entry_points.txt,sha256=vFCzNfeETaMezJ21oy5KCcayqjNLa0HvuPftCNEz0IE,52
15
+ tablemaster-2.0.0.dist-info/top_level.txt,sha256=_dNmxs-Udm2KKCZpPylx2KwWus-euGnVw_3A13Ewe4o,12
16
+ tablemaster-2.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ tablemaster = tablemaster.cli:app