PyPI - tablemaster - Versions diffs - 2.0.0__py3-none-any.whl - Mend

tablemaster 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

tablemaster/__init__.py +28 -0
tablemaster/__main__.py +3 -0
tablemaster/cli.py +97 -0
tablemaster/config.py +107 -0
tablemaster/database.py +286 -0
tablemaster/feishu.py +502 -0
tablemaster/gspread.py +130 -0
tablemaster/local.py +90 -0
tablemaster/sync.py +139 -0
tablemaster/utils.py +19 -0
tablemaster-2.0.0.dist-info/METADATA +243 -0
tablemaster-2.0.0.dist-info/RECORD +16 -0
tablemaster-2.0.0.dist-info/WHEEL +5 -0
tablemaster-2.0.0.dist-info/entry_points.txt +2 -0
tablemaster-2.0.0.dist-info/licenses/LICENSE +201 -0
tablemaster-2.0.0.dist-info/top_level.txt +1 -0

tablemaster/local.py ADDED Viewed

@@ -0,0 +1,90 @@
+import pandas as pd
+import pathlib
+from pathlib import Path
+import logging
+logger = logging.getLogger(__name__)
+def detect_header_read_csv(path, det_rows=10):
+    """Read a CSV file, skipping leading title rows to find the real header.
+
+    The file is first read with the default header (row 0). If more than one
+    column label contains pandas' placeholder text "Unnamed", rows
+    1..det_rows are tried in turn as the header, and the first parse with no
+    placeholder labels is returned.
+
+    Args:
+        path: Anything accepted by ``pandas.read_csv``.
+        det_rows: Maximum number of alternative header rows to try.
+
+    Returns:
+        The DataFrame parsed with the detected header. If no candidate row
+        yields a clean header, the default (row 0) parse is returned.
+    """
+    def _unnamed_count(frame):
+        # str() keeps the check safe for non-string column labels.
+        return sum('Unnamed' in str(col) for col in frame.columns)
+
+    default_df = pd.read_csv(path)
+    if _unnamed_count(default_df) <= 1:
+        return default_df
+    for header_row in range(1, det_rows + 1):
+        try:
+            candidate = pd.read_csv(path, header=header_row)
+        except ValueError:
+            # ParserError / EmptyDataError are ValueError subclasses: the
+            # candidate row lies past the end of the file, so stop scanning.
+            break
+        if _unnamed_count(candidate) == 0:
+            return candidate
+    # No clean header found: keep the default parse instead of a frame whose
+    # leading rows were consumed by the last header attempt.
+    return default_df
+def detect_header_read_excel(path, det_rows=10):
+    """Read an Excel file, skipping leading title rows to find the real header.
+
+    The file is first read with the default header (row 0). If more than one
+    column label contains pandas' placeholder text "Unnamed", rows
+    1..det_rows are tried in turn as the header, and the first parse with no
+    placeholder labels is returned.
+
+    Args:
+        path: Anything accepted by ``pandas.read_excel``.
+        det_rows: Maximum number of alternative header rows to try.
+
+    Returns:
+        The DataFrame parsed with the detected header. If no candidate row
+        yields a clean header, the default (row 0) parse is returned.
+    """
+    def _unnamed_count(frame):
+        # Excel headers may be ints or dates; str() avoids a TypeError on
+        # the substring test.
+        return sum('Unnamed' in str(col) for col in frame.columns)
+
+    default_df = pd.read_excel(path)
+    if _unnamed_count(default_df) <= 1:
+        return default_df
+    for header_row in range(1, det_rows + 1):
+        try:
+            candidate = pd.read_excel(path, header=header_row)
+        except ValueError:
+            # The candidate header row is past the end of the sheet.
+            break
+        if _unnamed_count(candidate) == 0:
+            return candidate
+    # No clean header found: keep the default parse instead of a frame whose
+    # leading rows were consumed by the last header attempt.
+    return default_df
+def equal_table(df1, df2, det_col='nan'):
+    """Decide whether two DataFrames should be treated as the same table.
+
+    Args:
+        df1: First DataFrame.
+        df2: Second DataFrame.
+        det_col: Column used for an order-insensitive comparison when the
+            frames are not strictly equal. The default string 'nan' disables
+            that fallback.
+
+    Returns:
+        False if the row counts differ; True if ``df1.equals(df2)``;
+        otherwise, when det_col is given, whether the sorted det_col values
+        of both frames match element-wise (missing values filled with "").
+    """
+    if len(df1) != len(df2):
+        return False
+    if df1.equals(df2):
+        return True
+    if det_col == 'nan':
+        return False
+    # Compare only the chosen column, ignoring row order.
+    left_values = df1[det_col].fillna("").sort_values().reset_index(drop=True).fillna(0)
+    right_values = df2[det_col].fillna("").sort_values().reset_index(drop=True)
+    return all(left_values == right_values)
+def read(file, det_header=True):
+    """Read the single CSV/Excel file matched by a path or glob pattern.
+
+    Args:
+        file: A str or path-like glob pattern. Relative patterns are globbed
+            from the current directory; absolute ones from their anchor.
+        det_header: When truthy, use the header-detecting readers
+            (``detect_header_read_excel`` / ``detect_header_read_csv``);
+            otherwise use plain ``pd.read_excel`` / ``pd.read_csv``.
+
+    Returns:
+        The parsed DataFrame.
+
+    Raises:
+        ValueError: More than one file matched, or the matched file's suffix
+            starts with neither '.xl' nor '.csv'.
+        FileNotFoundError: Nothing matched.
+    """
+    # Path() accepts str and any path-like (Posix or Windows flavour).
+    pattern = Path(file)
+    if pattern.is_absolute():
+        # Path.glob() rejects absolute patterns, so glob from the anchor.
+        root = Path(pattern.anchor)
+        matches = list(root.glob(str(pattern.relative_to(root))))
+    else:
+        matches = list(Path().glob(str(file)))
+    # Drop matches whose path text starts with '.', except '../' ones.
+    matches = [m for m in matches if str(m)[0] != "." or str(m)[:3] == "../"]
+    if len(matches) > 1:
+        raise ValueError(f'There are more than 1 files detected, please specify file name: {matches}')
+    if not matches:
+        raise FileNotFoundError(f'No file matched: {file}')
+    file_path = matches[0]
+    # Lower-case so '.CSV' / '.XLSX' are recognised too.
+    suffix = file_path.suffix.lower()
+    if suffix.startswith('.xl'):
+        if det_header:
+            return detect_header_read_excel(file_path)
+        return pd.read_excel(file_path)
+    if suffix.startswith('.csv'):
+        if det_header:
+            return detect_header_read_csv(file_path)
+        return pd.read_csv(file_path)
+    raise ValueError(f'unsupported file type: {file_path.suffix}')
+def batch_read(file, det_col='nan'):
+    """Read every file matching a glob pattern and stack the unique tables.
+
+    Loading and de-duplication are delegated to ``read_dfs``, which performs
+    the same glob, per-file ``read`` and ``equal_table`` filtering (and emits
+    the same log messages) that this function previously duplicated inline.
+
+    Args:
+        file: Glob pattern, resolved relative to the current directory.
+        det_col: Passed to ``equal_table`` when checking for duplicates.
+
+    Returns:
+        One DataFrame: the unique tables concatenated, with a fresh
+        0..n-1 index.
+    """
+    return pd.concat(read_dfs(file, det_col)).reset_index(drop=True)
+def read_dfs(file, det_col='nan'):
+    """Read every file matching a glob pattern and return the unique tables.
+
+    Args:
+        file: Glob pattern, resolved relative to the current directory.
+        det_col: Passed to ``equal_table`` when checking for duplicates.
+
+    Returns:
+        A list of DataFrames in match order, keeping only the first of any
+        tables that ``equal_table`` reports as equal.
+    """
+    matched_paths = list(Path().glob(file))
+    logger.info('below %s files found: %s', len(matched_paths), matched_paths)
+    loaded = [read(path) for path in matched_paths]
+    distinct = []
+    for candidate in loaded:
+        is_duplicate = False
+        for kept in distinct:
+            if equal_table(candidate, kept, det_col):
+                is_duplicate = True
+                break
+        if not is_duplicate:
+            distinct.append(candidate)
+    logger.info('%s unique files found', len(distinct))
+    return distinct

tablemaster/sync.py ADDED Viewed

@@ -0,0 +1,139 @@
+import logging
+import re
+from collections import OrderedDict
+import pandas as pd
+from .database import ManageTable, query
+from .feishu import fs_read_df, fs_write_df
+logger = logging.getLogger(__name__)
+def _safe_identifier(identifier):
+    """Validate a name before it is interpolated into SQL text.
+
+    Args:
+        identifier: Candidate identifier (letters, digits and underscores,
+            not starting with a digit).
+
+    Returns:
+        The identifier unchanged.
+
+    Raises:
+        ValueError: If the value is not a string or contains any other
+            character.
+    """
+    # fullmatch rather than match + '$': '$' also matches just before a
+    # trailing newline, so 'orders\n' used to slip through.
+    if not isinstance(identifier, str) or not re.fullmatch(r'[A-Za-z_][A-Za-z0-9_]*', identifier):
+        raise ValueError(f'Invalid identifier: {identifier}')
+    return identifier
+def _is_blank(value):
+    """Return True when a cell value carries no data.
+
+    Blank means a missing scalar (None, NaN, NaT, ...) or a string that is
+    empty after stripping whitespace. Non-scalar values such as lists or
+    dicts are never considered blank.
+    """
+    if isinstance(value, str):
+        return value.strip() == ''
+    # pd.isna() returns an array for list-likes, and truth-testing that
+    # array raises ValueError, so only scalars are passed to it.
+    if not pd.api.types.is_scalar(value):
+        return False
+    return bool(pd.isna(value))
+def _coerce_key(df, key):
+    """Return a copy of df with a clean, unique, string-typed key column.
+
+    Rows whose key is missing (None/NaN) or blank after stripping are
+    dropped; when a key occurs more than once, the last row wins. The input
+    frame is not modified.
+
+    Args:
+        df: Source DataFrame containing the column named by key.
+        key: Name of the key column.
+
+    Returns:
+        The filtered, de-duplicated copy.
+    """
+    # Drop missing keys before the cast: astype(str) would turn them into
+    # the literal strings 'nan' / 'None', which pass the blank filter below.
+    copied = df[df[key].notna()].copy()
+    copied[key] = copied[key].astype(str)
+    copied = copied[copied[key].str.strip() != '']
+    copied = copied.drop_duplicates(subset=[key], keep='last')
+    return copied
+def _auto_feishu_cfg():
+    """Pick the only Feishu config out of the loaded configuration.
+
+    An entry of ``load_cfg()`` counts as a Feishu config when it has both
+    ``feishu_app_id`` and ``feishu_app_secret`` attributes.
+
+    Returns:
+        The single matching config entry.
+
+    Raises:
+        ValueError: If no entry, or more than one entry, matches.
+    """
+    from . import load_cfg
+    loaded = load_cfg()
+    candidates = [
+        entry for entry in vars(loaded).values()
+        if hasattr(entry, 'feishu_app_id') and hasattr(entry, 'feishu_app_secret')
+    ]
+    if not candidates:
+        raise ValueError('Feishu config is required. Pass it in endpoint tuple or keep one feishu config in cfg.')
+    if len(candidates) > 1:
+        raise ValueError('Multiple feishu configs found. Please pass feishu config in endpoint tuple.')
+    return candidates[0]
+def _read_endpoint(endpoint):
+    """Load an endpoint's data and describe it for the later write-back.
+
+    Args:
+        endpoint: ``("feishu", sheet)`` or ``("feishu", sheet, feishu_cfg)``,
+            or ``("db", db_cfg, table)``. When the Feishu config is omitted
+            it is looked up with ``_auto_feishu_cfg``.
+
+    Returns:
+        ``(df, state)``: the DataFrame that was read, and a dict holding
+        'kind' plus the handles needed to write to the same endpoint.
+
+    Raises:
+        ValueError: If the tuple is malformed, the kind is unknown, or the
+            table name fails ``_safe_identifier``.
+    """
+    if not (isinstance(endpoint, tuple) and len(endpoint) >= 2):
+        raise ValueError('endpoint must be tuple like ("feishu", sheet, cfg?) or ("db", cfg, table)')
+    kind = endpoint[0]
+    if kind == 'feishu':
+        sheet = endpoint[1]
+        if len(endpoint) >= 3:
+            feishu_cfg = endpoint[2]
+        else:
+            feishu_cfg = _auto_feishu_cfg()
+        frame = fs_read_df(sheet, feishu_cfg)
+        return frame, {'kind': kind, 'sheet': sheet, 'feishu_cfg': feishu_cfg}
+    if kind != 'db':
+        raise ValueError(f'Unsupported endpoint kind: {kind}')
+    if len(endpoint) < 3:
+        raise ValueError('db endpoint requires ("db", db_cfg, table)')
+    db_cfg = endpoint[1]
+    # The table name is validated because it is interpolated into the SQL.
+    table = _safe_identifier(endpoint[2])
+    frame = query(f'SELECT * FROM {table}', db_cfg)
+    return frame, {'kind': kind, 'db_cfg': db_cfg, 'table': table}
+def _write_endpoint(endpoint_state, df, key, on_conflict):
+    """Write df to the endpoint described by endpoint_state.
+
+    Args:
+        endpoint_state: State dict as produced by ``_read_endpoint``.
+        df: DataFrame to write.
+        key: Key column handed to the database upsert.
+        on_conflict: Accepted for signature symmetry; not used here.
+
+    Raises:
+        ValueError: If the state's 'kind' is neither 'feishu' nor 'db'.
+    """
+    kind = endpoint_state['kind']
+    if kind == 'feishu':
+        # The sheet is cleared before the merged table is written.
+        fs_write_df(endpoint_state['sheet'], df, endpoint_state['feishu_cfg'], clear_sheet=True)
+    elif kind == 'db':
+        table_manager = ManageTable(endpoint_state['table'], endpoint_state['db_cfg'])
+        table_manager.upsert_data(df, ignore=False, key=key)
+    else:
+        raise ValueError(f'Unsupported endpoint kind: {kind}')
+def _merge_bidirectional(source_df, target_df, key, on_conflict):
+    """Merge two tables row-by-row on key, preferring non-blank source cells.
+
+    Both frames are first normalised with ``_coerce_key``. The result has
+    one row per key (source keys first, then keys found only in the target)
+    and the union of both column sets. Each cell takes the source value
+    unless ``_is_blank`` reports it blank, then the target value, else None.
+
+    Args:
+        source_df: Table whose values win on conflict.
+        target_df: Table used to fill cells the source leaves blank.
+        key: Name of the key column; must exist in both frames.
+        on_conflict: Only 'upsert' is supported.
+
+    Returns:
+        The merged DataFrame with the key column first.
+
+    Raises:
+        ValueError: On an unsupported on_conflict or a missing key column.
+    """
+    if on_conflict != 'upsert':
+        raise ValueError('on_conflict currently only supports "upsert"')
+    if key not in source_df.columns:
+        raise ValueError(f'key "{key}" not found in source columns')
+    if key not in target_df.columns:
+        raise ValueError(f'key "{key}" not found in target columns')
+    src_rows = _coerce_key(source_df, key)
+    tgt_rows = _coerce_key(target_df, key)
+    # dict.fromkeys de-duplicates while keeping first-seen order.
+    column_names = list(dict.fromkeys([*src_rows.columns, *tgt_rows.columns]))
+    src_lookup = src_rows.set_index(key).to_dict(orient='index')
+    tgt_lookup = tgt_rows.set_index(key).to_dict(orient='index')
+    records = []
+    for key_value in dict.fromkeys([*src_lookup, *tgt_lookup]):
+        from_src = src_lookup.get(key_value, {})
+        from_tgt = tgt_lookup.get(key_value, {})
+        record = {}
+        for name in column_names:
+            if name == key:
+                record[name] = key_value
+                continue
+            chosen = None
+            # The first non-blank candidate wins; source is checked first.
+            for candidate in (from_src.get(name), from_tgt.get(name)):
+                if not _is_blank(candidate):
+                    chosen = candidate
+                    break
+            record[name] = chosen
+        records.append(record)
+    merged = pd.DataFrame(records)
+    if key in merged.columns:
+        other_columns = [c for c in merged.columns if c != key]
+        merged = merged[[key] + other_columns]
+    return merged
+def sync(source, target, on_conflict='upsert', key='id'):
+    """Two-way sync: merge source and target, then write the result to both.
+
+    Args:
+        source: Endpoint tuple accepted by ``_read_endpoint``; its non-blank
+            values take precedence in the merge.
+        target: Endpoint tuple accepted by ``_read_endpoint``.
+        on_conflict: Merge strategy passed to ``_merge_bidirectional``.
+        key: Name of the column identifying a row on both sides.
+
+    Returns:
+        The merged DataFrame that was written to both endpoints.
+    """
+    source_df, source_state = _read_endpoint(source)
+    target_df, target_state = _read_endpoint(target)
+    merged_df = _merge_bidirectional(source_df, target_df, key=key, on_conflict=on_conflict)
+    # The source is written first, then the target.
+    for state in (source_state, target_state):
+        _write_endpoint(state, merged_df, key=key, on_conflict=on_conflict)
+    logger.info('sync completed, merged rows: %s', len(merged_df))
+    return merged_df

tablemaster/utils.py ADDED Viewed

@@ -0,0 +1,19 @@
+from datetime import datetime
+from datetime import timedelta
+from dateutil.relativedelta import relativedelta
+def gen_month_list(month_start, month_end):
+    """List every month from month_start to month_end, inclusive.
+
+    Args:
+        month_start: First month as a '%Y-%m' string, e.g. '2023-11'.
+        month_end: Last month as a '%Y-%m' string.
+
+    Returns:
+        Zero-padded 'YYYY-MM' strings in ascending order; an empty list when
+        month_start is after month_end.
+
+    Raises:
+        ValueError: If either argument does not parse as '%Y-%m'.
+    """
+    # Parse each bound once instead of on every loop iteration.
+    first = datetime.strptime(month_start, '%Y-%m')
+    last = datetime.strptime(month_end, '%Y-%m')
+    # Put months on one integer axis so stepping is plain arithmetic.
+    first_index = first.year * 12 + first.month - 1
+    last_index = last.year * 12 + last.month - 1
+    months = []
+    for index in range(first_index, last_index + 1):
+        year, month_zero_based = divmod(index, 12)
+        months.append(f'{year:04d}-{month_zero_based + 1:02d}')
+    return months
+def gen_day_list(day_start, day_end='now'):
+    """List every day from day_start to day_end, inclusive.
+
+    Args:
+        day_start: First day as a '%Y-%m-%d' string.
+        day_end: Last day as a '%Y-%m-%d' string, or 'now' (the default)
+            for today's local date.
+
+    Returns:
+        Zero-padded 'YYYY-MM-DD' strings in ascending order; an empty list
+        when day_start is after day_end.
+
+    Raises:
+        ValueError: If a date string does not parse as '%Y-%m-%d'.
+    """
+    fmt = '%Y-%m-%d'
+    # Parse each bound once instead of on every loop iteration.
+    if day_end == 'now':
+        last = datetime.now().date()
+    else:
+        last = datetime.strptime(day_end, fmt).date()
+    current = datetime.strptime(day_start, fmt).date()
+    one_day = timedelta(days=1)
+    days = []
+    while current <= last:
+        days.append(current.isoformat())
+        current += one_day
+    return days

tablemaster-2.0.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,243 @@
+Metadata-Version: 2.4
+Name: tablemaster
+Version: 2.0.0
+Summary: tablemaster is a Python toolkit for moving and managing tabular data across databases, Feishu/Lark, Google Sheets, and local files with one consistent API.
+Author-email: Livid <livid.su@gmail.com>
+Project-URL: Homepage, https://github.com/ilivid/tablemaster
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: pandas<3,>=1.5
+Requires-Dist: pyyaml>=6
+Requires-Dist: python-dateutil>=2.8
+Requires-Dist: tqdm>=4.60
+Requires-Dist: typer<1,>=0.12
+Provides-Extra: mysql
+Requires-Dist: PyMySQL>=1.1; extra == "mysql"
+Requires-Dist: SQLAlchemy>=2.0; extra == "mysql"
+Provides-Extra: feishu
+Requires-Dist: requests>=2.28; extra == "feishu"
+Provides-Extra: gspread
+Requires-Dist: gspread>=6; extra == "gspread"
+Provides-Extra: local
+Requires-Dist: openpyxl>=3.1; extra == "local"
+Requires-Dist: pyarrow>=14; extra == "local"
+Provides-Extra: all
+Requires-Dist: tablemaster[feishu,gspread,local,mysql]; extra == "all"
+Dynamic: license-file
+# tablemaster
+`tablemaster` is a Python toolkit for moving and managing tabular data across databases, Feishu/Lark, Google Sheets, and local files with one consistent API.
+## Why tablemaster
+- Unified DataFrame-first API across multiple data backends
+- Production-friendly DB helpers (query, execute, chunked upload, upsert)
+- Built-in Feishu and Google Sheets connectors
+- Local CSV/Excel ingestion utilities
+- Declarative two-way sync between Feishu Sheet and database table
+- Configuration-first design for reproducible automation
+## Installation
+Install core package:
+```bash
+pip install -U tablemaster
+```
+Install backend-specific extras as needed:
+```bash
+pip install -U "tablemaster[mysql]"    # MySQL/TiDB database support
+pip install -U "tablemaster[feishu]"   # Feishu/Lark connectors
+pip install -U "tablemaster[gspread]"  # Google Sheets connectors
+pip install -U "tablemaster[local]"    # Local CSV/Excel helpers
+pip install -U "tablemaster[all]"      # Everything above
+```
+## Configuration
+Load configuration with:
+```python
+import tablemaster as tm
+cfg = tm.load_cfg()
+```
+Load config from another path:
+```python
+import os
+import tablemaster as tm
+cfg = tm.load_cfg(path="C:/configs/tablemaster/prod.yaml")
+cfg = tm.load_cfg(path="C:/configs/tablemaster")
+os.environ["TM_CFG_PATH"] = "D:/ops/tablemaster/cfg.yaml"
+cfg = tm.load_cfg()
+```
+`load_cfg()` resolves config file in this order:
+1. Explicit `path` argument
+2. `TM_CFG_PATH` environment variable
+3. `./cfg.yaml`
+4. `~/.tablemaster/cfg.yaml`
+Example `cfg.yaml`:
+```yaml
+mydb:
+  host: 10.0.0.1
+  user: admin
+  password: secret
+  database: bake_prod
+  port: 3306
+  db_type: mysql
+db_tidb:
+  host: sh.internal
+  user: reader
+  password: xxx
+  database: analytics
+  db_type: tidb
+  use_ssl: true
+  ssl_ca: /path/to/ca.pem
+feishu_prod:
+  feishu_app_id: cli_xxx
+  feishu_app_secret: yyy
+gsheet:
+  service_account_path: /absolute/path/to/service_account.json
+```
+For Google Sheets authentication setup, see:
+<https://docs.gspread.org/en/latest/oauth2.html>
+## Quick Start
+### Query and execute SQL
+```python
+import tablemaster as tm
+cfg = tm.load_cfg()
+df = tm.query("SELECT * FROM orders LIMIT 20", cfg.mydb)
+tm.opt("ALTER TABLE orders RENAME COLUMN old_col TO new_col", cfg.mydb)
+```
+### Manage database tables
+```python
+import tablemaster as tm
+cfg = tm.load_cfg()
+tb = tm.ManageTable("orders", cfg.mydb)
+tb.upload_data(df, add_date=True)
+tb.upsert_data(df, key="order_id")
+tb.par_del("order_date > '2023-01-01'")
+```
+### Google Sheets
+```python
+import tablemaster as tm
+cfg = tm.load_cfg()
+sheet = ("spreadsheet_id_or_name", "worksheet_name")
+df = tm.gs_read_df(sheet, cfg.gsheet)
+tm.gs_write_df(sheet, df, cfg.gsheet)
+```
+### Feishu / Lark
+```python
+import tablemaster as tm
+cfg = tm.load_cfg()
+feishu_sheet = ("spreadsheet_token", "sheet_id")
+feishu_base = ("app_token", "table_id")
+sheet_df = tm.fs_read_df(feishu_sheet, cfg.feishu_prod)
+base_df = tm.fs_read_base(feishu_base, cfg.feishu_prod)
+tm.fs_write_df(feishu_sheet, sheet_df, cfg.feishu_prod, loc="A1", clear_sheet=False)
+tm.fs_write_base(feishu_base, base_df, cfg.feishu_prod, clear_table=False)
+```
+### Local files
+```python
+import tablemaster as tm
+single_df = tm.read("*orders_2026*")
+merged_df = tm.batch_read("*orders_2026*")
+df_list = tm.read_dfs("*orders_2026*")
+```
+### Declarative two-way sync
+```python
+import tablemaster as tm
+cfg = tm.load_cfg()
+feishu_sheet = ("spreadsheet_token", "sheet_id")
+merged = tm.sync(
+    source=("feishu", feishu_sheet, cfg.feishu_prod),
+    target=("db", cfg.mydb, "orders"),
+    on_conflict="upsert",
+    key="order_id",
+)
+```
+## CLI
+`tablemaster` now ships with a built-in CLI:
+```bash
+tablemaster --help
+python -m tablemaster --help
+```
+Commands:
+```bash
+tablemaster version-info
+tablemaster config list --cfg-path ./cfg.yaml
+tablemaster config show mydb --cfg-path ./cfg.yaml
+tablemaster db query "SELECT * FROM orders LIMIT 20" --cfg-key mydb --cfg-path ./cfg.yaml
+tablemaster db query "SELECT * FROM orders" --cfg-key mydb --output ./out/orders.csv
+tablemaster local read "*orders_2026*" --limit 10
+tablemaster local read "*orders_2026*" --no-det-header
+```
+CLI command groups:
+- `version-info`: Print installed package version.
+- `config show <cfg_key>`: Print one config entry as JSON.
+- `db query <sql>`: Run SQL with `--cfg-key`; use `--limit` to control stdout preview and `--output` to export full result as CSV.
+- `local read <pattern>`: Read one local CSV/Excel match and print preview; use `--det-header/--no-det-header` to control header detection.
+- `config list`: List top-level keys from config.
+`--cfg-path` accepts either a config file path or a directory containing `cfg.yaml`.
+## Public API
+- Database: `query`, `opt`, `ManageTable`
+- Feishu/Lark: `fs_read_df`, `fs_write_df`, `fs_read_base`, `fs_write_base`
+- Google Sheets: `gs_read_df`, `gs_write_df`
+- Local files: `read`, `batch_read`, `read_dfs`
+- Sync: `sync`
+- Config: `load_cfg`
+## Notes
+- Python 3.9+ is required.
+- CLI entrypoint is `tablemaster`; use `tablemaster --help` for command details.
+- `tm.cfg` and `read_cfg()` are backward-compatible but deprecated in favor of `load_cfg()`.
+- PostgreSQL upsert is implemented in the code; install the PostgreSQL driver dependencies separately when needed.

tablemaster-2.0.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,16 @@
+tablemaster/__init__.py,sha256=Op9bcDT0JbSWfurVwaECmWa1ZuZYY93w957lSuwO26Q,1074
+tablemaster/__main__.py,sha256=wcCrL4PjG51r5wVKqJhcoJPTLfHW0wNbD31DrUN0MWI,28
+tablemaster/cli.py,sha256=1JWDlVuYE-NOqdgtVHMqn6rZBBDGx3kTszLglzypF54,3220
+tablemaster/config.py,sha256=AfEG48FKgGklSdVMIQny5XufJu6niPT6901sq_TcLcM,3082
+tablemaster/database.py,sha256=cK7qrhXXKwMlCLohD7ATexIl-hBH2BNUsyrJwZ-7q2Q,12232
+tablemaster/feishu.py,sha256=R_cG0xISB0lqrJ56nNguGnmOHsybAkxQ66JD7DMmqsA,19171
+tablemaster/gspread.py,sha256=QaqAXxcjtVr4qV82GCrA_gEsPT7ZjaI7TNeKgrvpsm8,4754
+tablemaster/local.py,sha256=6m2MszuKODHgQmwIhNrSxNfRhZ3waK-PDgG9QBxBolk,3278
+tablemaster/sync.py,sha256=TMG5aXSM0_xWIhNa11yANwwmVQkMsDbqiNQX1iDu53o,4686
+tablemaster/utils.py,sha256=2Em7AdXr0jidjbKw40YVuRBhrqFpXLDNu5Ea8RJ0tFc,828
+tablemaster-2.0.0.dist-info/licenses/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+tablemaster-2.0.0.dist-info/METADATA,sha256=nnJEHSphXoByTOaWTi67qej3sDY7QKJnrjruTcFN_K4,6604
+tablemaster-2.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
+tablemaster-2.0.0.dist-info/entry_points.txt,sha256=vFCzNfeETaMezJ21oy5KCcayqjNLa0HvuPftCNEz0IE,52
+tablemaster-2.0.0.dist-info/top_level.txt,sha256=_dNmxs-Udm2KKCZpPylx2KwWus-euGnVw_3A13Ewe4o,12
+tablemaster-2.0.0.dist-info/RECORD,,

tablemaster-2.0.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (82.0.1)
+Root-Is-Purelib: true
+Tag: py3-none-any

tablemaster-2.0.0.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ tablemaster = tablemaster.cli:app