PyPI - tablemaster - Versions diffs - 2.1.0__tar.gz → 2.1.2__tar.gz - Mend

tablemaster 2.1.0 (tar.gz) → 2.1.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

{tablemaster-2.1.0 → tablemaster-2.1.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tablemaster
-Version: 2.1.0
+Version: 2.1.2
 Summary: tablemaster is a Python toolkit for moving and managing tabular data across databases, Feishu/Lark, Google Sheets, and local files with one consistent API.
 Author-email: Livid <livid.su@gmail.com>
 Project-URL: Homepage, https://github.com/ilivid/tablemaster

{tablemaster-2.1.0 → tablemaster-2.1.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "tablemaster"
-version = "2.1.0"
+version = "2.1.2"
 description = "tablemaster is a Python toolkit for moving and managing tabular data across databases, Feishu/Lark, Google Sheets, and local files with one consistent API."
 readme = "README.md"
 requires-python = ">=3.9"

tablemaster-2.1.2/tablemaster/database.py ADDED Viewed

@@ -0,0 +1,473 @@
+import json
+import logging
+import re
+import warnings
+from typing import Union, List, Tuple, Dict, Any, Optional
+from functools import lru_cache
+from sqlalchemy import create_engine, inspect, pool, text
+from sqlalchemy.engine import Engine
+import pandas as pd
+from datetime import datetime
+from tqdm import tqdm
+from urllib.parse import quote_plus
+logger = logging.getLogger(__name__)
+def get_connect_args(configs: Any) -> Dict[str, Any]:
+    """
+    Get database connection arguments, supporting SSL and other common configurations.
+    Args:
+        configs (Any): Configuration object that may contain use_ssl, ssl_ca, connect_args, db_type.
+    Returns:
+        Dict[str, Any]: A dictionary of connection arguments.
+    """
+    connect_args: Dict[str, Any] = {}
+    if hasattr(configs, 'connect_args') and configs.connect_args:
+        connect_args = configs.connect_args.copy()
+    else:
+        use_ssl: bool = getattr(configs, 'use_ssl', False)
+        db_type: str = getattr(configs, 'db_type', 'mysql').lower()
+        if db_type == 'tidb' or use_ssl:
+            ssl_ca: str = getattr(configs, 'ssl_ca', '/etc/ssl/cert.pem')
+            connect_args = {
+                'ssl': {
+                    'ca': ssl_ca,
+                    'check_hostname': False,
+                    'verify_identity': False
+                }
+            }
+    return connect_args
+def _build_conn_str(configs: Any) -> str:
+    """
+    Build the SQLAlchemy connection string based on configuration.
+    Args:
+        configs (Any): Configuration object containing host, port, user, password, database, etc.
+    Returns:
+        str: The SQLAlchemy connection string.
+    """
+    db_type: str = getattr(configs, 'db_type', 'mysql').lower()
+    password_encoded: str = quote_plus(configs.password)
+    match db_type:
+        case 'mysql' | 'tidb':
+            cf_port: int = getattr(configs, 'port', 3306)
+            return f'mysql+pymysql://{configs.user}:{password_encoded}@{configs.host}:{cf_port}/{configs.database}'
+        case 'postgresql':
+            cf_port: int = getattr(configs, 'port', 5432)
+            return f'postgresql+psycopg2://{configs.user}:{password_encoded}@{configs.host}:{cf_port}/{configs.database}'
+        case _:
+            raise ValueError(f'Unsupported db_type: {configs.db_type}')
+@lru_cache(maxsize=16)
+def _get_engine(conn_str: str, connect_args_json: str = '{}', autocommit: bool = False) -> Engine:
+    """
+    Get or create a cached SQLAlchemy Engine instance.
+    Args:
+        conn_str (str): The database connection string.
+        connect_args_json (str, optional): JSON string representation of connection arguments. Defaults to '{}'.
+        autocommit (bool, optional): Whether the engine should be in autocommit mode. Defaults to False.
+    Returns:
+        Engine: The created SQLAlchemy Engine instance.
+    """
+    connect_args: Dict[str, Any] = json.loads(connect_args_json) if connect_args_json else {}
+    engine_kwargs: Dict[str, Any] = {
+        'connect_args': connect_args,
+        'poolclass': pool.QueuePool,
+        'pool_size': 5,
+        'max_overflow': 10,
+        'pool_pre_ping': True,
+    }
+    if autocommit:
+        engine_kwargs['isolation_level'] = 'AUTOCOMMIT'
+    return create_engine(conn_str, **engine_kwargs)
+def _resolve_engine(configs: Any, autocommit: bool = False) -> Engine:
+    """
+    Resolve and return an Engine based on configuration.
+    Args:
+        configs (Any): Configuration object.
+        autocommit (bool, optional): Whether to use autocommit mode. Defaults to False.
+    Returns:
+        Engine: The SQLAlchemy Engine instance.
+    """
+    connection_string: str = _build_conn_str(configs)
+    connect_args: Dict[str, Any] = get_connect_args(configs)
+    connect_args_json: str = json.dumps(connect_args, sort_keys=True, default=str)
+    return _get_engine(connection_string, connect_args_json, autocommit)
+def _safe_identifier(identifier: str) -> str:
+    """
+    Ensure an identifier is safe from SQL injection.
+    Args:
+        identifier (str): The SQL identifier to validate.
+    Returns:
+        str: The safe identifier.
+    Raises:
+        ValueError: If the identifier contains invalid characters.
+    """
+    if not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', identifier):
+        raise ValueError(f'Invalid identifier: {identifier}')
+    return identifier
+def _safe_mysql_type(data_type: str) -> str:
+    """
+    Ensure a MySQL data type expression is safe from SQL injection.
+    Args:
+        data_type (str): The MySQL data type to validate.
+    Returns:
+        str: The safe data type string.
+    Raises:
+        ValueError: If the data type expression contains invalid characters.
+    """
+    normalized: str = data_type.strip()
+    if not re.match(r'^[A-Za-z0-9_,()\s]+$', normalized):
+        raise ValueError(f'Invalid data type expression: {data_type}')
+    return normalized
+def query(sql: Union[str, text], configs: Any, params: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
+    """
+    Execute a query and return results as a pandas DataFrame.
+    Args:
+        sql (Union[str, text]): The SQL query to execute.
+        configs (Any): Configuration object.
+        params (Optional[Dict[str, Any]], optional): Query parameters. Defaults to None.
+    Returns:
+        pd.DataFrame: Query results.
+    """
+    logger.info('try to connect to %s...', getattr(configs, 'name', 'database'))
+    engine: Engine = _resolve_engine(configs, autocommit=False)
+    with engine.connect() as conn:
+        statement = text(sql) if isinstance(sql, str) else sql
+        df: pd.DataFrame = pd.read_sql(statement, conn, params=params)
+    logger.debug('query preview: %s', df.head())
+    return df
+def opt(sql: Union[str, text], configs: Any, params: Optional[Dict[str, Any]] = None) -> None:
+    """
+    Execute a SQL statement that modifies the database (e.g., INSERT, UPDATE, DELETE).
+    Args:
+        sql (Union[str, text]): The SQL statement to execute.
+        configs (Any): Configuration object.
+        params (Optional[Dict[str, Any]], optional): Query parameters. Defaults to None.
+    """
+    logger.info('try to connect to %s...', getattr(configs, 'name', 'database'))
+    engine: Engine = _resolve_engine(configs, autocommit=True)
+    with engine.connect() as conn:
+        statement = text(sql) if isinstance(sql, str) else sql
+        conn.execute(statement, params or {})
+    logger.info('database execute success')
+class ManageTable:
+    """
+    A class to manage a specific database table's operations.
+    """
+    def __init__(self, table: str, configs: Any, verify: bool = False) -> None:
+        """
+        Initialize a ManageTable instance.
+        Args:
+            table (str): The name of the table.
+            configs (Any): Configuration object for the database.
+            verify (bool, optional): Whether to verify if the table exists upon initialization. Defaults to False.
+        """
+        self.port: int = getattr(configs, 'port', 3306)
+        self.table: str = table
+        self.name: str = configs.name
+        self.user: str = configs.user
+        self.password: str = configs.password
+        self.host: str = configs.host
+        self.database: str = configs.database
+        self.configs: Any = configs
+        if verify:
+            self._check_exists()
+    def _check_exists(self) -> None:
+        """
+        Check if the table exists and raise an error if not.
+        Raises:
+            ValueError: If the table does not exist.
+        """
+        if not self.exists():
+            raise ValueError(f'table not found: {self.table}')
+        logger.info('table exists: %s', self.table)
+    def exists(self) -> bool:
+        """
+        Check if the table exists in the database.
+        Returns:
+            bool: True if table exists, False otherwise.
+        """
+        safe_table: str = _safe_identifier(self.table)
+        try:
+            engine: Engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
+            inspector = inspect(engine)
+            return inspector.has_table(safe_table)
+        except Exception as e:
+            logger.exception('failed to check if table exists: %s', e)
+            raise
+    def delete_table(self) -> None:
+        """
+        Drop the table from the database.
+        """
+        safe_table: str = _safe_identifier(self.table)
+        try:
+            opt(text(f'DROP TABLE `{safe_table}`'), self)
+            logger.info('%s deleted', self.table)
+        except Exception as e:
+            logger.exception('table was not deleted: %s', e)
+            raise
+    def par_del(self, clause: str, params: Optional[Dict[str, Any]] = None) -> None:
+        """
+        Delete specific records from the table based on a WHERE clause.
+        Args:
+            clause (str): The WHERE clause conditions.
+            params (Optional[Dict[str, Any]], optional): Parameters for the WHERE clause. Defaults to None.
+        """
+        safe_table: str = _safe_identifier(self.table)
+        del_clause = text(f'DELETE FROM `{safe_table}` WHERE {clause}')
+        opt(del_clause, self, params=params)
+        logger.info('records deleted by clause: %s', clause)
+    def change_data_type(self, cols_name: str, data_type: str) -> None:
+        """
+        Change the data type of a specific column in the table.
+        Args:
+            cols_name (str): The name of the column to alter.
+            data_type (str): The new data type expression.
+        """
+        safe_table: str = _safe_identifier(self.table)
+        safe_col: str = _safe_identifier(cols_name)
+        safe_type: str = _safe_mysql_type(data_type)
+        change_clause = text(f'ALTER TABLE `{safe_table}` MODIFY COLUMN `{safe_col}` {safe_type}')
+        opt(change_clause, self)
+        logger.info('%s changed to %s successfully', cols_name, data_type)
+    def upload_data(self, df: pd.DataFrame, chunk_size: int = 10000, add_date: bool = False) -> None:
+        """
+        Upload data from a pandas DataFrame to the database table.
+        Args:
+            df (pd.DataFrame): The DataFrame containing data to upload.
+            chunk_size (int, optional): Number of rows to upload per chunk. Defaults to 10000.
+            add_date (bool, optional): Whether to append the current date to the DataFrame before uploading. Defaults to False.
+        """
+        engine: Engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
+        with engine.begin() as connection:
+            if add_date:
+                df_copy: pd.DataFrame = df.copy()
+                df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
+            else:
+                df_copy: pd.DataFrame = df
+            total_chunks: int = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
+            logger.info('try to upload data now, chunk_size is %s', chunk_size)
+            with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
+                try:
+                    for start in range(0, len(df_copy), chunk_size):
+                        end: int = min(start + chunk_size, len(df_copy))
+                        chunk: pd.DataFrame = df_copy.iloc[start:end]
+                        chunk.to_sql(name=self.table, con=connection, if_exists='append', index=False)
+                        pbar.update(1)
+                except Exception as e:
+                    logger.exception('an error occurred during upload: %s', e)
+                    raise
+    def upsert_data(self, df: pd.DataFrame, chunk_size: int = 10000, add_date: bool = False, ignore: bool = False, key: Union[str, List[str], Tuple[str, ...], None] = None) -> None:
+        """
+        Upsert data from a pandas DataFrame into the database table.
+        This method will perform an "insert or update" (upsert) operation based on the target database type.
+        If the record already exists (based on the specified primary key or unique index), it updates the existing record.
+        Otherwise, it inserts a new record.
+        Args:
+            df (pd.DataFrame): The pandas DataFrame containing the data to be upserted.
+            chunk_size (int, optional): The number of rows to insert per batch. Defaults to 10000.
+            add_date (bool, optional): Whether to add a 'rundate' column with the current date to the dataframe. Defaults to False.
+            ignore (bool, optional): If True, it performs an 'INSERT IGNORE' or 'ON CONFLICT DO NOTHING' operation, skipping existing records instead of updating them. Defaults to False.
+            key (Union[str, List[str], Tuple[str, ...], None], optional): The primary key or unique index column(s) used to detect conflicts.
+                                                                           Required for PostgreSQL. For MySQL/TiDB, this is used to exclude primary key columns from being updated.
+                                                                           Can be a comma-separated string or a list/tuple of strings. Defaults to None.
+        Raises:
+            ValueError: If 'key' is not provided when 'db_type' is 'postgresql', or if an unsupported 'db_type' is used.
+        """
+        engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
+        db_type: str = getattr(self.configs if hasattr(self, 'configs') else self, 'db_type', 'mysql').lower()
+        with engine.begin() as connection:
+            if add_date:
+                df_copy: pd.DataFrame = df.copy()
+                df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
+            else:
+                df_copy: pd.DataFrame = df
+            total_chunks: int = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
+            logger.info('trying to upload data now, chunk_size is %s', chunk_size)
+            with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
+                for start in range(0, len(df_copy), chunk_size):
+                    end: int = min(start + chunk_size, len(df_copy))
+                    chunk: pd.DataFrame = df_copy.iloc[start:end]
+                    columns: List[str] = chunk.columns.tolist()
+                    value_placeholders: str = ', '.join([f':{col}' for col in columns])
+                    try:
+                        if ignore == False:
+                            keys: List[str] = []
+                            if key:
+                                if isinstance(key, str):
+                                    keys = [k.strip() for k in key.split(',')]
+                                elif isinstance(key, (list, tuple)):
+                                    keys = [str(k).strip() for k in key]
+                                else:
+                                    raise ValueError('key must be a string or a list of strings')
+                            if db_type in ('mysql', 'tidb'):
+                                if keys:
+                                    update_columns = ', '.join([f"`{col}`=VALUES(`{col}`)" for col in columns if col not in keys])
+                                else:
+                                    update_columns = ', '.join([f"`{col}`=VALUES(`{col}`)" for col in columns])
+                                if update_columns:
+                                    insert_sql = f"""
+                                    INSERT INTO {self.table} ({', '.join([f'`{col}`' for col in columns])})
+                                    VALUES ({value_placeholders})
+                                    ON DUPLICATE KEY UPDATE {update_columns}
+                                    """
+                                else:
+                                    insert_sql = f"""
+                                    INSERT IGNORE INTO {self.table} ({', '.join([f'`{col}`' for col in columns])})
+                                    VALUES ({value_placeholders})
+                                    """
+                            elif db_type == 'postgresql':
+                                if not keys:
+                                    raise ValueError('key is required for postgresql upsert')
+                                safe_keys = [_safe_identifier(k) for k in keys]
+                                safe_columns = [_safe_identifier(col) for col in columns]
+                                quoted_columns = ', '.join([f'"{col}"' for col in safe_columns])
+                                update_columns = ', '.join(
+                                    [f'"{col}"=EXCLUDED."{col}"' for col in safe_columns if col not in safe_keys]
+                                )
+                                conflict_keys_str = ', '.join([f'"{k}"' for k in safe_keys])
+                                if update_columns:
+                                    insert_sql = f"""
+                                    INSERT INTO {self.table} ({quoted_columns})
+                                    VALUES ({value_placeholders})
+                                    ON CONFLICT ({conflict_keys_str}) DO UPDATE SET {update_columns}
+                                    """
+                                else:
+                                    insert_sql = f"""
+                                    INSERT INTO {self.table} ({quoted_columns})
+                                    VALUES ({value_placeholders})
+                                    ON CONFLICT ({conflict_keys_str}) DO NOTHING
+                                    """
+                            else:
+                                raise ValueError(f'Unsupported db_type for upsert: {db_type}')
+                        else:
+                            insert_sql = f"""
+                            INSERT IGNORE INTO {self.table} ({', '.join([f'`{col}`' for col in columns])})
+                            VALUES ({value_placeholders})
+                            """
+                        data = chunk.where(pd.notna(chunk), None).to_dict(orient='records')
+                        connection.execute(text(insert_sql), data)
+                        pbar.update(1)
+                    except Exception as e:
+                        logger.exception('an error occurred during upsert: %s', e)
+                        raise
+class Manage_table(ManageTable):
+    """
+    Deprecated class for managing database tables. Use ManageTable instead.
+    """
+    def __init__(self, table: str, configs: Any, verify: bool = False) -> None:
+        """
+        Initialize the Manage_table instance. Issues a deprecation warning.
+        Args:
+            table (str): The name of the table to manage.
+            configs (Any): Configuration object containing database connection details.
+            verify (bool, optional): Whether to verify the table configuration. Defaults to False.
+        """
+        warnings.warn(
+            'Manage_table is deprecated and will be removed in v2.0.0; use ManageTable instead.',
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        super().__init__(table, configs, verify=verify)
+    def delete_table(self) -> None:
+        """
+        Drop the table from the database.
+        """
+        super().delete_table()
+    def upload_data(self, df: pd.DataFrame, chunk_size: int = 10000, add_date: bool = True) -> None:
+        """
+        Upload data from a pandas DataFrame to the database table.
+        Args:
+            df (pd.DataFrame): The pandas DataFrame containing the data to upload.
+            chunk_size (int, optional): The number of rows to insert per batch. Defaults to 10000.
+            add_date (bool, optional): Whether to add a 'rundate' column with the current date to the dataframe. Defaults to True.
+        """
+        engine: Engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
+        with engine.begin() as connection:
+            if add_date:
+                df_copy: pd.DataFrame = df.copy()
+                df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
+            else:
+                df_copy: pd.DataFrame = df
+            total_chunks: int = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
+            logger.info('try to upload data now, chunk_size is %s', chunk_size)
+            with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
+                try:
+                    for start in range(0, len(df_copy), chunk_size):
+                        end: int = min(start + chunk_size, len(df_copy))
+                        chunk: pd.DataFrame = df_copy.iloc[start:end]
+                        chunk.to_sql(name=self.table, con=connection, if_exists='append', index=False)
+                        pbar.update(1)
+                except Exception as e:
+                    logger.exception('an error occurred during upload: %s', e)
+                    raise

{tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/feishu.py RENAMED Viewed

@@ -178,9 +178,10 @@ def fs_write_df(sheet_address, df, feishu_cfg, loc='A1', clear_sheet=True):
             if clear_resp.json().get('code') == 0:
                 logger.info('sheet cleared')
             else:
-                logger.warning("failed to clear sheet: %s", clear_resp.json().get('msg'))
+                raise RuntimeError(f"failed to clear sheet: {clear_resp.json().get('msg')}")
         except Exception as e:
-            logger.warning('failed to clear sheet: %s', e)
+            logger.exception('failed to clear sheet: %s', e)
+            raise
     # 处理 DataFrame 数据类型
     df_copy = df.copy()
@@ -305,8 +306,7 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
     existing_fields = _get_bitable_fields(app_token, table_id, header)
     if not existing_fields:
-        logger.error('could not fetch table fields or table has no fields')
-        return None
+        raise ValueError('could not fetch table fields or table has no fields')
     logger.info('table has %s fields', len(existing_fields))
@@ -323,8 +323,7 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
             logger.warning('skip column: %s', field)
     if not valid_fields:
-        logger.error('no valid fields to write, all dataframe columns are missing in bitable')
-        return None
+        raise ValueError('no valid fields to write, all dataframe columns are missing in bitable')
     logger.info('will write %s valid fields', len(valid_fields))
@@ -360,8 +359,9 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
                     _request_with_retry("post", delete_url, headers=header, json_data=delete_data)
                 logger.info('deleted %s records', len(record_ids))
-        except Exception as e:
-            logger.warning('failed to clear table: %s', e)
+        except Exception as e:
+            logger.exception('failed to clear table: %s', e)
+            raise
     # 处理 DataFrame - 只保留有效字段
     df_copy = df[list(valid_fields)].copy()
@@ -444,7 +444,7 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
                     str_val = str(value)
                     if str_val and str_val != 'None' and str_val != 'nan':
                         fields[col] = str_val
-                except:
+                except Exception:
                     if col not in skipped_cols:
                         skipped_cols.add(col)
                     continue
@@ -457,6 +457,7 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
     # 批量写入（每次最多500条）
     batch_size = 500
     all_responses = []
+    failed_batches = []
     for i in range(0, len(records), batch_size):
         batch = records[i:i + batch_size]
@@ -473,9 +474,11 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
                 logger.info('batch %s wrote %s records', i // batch_size + 1, len(batch))
             else:
                 logger.error('failed to write batch: %s', response.get('msg', 'Unknown error'))
+                failed_batches.append((i // batch_size + 1, response.get('msg', 'Unknown error')))
         except Exception as e:
             logger.exception('failed to write batch: %s', e)
+            failed_batches.append((i // batch_size + 1, str(e)))
     logger.info('write summary total records: %s', len(records))
     logger.info('write summary fields written: %s', len(valid_fields))
@@ -483,6 +486,8 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
         logger.info('write summary fields skipped: %s', len(missing_fields))
         for field in sorted(missing_fields):
             logger.info('skip field: %s', field)
+    if failed_batches:
+        raise RuntimeError(f'bitable write failed for {len(failed_batches)} batch(es): {failed_batches}')
     logger.info('data is written')
     return all_responses

{tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/gspread.py RENAMED Viewed

@@ -68,14 +68,16 @@ def gs_read_df(address, cfg=None, service_account_path=None):
         return df
     except gspread.exceptions.SpreadsheetNotFound:
-        logger.error("spreadsheet '%s' not found", spreadsheet_identifier)
-        return None
+        message = f"spreadsheet '{spreadsheet_identifier}' not found"
+        logger.error(message)
+        raise ValueError(message)
     except gspread.exceptions.WorksheetNotFound:
-        logger.error("worksheet '%s' not found in spreadsheet", worksheet_name)
-        return None
+        message = f"worksheet '{worksheet_name}' not found in spreadsheet"
+        logger.error(message)
+        raise ValueError(message)
     except Exception as e:
         logger.exception('an unexpected error occurred: %s', e)
-        return None
+        raise
 def gs_write_df(address, df, cfg=None, loc='A1', service_account_path=None):
@@ -105,8 +107,9 @@ def gs_write_df(address, df, cfg=None, loc='A1', service_account_path=None):
     except gspread.exceptions.SpreadsheetNotFound:
         if is_id:
-            logger.error("spreadsheet ID '%s' not found, cannot create with specific ID", spreadsheet_identifier)
-            return
+            message = f"spreadsheet ID '{spreadsheet_identifier}' not found, cannot create with specific ID"
+            logger.error(message)
+            raise ValueError(message)
         else:
             logger.info("spreadsheet '%s' not found, creating one", spreadsheet_identifier)
             sh = gc.create(spreadsheet_identifier)
@@ -128,3 +131,4 @@ def gs_write_df(address, df, cfg=None, loc='A1', service_account_path=None):
         logger.info('data is written')
     except Exception as e:
         logger.exception('failed to update worksheet: %s', e)
+        raise

{tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/pull.py RENAMED Viewed

@@ -8,6 +8,17 @@ import yaml
 from .models import ActualTable
+class _QuotedStringDumper(yaml.SafeDumper):
+    pass
+def _quoted_string_representer(dumper, value):
+    return dumper.represent_scalar('tag:yaml.org,2002:str', value, style='"')
+_QuotedStringDumper.add_representer(str, _quoted_string_representer)
 def _table_to_payload(table: ActualTable) -> dict:
     payload: dict = {
         'table': table.table,
@@ -51,7 +62,13 @@ def write_pulled_schema(
         target = out / f'{table.table}.yaml'
         payload = _table_to_payload(table)
         with target.open('w', encoding='utf-8') as f:
-            yaml.safe_dump(payload, f, sort_keys=False, allow_unicode=True)
+            yaml.dump(
+                payload,
+                f,
+                Dumper=_QuotedStringDumper,
+                sort_keys=False,
+                allow_unicode=True,
+            )
         written.append(target)
     return written

{tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tablemaster
-Version: 2.1.0
+Version: 2.1.2
 Summary: tablemaster is a Python toolkit for moving and managing tabular data across databases, Feishu/Lark, Google Sheets, and local files with one consistent API.
 Author-email: Livid <livid.su@gmail.com>
 Project-URL: Homepage, https://github.com/ilivid/tablemaster

{tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster.egg-info/SOURCES.txt RENAMED Viewed

@@ -31,4 +31,5 @@ tablemaster/schema/dialects/base.py
 tablemaster/schema/dialects/mysql.py
 tablemaster/schema/dialects/postgresql.py
 tablemaster/schema/dialects/tidb.py
+tests/test_error_visibility.py
 tests/test_schema_core.py

tablemaster-2.1.2/tests/test_error_visibility.py ADDED Viewed

@@ -0,0 +1,54 @@
+from types import SimpleNamespace
+from unittest import TestCase
+from unittest.mock import patch
+import pandas as pd
+from tablemaster.database import ManageTable
+from tablemaster.feishu import fs_write_base
+class _DummyResponse:
+    def __init__(self, body, status_code=200):
+        self._body = body
+        self.status_code = status_code
+    def json(self):
+        return self._body
+class ErrorVisibilityTests(TestCase):
+    def setUp(self):
+        self.db_cfg = SimpleNamespace(
+            name='test_db',
+            user='u',
+            password='p',
+            host='127.0.0.1',
+            database='d',
+            db_type='mysql',
+        )
+        self.feishu_cfg = SimpleNamespace(feishu_app_id='id', feishu_app_secret='secret')
+    def test_manage_table_exists_propagates_errors(self):
+        table = ManageTable('orders', self.db_cfg)
+        with patch('tablemaster.database._resolve_engine', side_effect=RuntimeError('db unavailable')):
+            with self.assertRaises(RuntimeError):
+                table.exists()
+    def test_delete_table_propagates_errors(self):
+        table = ManageTable('orders', self.db_cfg)
+        with patch('tablemaster.database.opt', side_effect=RuntimeError('drop failed')):
+            with self.assertRaises(RuntimeError):
+                table.delete_table()
+    def test_fs_write_base_raises_when_batch_write_failed(self):
+        df = pd.DataFrame({'a': [1]})
+        with patch('tablemaster.feishu._get_tenant_access_token', return_value='token'):
+            with patch('tablemaster.feishu._get_bitable_fields', return_value={'a'}):
+                with patch(
+                    'tablemaster.feishu._request_with_retry',
+                    return_value=_DummyResponse({'code': 1001, 'msg': 'bad request'}),
+                ):
+                    with self.assertRaises(RuntimeError):
+                        fs_write_base(['app_token', 'table_id'], df, self.feishu_cfg)

{tablemaster-2.1.0 → tablemaster-2.1.2}/tests/test_schema_core.py RENAMED Viewed

@@ -102,8 +102,35 @@ class SchemaCoreTests(unittest.TestCase):
             paths = write_pulled_schema(tables, root / 'schema' / 'mydb')
             self.assertEqual(1, len(paths))
             content = paths[0].read_text(encoding='utf-8')
-            self.assertIn('table: orders', content)
-            self.assertIn('primary_key: true', content)
+            self.assertIn('"table": "orders"', content)
+            self.assertIn('"primary_key": true', content)
+    def test_pull_quotes_comment_with_colon(self):
+        # Comments that contain a colon must be written quoted and must load back unchanged.
+        id_column = ActualColumn(
+            name='id',
+            type='BIGINT',
+            nullable=False,
+            default=None,
+            comment='主键:业务单号',
+            primary_key=True,
+        )
+        orders = ActualTable(
+            table='orders',
+            columns=[id_column],
+            indexes=[],
+            comment='订单:主表',
+        )
+        with TemporaryDirectory() as td:
+            schema_root = Path(td) / 'schema'
+            written = write_pulled_schema([orders], schema_root / 'mydb')
+            content = written[0].read_text(encoding='utf-8')
+            self.assertIn('"comment": "订单:主表"', content)
+            # Round trip: read the file that was just written and compare the comments.
+            loaded = load_schema_definitions(connection='mydb', root_dir=schema_root)
+            first_table = loaded[0]
+            self.assertEqual('订单:主表', first_table.comment)
+            self.assertEqual('主键:业务单号', first_table.columns[0].comment)
 if __name__ == '__main__':

tablemaster-2.1.0/tablemaster/database.py DELETED Viewed

@@ -1,286 +0,0 @@
-import json
-import logging
-import re
-import warnings
-from functools import lru_cache
-from sqlalchemy import create_engine, pool, text
-import pandas as pd
-from datetime import datetime
-from tqdm import tqdm
-from urllib.parse import quote_plus
-logger = logging.getLogger(__name__)
-def get_connect_args(configs):
-    """
-    Build the driver connection arguments for a config object, with SSL support.
-    Explicit ``connect_args`` on the config take precedence; otherwise an SSL
-    block is generated when the database type is TiDB or ``use_ssl`` is set.
-    Args:
-        configs: configuration object; the optional attributes read are:
-            - use_ssl: whether to use SSL (bool)
-            - ssl_ca: path to the SSL CA certificate (str)
-            - connect_args: custom connection arguments (dict)
-            - db_type: database type ('tidb', 'mysql', ...)
-    Returns:
-        dict: the connection-argument dictionary
-    """
-    custom_args = getattr(configs, 'connect_args', None)
-    if custom_args:
-        # Shallow copy, so the returned dict is a different object from the one on the config.
-        return custom_args.copy()
-    use_ssl = getattr(configs, 'use_ssl', False)
-    db_type = getattr(configs, 'db_type', 'mysql').lower()
-    if db_type != 'tidb' and not use_ssl:
-        return {}
-    ssl_options = {
-        'ca': getattr(configs, 'ssl_ca', '/etc/ssl/cert.pem'),
-        'check_hostname': False,
-        'verify_identity': False,
-    }
-    return {'ssl': ssl_options}
-def _build_conn_str(configs):
-    """
-    Build the SQLAlchemy connection URL for a config object.
-    Args:
-        configs: object with ``user``, ``password``, ``host`` and ``database``;
-            ``db_type`` (default 'mysql') and ``port`` are optional.
-    Returns:
-        str: URL using the pymysql driver for 'mysql'/'tidb' (default port 3306)
-            or the psycopg2 driver for 'postgresql' (default port 5432).
-    Raises:
-        ValueError: if ``db_type`` is not one of the supported values.
-    """
-    db_type = getattr(configs, 'db_type', 'mysql').lower()
-    # The password is URL-encoded so characters such as '@' or ':' cannot break the URL.
-    password_encoded = quote_plus(configs.password)
-    # if/elif rather than match/case: the package declares requires-python >= 3.9,
-    # and a match statement is a SyntaxError before Python 3.10.
-    if db_type in ('mysql', 'tidb'):
-        driver, default_port = 'mysql+pymysql', 3306
-    elif db_type == 'postgresql':
-        driver, default_port = 'postgresql+psycopg2', 5432
-    else:
-        raise ValueError(f'Unsupported db_type: {configs.db_type}')
-    cf_port = getattr(configs, 'port', default_port)
-    return f'{driver}://{configs.user}:{password_encoded}@{configs.host}:{cf_port}/{configs.database}'
-@lru_cache(maxsize=16)
-def _get_engine(conn_str, connect_args_json='{}', autocommit=False):
-    """
-    Create a pooled SQLAlchemy engine, memoised on the three arguments.
-    Args:
-        conn_str: SQLAlchemy connection URL.
-        connect_args_json: JSON text of the driver connect args; it is passed
-            as a string so the arguments are hashable for ``lru_cache``.
-        autocommit: when true the engine is created with AUTOCOMMIT isolation.
-    Returns:
-        the engine returned by ``create_engine``.
-    """
-    parsed_args = {}
-    if connect_args_json:
-        parsed_args = json.loads(connect_args_json)
-    options = dict(
-        connect_args=parsed_args,
-        poolclass=pool.QueuePool,
-        pool_size=5,
-        max_overflow=10,
-        pool_pre_ping=True,
-    )
-    if autocommit:
-        options['isolation_level'] = 'AUTOCOMMIT'
-    return create_engine(conn_str, **options)
-def _resolve_engine(configs, autocommit=False):
-    """
-    Return the engine for ``configs``, obtained through ``_get_engine``.
-    Args:
-        configs: connection configuration object.
-        autocommit: forwarded to ``_get_engine``.
-    """
-    url = _build_conn_str(configs)
-    # Connect args are serialised with sorted keys before being handed to
-    # _get_engine, which receives them as a JSON string.
-    args_key = json.dumps(get_connect_args(configs), sort_keys=True, default=str)
-    return _get_engine(url, args_key, autocommit)
-def _safe_identifier(identifier):
-    """
-    Validate that ``identifier`` is a plain SQL identifier and return it unchanged.
-    Accepted form: an ASCII letter or underscore followed by ASCII letters,
-    digits or underscores.
-    Raises:
-        ValueError: if the value is not a string or does not match that form.
-    """
-    # fullmatch instead of match with '$': '$' also matches just before a
-    # trailing newline, so 'orders\n' used to pass validation.
-    if not isinstance(identifier, str) or not re.fullmatch(r'[A-Za-z_][A-Za-z0-9_]*', identifier):
-        raise ValueError(f'Invalid identifier: {identifier}')
-    return identifier
-def _safe_mysql_type(data_type):
-    """
-    Validate a column type expression and return it with outer whitespace stripped.
-    Only letters, digits, underscores, commas, parentheses and whitespace are allowed.
-    Raises:
-        ValueError: if the stripped expression contains any other character or is empty.
-    """
-    candidate = data_type.strip()
-    if re.match(r'^[A-Za-z0-9_,()\s]+$', candidate) is None:
-        raise ValueError(f'Invalid data type expression: {data_type}')
-    return candidate
-def query(sql, configs, params=None):
-    """
-    Run a query and return the result as a DataFrame.
-    Args:
-        sql: SQL text (wrapped in ``text()``) or an already built statement.
-        configs: connection configuration object.
-        params: optional bind parameters forwarded to ``pd.read_sql``.
-    """
-    db_label = getattr(configs, 'name', 'database')
-    logger.info('try to connect to %s...', db_label)
-    engine = _resolve_engine(configs, autocommit=False)
-    with engine.connect() as conn:
-        if isinstance(sql, str):
-            statement = text(sql)
-        else:
-            statement = sql
-        result = pd.read_sql(statement, conn, params=params)
-    logger.debug('query preview: %s', result.head())
-    return result
-def opt(sql, configs, params=None):
-    """
-    Execute a statement on an autocommit engine; nothing is returned.
-    Args:
-        sql: SQL text (wrapped in ``text()``) or an already built statement.
-        configs: connection configuration object.
-        params: optional bind parameters; an empty dict is used when omitted.
-    """
-    db_label = getattr(configs, 'name', 'database')
-    logger.info('try to connect to %s...', db_label)
-    engine = _resolve_engine(configs, autocommit=True)
-    with engine.connect() as conn:
-        if isinstance(sql, str):
-            statement = text(sql)
-        else:
-            statement = sql
-        bound = params or {}
-        conn.execute(statement, bound)
-    logger.info('database execute success')
-class ManageTable:
-    """
-    Helper bound to one table name and one connection configuration.
-    Every database call uses ``self.configs`` so that ``db_type`` and the SSL
-    settings of the original configuration are honoured. Errors raised by the
-    database are logged and re-raised instead of being swallowed; uploads run
-    in a single transaction, so a failure rolls the whole upload back.
-    """
-    def __init__(self, table, configs, verify=False):
-        """
-        Args:
-            table: table name.
-            configs: connection configuration (name, user, password, host,
-                database; optional port and db_type).
-            verify: when true, raise ValueError if the table does not exist.
-        """
-        self.port = getattr(configs, 'port', 3306)
-        self.table = table
-        self.name = configs.name
-        self.user = configs.user
-        self.password = configs.password
-        self.host = configs.host
-        self.database = configs.database
-        self.configs = configs
-        if verify:
-            self._check_exists()
-    def _db_type(self):
-        """Return the lower-cased db_type of the configuration (default 'mysql')."""
-        return getattr(self.configs, 'db_type', 'mysql').lower()
-    def _quoted_table(self):
-        """Return the validated table name quoted for the configured dialect."""
-        safe_table = _safe_identifier(self.table)
-        if self._db_type() == 'postgresql':
-            return f'"{safe_table}"'
-        return f'`{safe_table}`'
-    def _prepare_frame(self, df, add_date):
-        """Return ``df``, or a copy with a 'rundate' column (today, YYYY-MM-DD) when ``add_date``."""
-        if not add_date:
-            return df
-        df_copy = df.copy()
-        df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
-        return df_copy
-    @staticmethod
-    def _chunk_count(row_count, chunk_size):
-        """Return how many chunks of ``chunk_size`` cover ``row_count`` rows."""
-        if chunk_size <= 0:
-            raise ValueError(f'chunk_size must be a positive integer: {chunk_size}')
-        return (row_count + chunk_size - 1) // chunk_size
-    def _check_exists(self):
-        """Raise ValueError when the table does not exist."""
-        if not self.exists():
-            raise ValueError(f'table not found: {self.table}')
-        logger.info('table exists: %s', self.table)
-    def exists(self):
-        """
-        Return True when the table exists, False when it does not.
-        Connection or permission errors propagate to the caller; they are no
-        longer reported as "table missing".
-        """
-        # Function-scope import: sqlalchemy is already a dependency of this module.
-        from sqlalchemy import inspect as sa_inspect
-        safe_table = _safe_identifier(self.table)
-        engine = _resolve_engine(self.configs, autocommit=False)
-        return sa_inspect(engine).has_table(safe_table)
-    def delete_table(self):
-        """Drop the table; a failure is logged and re-raised."""
-        drop_sql = text(f'DROP TABLE {self._quoted_table()}')
-        try:
-            opt(drop_sql, self.configs)
-        except Exception:
-            logger.exception('table was not deleted')
-            raise
-        logger.info('%s deleted', self.table)
-    def par_del(self, clause, params=None):
-        """
-        Delete the rows matching ``clause``.
-        Args:
-            clause: WHERE condition; it is interpolated into the statement
-                verbatim, so values belong in ``params`` as bind parameters.
-            params: optional bind parameters for ``clause``.
-        """
-        del_clause = text(f'DELETE FROM {self._quoted_table()} WHERE {clause}')
-        opt(del_clause, self.configs, params=params)
-        logger.info('records deleted by clause: %s', clause)
-    def change_data_type(self, cols_name, data_type):
-        """Change a column's type using MySQL ``ALTER TABLE ... MODIFY COLUMN`` syntax."""
-        safe_table = _safe_identifier(self.table)
-        safe_col = _safe_identifier(cols_name)
-        safe_type = _safe_mysql_type(data_type)
-        change_clause = text(f'ALTER TABLE `{safe_table}` MODIFY COLUMN `{safe_col}` {safe_type}')
-        opt(change_clause, self.configs)
-        logger.info('%s changed to %s successfully', cols_name, data_type)
-    def upload_data(self, df, chunk_size=10000, add_date=False):
-        """
-        Append ``df`` to the table in chunks inside one transaction.
-        Args:
-            df: DataFrame to upload.
-            chunk_size: rows per ``to_sql`` call; must be positive.
-            add_date: when true, a 'rundate' column with today's date is added to a copy.
-        Raises:
-            ValueError: if ``chunk_size`` is not positive.
-            Exception: any upload error is logged and re-raised, which rolls
-                the transaction back instead of committing a partial upload.
-        """
-        df_copy = self._prepare_frame(df, add_date)
-        total_chunks = self._chunk_count(len(df_copy), chunk_size)
-        engine = _resolve_engine(self.configs, autocommit=False)
-        logger.info('try to upload data now, chunk_size is %s', chunk_size)
-        with engine.begin() as connection, tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
-            try:
-                for start in range(0, len(df_copy), chunk_size):
-                    chunk = df_copy.iloc[start:start + chunk_size]
-                    chunk.to_sql(name=self.table, con=connection, if_exists='append', index=False)
-                    pbar.update(1)
-            except Exception:
-                logger.exception('an error occurred during upload')
-                raise
-    def _build_upsert_sql(self, db_type, columns, ignore, key):
-        """Return the INSERT statement text used by ``upsert_data`` for the given dialect."""
-        value_placeholders = ', '.join(f':{col}' for col in columns)
-        # NOTE(review): the table name is interpolated unquoted here, as before; confirm
-        # whether callers rely on qualified names before adding validation.
-        if db_type in ('mysql', 'tidb'):
-            column_list = ', '.join(f'`{col}`' for col in columns)
-            head = f'INTO {self.table} ({column_list}) VALUES ({value_placeholders})'
-            if ignore:
-                return f'INSERT IGNORE {head}'
-            update_columns = ', '.join(f'`{col}`=VALUES(`{col}`)' for col in columns)
-            return f'INSERT {head} ON DUPLICATE KEY UPDATE {update_columns}'
-        if db_type == 'postgresql':
-            safe_columns = [_safe_identifier(col) for col in columns]
-            quoted_columns = ', '.join(f'"{col}"' for col in safe_columns)
-            head = f'INSERT INTO {self.table} ({quoted_columns}) VALUES ({value_placeholders})'
-            if ignore:
-                # PostgreSQL has no INSERT IGNORE; skipping conflicting rows is the equivalent.
-                return f'{head} ON CONFLICT DO NOTHING'
-            if not key:
-                raise ValueError('key is required for postgresql upsert')
-            safe_key = _safe_identifier(key)
-            update_columns = ', '.join(
-                f'"{col}"=EXCLUDED."{col}"' for col in safe_columns if col != safe_key
-            )
-            if update_columns:
-                return f'{head} ON CONFLICT ("{safe_key}") DO UPDATE SET {update_columns}'
-            return f'{head} ON CONFLICT ("{safe_key}") DO NOTHING'
-        raise ValueError(f'Unsupported db_type for upsert: {db_type}')
-    def upsert_data(self, df, chunk_size=10000, add_date=False, ignore=False, key=None):
-        """
-        Insert ``df`` in chunks, updating or skipping rows that already exist.
-        Args:
-            df: DataFrame to write.
-            chunk_size: rows per statement execution; must be positive.
-            add_date: when true, a 'rundate' column with today's date is added to a copy.
-            ignore: when true, conflicting rows are skipped instead of updated.
-            key: conflict column, required for a PostgreSQL update-on-conflict.
-        Raises:
-            ValueError: bad ``chunk_size``, unsupported db_type, or missing ``key``.
-            Exception: any database error is logged and re-raised, which rolls
-                the transaction back.
-        """
-        df_copy = self._prepare_frame(df, add_date)
-        total_chunks = self._chunk_count(len(df_copy), chunk_size)
-        if total_chunks == 0:
-            logger.info('no rows to upsert')
-            return
-        # The statement depends only on the columns, so it is built once for all chunks.
-        insert_sql = text(self._build_upsert_sql(self._db_type(), df_copy.columns.tolist(), ignore, key))
-        engine = _resolve_engine(self.configs, autocommit=False)
-        logger.info('trying to upload data now, chunk_size is %s', chunk_size)
-        with engine.begin() as connection, tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
-            try:
-                for start in range(0, len(df_copy), chunk_size):
-                    chunk = df_copy.iloc[start:start + chunk_size]
-                    # Cast to object first: on numeric columns where(..., None) keeps NaN
-                    # instead of producing None, and NaN is not a valid SQL NULL.
-                    data = chunk.astype(object).where(pd.notna(chunk), None).to_dict(orient='records')
-                    connection.execute(insert_sql, data)
-                    pbar.update(1)
-            except Exception:
-                logger.exception('an error occurred during upsert')
-                raise
-class Manage_table(ManageTable):
-    """
-    Deprecated alias of ``ManageTable``.
-    Constructing it emits a DeprecationWarning. The only behavioural
-    difference is that ``upload_data`` defaults to ``add_date=True``.
-    """
-    def __init__(self, table, configs, verify=False):
-        # The old text promised removal in v2.0.0, a version that has already shipped.
-        warnings.warn(
-            'Manage_table is deprecated and will be removed in a future release; use ManageTable instead.',
-            DeprecationWarning,
-            stacklevel=2,
-        )
-        super().__init__(table, configs, verify=verify)
-    def delete_table(self):
-        """Drop the table; delegates to ``ManageTable.delete_table``."""
-        super().delete_table()
-    def upload_data(self, df, chunk_size=10000, add_date=True):
-        """
-        Append ``df`` to the table; same as ``ManageTable.upload_data`` except
-        that ``add_date`` defaults to True.
-        """
-        # Delegate instead of repeating the parent's chunked upload loop.
-        super().upload_data(df, chunk_size=chunk_size, add_date=add_date)