tablemaster 2.1.0__tar.gz → 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {tablemaster-2.1.0 → tablemaster-2.1.2}/PKG-INFO +1 -1
  2. {tablemaster-2.1.0 → tablemaster-2.1.2}/pyproject.toml +1 -1
  3. tablemaster-2.1.2/tablemaster/database.py +473 -0
  4. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/feishu.py +14 -9
  5. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/gspread.py +11 -7
  6. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/pull.py +18 -1
  7. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster.egg-info/PKG-INFO +1 -1
  8. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster.egg-info/SOURCES.txt +1 -0
  9. tablemaster-2.1.2/tests/test_error_visibility.py +54 -0
  10. {tablemaster-2.1.0 → tablemaster-2.1.2}/tests/test_schema_core.py +29 -2
  11. tablemaster-2.1.0/tablemaster/database.py +0 -286
  12. {tablemaster-2.1.0 → tablemaster-2.1.2}/LICENSE +0 -0
  13. {tablemaster-2.1.0 → tablemaster-2.1.2}/README.md +0 -0
  14. {tablemaster-2.1.0 → tablemaster-2.1.2}/setup.cfg +0 -0
  15. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/__init__.py +0 -0
  16. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/__main__.py +0 -0
  17. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/cli.py +0 -0
  18. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/config.py +0 -0
  19. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/local.py +0 -0
  20. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/__init__.py +0 -0
  21. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/apply.py +0 -0
  22. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/dialects/__init__.py +0 -0
  23. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/dialects/base.py +0 -0
  24. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/dialects/mysql.py +0 -0
  25. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/dialects/postgresql.py +0 -0
  26. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/dialects/tidb.py +0 -0
  27. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/diff.py +0 -0
  28. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/init.py +0 -0
  29. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/introspect.py +0 -0
  30. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/loader.py +0 -0
  31. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/models.py +0 -0
  32. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/schema/plan.py +0 -0
  33. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/sync.py +0 -0
  34. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster/utils.py +0 -0
  35. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster.egg-info/dependency_links.txt +0 -0
  36. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster.egg-info/entry_points.txt +0 -0
  37. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster.egg-info/requires.txt +0 -0
  38. {tablemaster-2.1.0 → tablemaster-2.1.2}/tablemaster.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tablemaster
3
- Version: 2.1.0
3
+ Version: 2.1.2
4
4
  Summary: tablemaster is a Python toolkit for moving and managing tabular data across databases, Feishu/Lark, Google Sheets, and local files with one consistent API.
5
5
  Author-email: Livid <livid.su@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/ilivid/tablemaster
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "tablemaster"
7
- version = "2.1.0"
7
+ version = "2.1.2"
8
8
  description = "tablemaster is a Python toolkit for moving and managing tabular data across databases, Feishu/Lark, Google Sheets, and local files with one consistent API."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -0,0 +1,473 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ import warnings
5
+ from typing import Union, List, Tuple, Dict, Any, Optional
6
+ from functools import lru_cache
7
+
8
+ from sqlalchemy import create_engine, inspect, pool, text
9
+ from sqlalchemy.engine import Engine
10
+ import pandas as pd
11
+ from datetime import datetime
12
+ from tqdm import tqdm
13
+ from urllib.parse import quote_plus
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ def get_connect_args(configs: Any) -> Dict[str, Any]:
19
+ """
20
+ Get database connection arguments, supporting SSL and other common configurations.
21
+
22
+ Args:
23
+ configs (Any): Configuration object that may contain use_ssl, ssl_ca, connect_args, db_type.
24
+
25
+ Returns:
26
+ Dict[str, Any]: A dictionary of connection arguments.
27
+ """
28
+ connect_args: Dict[str, Any] = {}
29
+
30
+ if hasattr(configs, 'connect_args') and configs.connect_args:
31
+ connect_args = configs.connect_args.copy()
32
+ else:
33
+ use_ssl: bool = getattr(configs, 'use_ssl', False)
34
+ db_type: str = getattr(configs, 'db_type', 'mysql').lower()
35
+
36
+ if db_type == 'tidb' or use_ssl:
37
+ ssl_ca: str = getattr(configs, 'ssl_ca', '/etc/ssl/cert.pem')
38
+ connect_args = {
39
+ 'ssl': {
40
+ 'ca': ssl_ca,
41
+ 'check_hostname': False,
42
+ 'verify_identity': False
43
+ }
44
+ }
45
+
46
+ return connect_args
47
+
48
+
49
+ def _build_conn_str(configs: Any) -> str:
50
+ """
51
+ Build the SQLAlchemy connection string based on configuration.
52
+
53
+ Args:
54
+ configs (Any): Configuration object containing host, port, user, password, database, etc.
55
+
56
+ Returns:
57
+ str: The SQLAlchemy connection string.
58
+ """
59
+ db_type: str = getattr(configs, 'db_type', 'mysql').lower()
60
+ password_encoded: str = quote_plus(configs.password)
61
+ match db_type:
62
+ case 'mysql' | 'tidb':
63
+ cf_port: int = getattr(configs, 'port', 3306)
64
+ return f'mysql+pymysql://{configs.user}:{password_encoded}@{configs.host}:{cf_port}/{configs.database}'
65
+ case 'postgresql':
66
+ cf_port: int = getattr(configs, 'port', 5432)
67
+ return f'postgresql+psycopg2://{configs.user}:{password_encoded}@{configs.host}:{cf_port}/{configs.database}'
68
+ case _:
69
+ raise ValueError(f'Unsupported db_type: {configs.db_type}')
70
+
71
+
72
+ @lru_cache(maxsize=16)
73
+ def _get_engine(conn_str: str, connect_args_json: str = '{}', autocommit: bool = False) -> Engine:
74
+ """
75
+ Get or create a cached SQLAlchemy Engine instance.
76
+
77
+ Args:
78
+ conn_str (str): The database connection string.
79
+ connect_args_json (str, optional): JSON string representation of connection arguments. Defaults to '{}'.
80
+ autocommit (bool, optional): Whether the engine should be in autocommit mode. Defaults to False.
81
+
82
+ Returns:
83
+ Engine: The created SQLAlchemy Engine instance.
84
+ """
85
+ connect_args: Dict[str, Any] = json.loads(connect_args_json) if connect_args_json else {}
86
+ engine_kwargs: Dict[str, Any] = {
87
+ 'connect_args': connect_args,
88
+ 'poolclass': pool.QueuePool,
89
+ 'pool_size': 5,
90
+ 'max_overflow': 10,
91
+ 'pool_pre_ping': True,
92
+ }
93
+ if autocommit:
94
+ engine_kwargs['isolation_level'] = 'AUTOCOMMIT'
95
+ return create_engine(conn_str, **engine_kwargs)
96
+
97
+
98
+ def _resolve_engine(configs: Any, autocommit: bool = False) -> Engine:
99
+ """
100
+ Resolve and return an Engine based on configuration.
101
+
102
+ Args:
103
+ configs (Any): Configuration object.
104
+ autocommit (bool, optional): Whether to use autocommit mode. Defaults to False.
105
+
106
+ Returns:
107
+ Engine: The SQLAlchemy Engine instance.
108
+ """
109
+ connection_string: str = _build_conn_str(configs)
110
+ connect_args: Dict[str, Any] = get_connect_args(configs)
111
+ connect_args_json: str = json.dumps(connect_args, sort_keys=True, default=str)
112
+ return _get_engine(connection_string, connect_args_json, autocommit)
113
+
114
+
115
+ def _safe_identifier(identifier: str) -> str:
116
+ """
117
+ Ensure an identifier is safe from SQL injection.
118
+
119
+ Args:
120
+ identifier (str): The SQL identifier to validate.
121
+
122
+ Returns:
123
+ str: The safe identifier.
124
+
125
+ Raises:
126
+ ValueError: If the identifier contains invalid characters.
127
+ """
128
+ if not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', identifier):
129
+ raise ValueError(f'Invalid identifier: {identifier}')
130
+ return identifier
131
+
132
+
133
+ def _safe_mysql_type(data_type: str) -> str:
134
+ """
135
+ Ensure a MySQL data type expression is safe from SQL injection.
136
+
137
+ Args:
138
+ data_type (str): The MySQL data type to validate.
139
+
140
+ Returns:
141
+ str: The safe data type string.
142
+
143
+ Raises:
144
+ ValueError: If the data type expression contains invalid characters.
145
+ """
146
+ normalized: str = data_type.strip()
147
+ if not re.match(r'^[A-Za-z0-9_,()\s]+$', normalized):
148
+ raise ValueError(f'Invalid data type expression: {data_type}')
149
+ return normalized
150
+
151
+
152
+ def query(sql: Union[str, text], configs: Any, params: Optional[Dict[str, Any]] = None) -> pd.DataFrame:
153
+ """
154
+ Execute a query and return results as a pandas DataFrame.
155
+
156
+ Args:
157
+ sql (Union[str, text]): The SQL query to execute.
158
+ configs (Any): Configuration object.
159
+ params (Optional[Dict[str, Any]], optional): Query parameters. Defaults to None.
160
+
161
+ Returns:
162
+ pd.DataFrame: Query results.
163
+ """
164
+ logger.info('try to connect to %s...', getattr(configs, 'name', 'database'))
165
+ engine: Engine = _resolve_engine(configs, autocommit=False)
166
+ with engine.connect() as conn:
167
+ statement = text(sql) if isinstance(sql, str) else sql
168
+ df: pd.DataFrame = pd.read_sql(statement, conn, params=params)
169
+ logger.debug('query preview: %s', df.head())
170
+ return df
171
+
172
+
173
+ def opt(sql: Union[str, text], configs: Any, params: Optional[Dict[str, Any]] = None) -> None:
174
+ """
175
+ Execute a SQL statement that modifies the database (e.g., INSERT, UPDATE, DELETE).
176
+
177
+ Args:
178
+ sql (Union[str, text]): The SQL statement to execute.
179
+ configs (Any): Configuration object.
180
+ params (Optional[Dict[str, Any]], optional): Query parameters. Defaults to None.
181
+ """
182
+ logger.info('try to connect to %s...', getattr(configs, 'name', 'database'))
183
+ engine: Engine = _resolve_engine(configs, autocommit=True)
184
+ with engine.connect() as conn:
185
+ statement = text(sql) if isinstance(sql, str) else sql
186
+ conn.execute(statement, params or {})
187
+ logger.info('database execute success')
188
+
189
+
190
+ class ManageTable:
191
+ """
192
+ A class to manage a specific database table's operations.
193
+ """
194
+ def __init__(self, table: str, configs: Any, verify: bool = False) -> None:
195
+ """
196
+ Initialize a ManageTable instance.
197
+
198
+ Args:
199
+ table (str): The name of the table.
200
+ configs (Any): Configuration object for the database.
201
+ verify (bool, optional): Whether to verify if the table exists upon initialization. Defaults to False.
202
+ """
203
+ self.port: int = getattr(configs, 'port', 3306)
204
+ self.table: str = table
205
+ self.name: str = configs.name
206
+ self.user: str = configs.user
207
+ self.password: str = configs.password
208
+ self.host: str = configs.host
209
+ self.database: str = configs.database
210
+ self.configs: Any = configs
211
+ if verify:
212
+ self._check_exists()
213
+
214
+ def _check_exists(self) -> None:
215
+ """
216
+ Check if the table exists and raise an error if not.
217
+
218
+ Raises:
219
+ ValueError: If the table does not exist.
220
+ """
221
+ if not self.exists():
222
+ raise ValueError(f'table not found: {self.table}')
223
+ logger.info('table exists: %s', self.table)
224
+
225
+ def exists(self) -> bool:
226
+ """
227
+ Check if the table exists in the database.
228
+
229
+ Returns:
230
+ bool: True if table exists, False otherwise.
231
+ """
232
+ safe_table: str = _safe_identifier(self.table)
233
+ try:
234
+ engine: Engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
235
+ inspector = inspect(engine)
236
+ return inspector.has_table(safe_table)
237
+ except Exception as e:
238
+ logger.exception('failed to check if table exists: %s', e)
239
+ raise
240
+
241
+ def delete_table(self) -> None:
242
+ """
243
+ Drop the table from the database.
244
+ """
245
+ safe_table: str = _safe_identifier(self.table)
246
+ try:
247
+ opt(text(f'DROP TABLE `{safe_table}`'), self)
248
+ logger.info('%s deleted', self.table)
249
+ except Exception as e:
250
+ logger.exception('table was not deleted: %s', e)
251
+ raise
252
+
253
+ def par_del(self, clause: str, params: Optional[Dict[str, Any]] = None) -> None:
254
+ """
255
+ Delete specific records from the table based on a WHERE clause.
256
+
257
+ Args:
258
+ clause (str): The WHERE clause conditions.
259
+ params (Optional[Dict[str, Any]], optional): Parameters for the WHERE clause. Defaults to None.
260
+ """
261
+ safe_table: str = _safe_identifier(self.table)
262
+ del_clause = text(f'DELETE FROM `{safe_table}` WHERE {clause}')
263
+ opt(del_clause, self, params=params)
264
+ logger.info('records deleted by clause: %s', clause)
265
+
266
+ def change_data_type(self, cols_name: str, data_type: str) -> None:
267
+ """
268
+ Change the data type of a specific column in the table.
269
+
270
+ Args:
271
+ cols_name (str): The name of the column to alter.
272
+ data_type (str): The new data type expression.
273
+ """
274
+ safe_table: str = _safe_identifier(self.table)
275
+ safe_col: str = _safe_identifier(cols_name)
276
+ safe_type: str = _safe_mysql_type(data_type)
277
+ change_clause = text(f'ALTER TABLE `{safe_table}` MODIFY COLUMN `{safe_col}` {safe_type}')
278
+ opt(change_clause, self)
279
+ logger.info('%s changed to %s successfully', cols_name, data_type)
280
+
281
+
282
+ def upload_data(self, df: pd.DataFrame, chunk_size: int = 10000, add_date: bool = False) -> None:
283
+ """
284
+ Upload data from a pandas DataFrame to the database table.
285
+
286
+ Args:
287
+ df (pd.DataFrame): The DataFrame containing data to upload.
288
+ chunk_size (int, optional): Number of rows to upload per chunk. Defaults to 10000.
289
+ add_date (bool, optional): Whether to append the current date to the DataFrame before uploading. Defaults to False.
290
+ """
291
+ engine: Engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
292
+
293
+ with engine.begin() as connection:
294
+ if add_date:
295
+ df_copy: pd.DataFrame = df.copy()
296
+ df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
297
+ else:
298
+ df_copy: pd.DataFrame = df
299
+ total_chunks: int = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
300
+ logger.info('try to upload data now, chunk_size is %s', chunk_size)
301
+ with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
302
+ try:
303
+ for start in range(0, len(df_copy), chunk_size):
304
+ end: int = min(start + chunk_size, len(df_copy))
305
+ chunk: pd.DataFrame = df_copy.iloc[start:end]
306
+ chunk.to_sql(name=self.table, con=connection, if_exists='append', index=False)
307
+ pbar.update(1)
308
+ except Exception as e:
309
+ logger.exception('an error occurred during upload: %s', e)
310
+ raise
311
+
312
+ def upsert_data(self, df: pd.DataFrame, chunk_size: int = 10000, add_date: bool = False, ignore: bool = False, key: Union[str, List[str], Tuple[str, ...], None] = None) -> None:
313
+ """
314
+ Upsert data from a pandas DataFrame into the database table.
315
+
316
+ This method will perform an "insert or update" (upsert) operation based on the target database type.
317
+ If the record already exists (based on the specified primary key or unique index), it updates the existing record.
318
+ Otherwise, it inserts a new record.
319
+
320
+ Args:
321
+ df (pd.DataFrame): The pandas DataFrame containing the data to be upserted.
322
+ chunk_size (int, optional): The number of rows to insert per batch. Defaults to 10000.
323
+ add_date (bool, optional): Whether to add a 'rundate' column with the current date to the dataframe. Defaults to False.
324
+ ignore (bool, optional): If True, it performs an 'INSERT IGNORE' or 'ON CONFLICT DO NOTHING' operation, skipping existing records instead of updating them. Defaults to False.
325
+ key (Union[str, List[str], Tuple[str, ...], None], optional): The primary key or unique index column(s) used to detect conflicts.
326
+ Required for PostgreSQL. For MySQL/TiDB, this is used to exclude primary key columns from being updated.
327
+ Can be a comma-separated string or a list/tuple of strings. Defaults to None.
328
+
329
+ Raises:
330
+ ValueError: If 'key' is not provided when 'db_type' is 'postgresql', or if an unsupported 'db_type' is used.
331
+ """
332
+ engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
333
+ db_type: str = getattr(self.configs if hasattr(self, 'configs') else self, 'db_type', 'mysql').lower()
334
+
335
+ with engine.begin() as connection:
336
+ if add_date:
337
+ df_copy: pd.DataFrame = df.copy()
338
+ df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
339
+ else:
340
+ df_copy: pd.DataFrame = df
341
+
342
+ total_chunks: int = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
343
+ logger.info('trying to upload data now, chunk_size is %s', chunk_size)
344
+
345
+ with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
346
+ for start in range(0, len(df_copy), chunk_size):
347
+ end: int = min(start + chunk_size, len(df_copy))
348
+ chunk: pd.DataFrame = df_copy.iloc[start:end]
349
+ columns: List[str] = chunk.columns.tolist()
350
+ value_placeholders: str = ', '.join([f':{col}' for col in columns])
351
+
352
+ try:
353
+ if ignore == False:
354
+ keys: List[str] = []
355
+ if key:
356
+ if isinstance(key, str):
357
+ keys = [k.strip() for k in key.split(',')]
358
+ elif isinstance(key, (list, tuple)):
359
+ keys = [str(k).strip() for k in key]
360
+ else:
361
+ raise ValueError('key must be a string or a list of strings')
362
+
363
+ if db_type in ('mysql', 'tidb'):
364
+ if keys:
365
+ update_columns = ', '.join([f"`{col}`=VALUES(`{col}`)" for col in columns if col not in keys])
366
+ else:
367
+ update_columns = ', '.join([f"`{col}`=VALUES(`{col}`)" for col in columns])
368
+
369
+ if update_columns:
370
+ insert_sql = f"""
371
+ INSERT INTO {self.table} ({', '.join([f'`{col}`' for col in columns])})
372
+ VALUES ({value_placeholders})
373
+ ON DUPLICATE KEY UPDATE {update_columns}
374
+ """
375
+ else:
376
+ insert_sql = f"""
377
+ INSERT IGNORE INTO {self.table} ({', '.join([f'`{col}`' for col in columns])})
378
+ VALUES ({value_placeholders})
379
+ """
380
+ elif db_type == 'postgresql':
381
+ if not keys:
382
+ raise ValueError('key is required for postgresql upsert')
383
+
384
+ safe_keys = [_safe_identifier(k) for k in keys]
385
+ safe_columns = [_safe_identifier(col) for col in columns]
386
+ quoted_columns = ', '.join([f'"{col}"' for col in safe_columns])
387
+ update_columns = ', '.join(
388
+ [f'"{col}"=EXCLUDED."{col}"' for col in safe_columns if col not in safe_keys]
389
+ )
390
+ conflict_keys_str = ', '.join([f'"{k}"' for k in safe_keys])
391
+
392
+ if update_columns:
393
+ insert_sql = f"""
394
+ INSERT INTO {self.table} ({quoted_columns})
395
+ VALUES ({value_placeholders})
396
+ ON CONFLICT ({conflict_keys_str}) DO UPDATE SET {update_columns}
397
+ """
398
+ else:
399
+ insert_sql = f"""
400
+ INSERT INTO {self.table} ({quoted_columns})
401
+ VALUES ({value_placeholders})
402
+ ON CONFLICT ({conflict_keys_str}) DO NOTHING
403
+ """
404
+ else:
405
+ raise ValueError(f'Unsupported db_type for upsert: {db_type}')
406
+ else:
407
+ insert_sql = f"""
408
+ INSERT IGNORE INTO {self.table} ({', '.join([f'`{col}`' for col in columns])})
409
+ VALUES ({value_placeholders})
410
+ """
411
+
412
+ data = chunk.where(pd.notna(chunk), None).to_dict(orient='records')
413
+ connection.execute(text(insert_sql), data)
414
+ pbar.update(1)
415
+ except Exception as e:
416
+ logger.exception('an error occurred during upsert: %s', e)
417
+ raise
418
+
419
+ class Manage_table(ManageTable):
420
+ """
421
+ Deprecated class for managing database tables. Use ManageTable instead.
422
+ """
423
+ def __init__(self, table: str, configs: Any, verify: bool = False) -> None:
424
+ """
425
+ Initialize the Manage_table instance. Issues a deprecation warning.
426
+
427
+ Args:
428
+ table (str): The name of the table to manage.
429
+ configs (Any): Configuration object containing database connection details.
430
+ verify (bool, optional): Whether to verify the table configuration. Defaults to False.
431
+ """
432
+ warnings.warn(
433
+ 'Manage_table is deprecated and will be removed in v2.0.0; use ManageTable instead.',
434
+ DeprecationWarning,
435
+ stacklevel=2,
436
+ )
437
+ super().__init__(table, configs, verify=verify)
438
+
439
+ def delete_table(self) -> None:
440
+ """
441
+ Drop the table from the database.
442
+ """
443
+ super().delete_table()
444
+
445
+ def upload_data(self, df: pd.DataFrame, chunk_size: int = 10000, add_date: bool = True) -> None:
446
+ """
447
+ Upload data from a pandas DataFrame to the database table.
448
+
449
+ Args:
450
+ df (pd.DataFrame): The pandas DataFrame containing the data to upload.
451
+ chunk_size (int, optional): The number of rows to insert per batch. Defaults to 10000.
452
+ add_date (bool, optional): Whether to add a 'rundate' column with the current date to the dataframe. Defaults to True.
453
+ """
454
+ engine: Engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
455
+
456
+ with engine.begin() as connection:
457
+ if add_date:
458
+ df_copy: pd.DataFrame = df.copy()
459
+ df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
460
+ else:
461
+ df_copy: pd.DataFrame = df
462
+ total_chunks: int = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
463
+ logger.info('try to upload data now, chunk_size is %s', chunk_size)
464
+ with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
465
+ try:
466
+ for start in range(0, len(df_copy), chunk_size):
467
+ end: int = min(start + chunk_size, len(df_copy))
468
+ chunk: pd.DataFrame = df_copy.iloc[start:end]
469
+ chunk.to_sql(name=self.table, con=connection, if_exists='append', index=False)
470
+ pbar.update(1)
471
+ except Exception as e:
472
+ logger.exception('an error occurred during upload: %s', e)
473
+ raise
@@ -178,9 +178,10 @@ def fs_write_df(sheet_address, df, feishu_cfg, loc='A1', clear_sheet=True):
178
178
  if clear_resp.json().get('code') == 0:
179
179
  logger.info('sheet cleared')
180
180
  else:
181
- logger.warning("failed to clear sheet: %s", clear_resp.json().get('msg'))
181
+ raise RuntimeError(f"failed to clear sheet: {clear_resp.json().get('msg')}")
182
182
  except Exception as e:
183
- logger.warning('failed to clear sheet: %s', e)
183
+ logger.exception('failed to clear sheet: %s', e)
184
+ raise
184
185
 
185
186
  # 处理 DataFrame 数据类型
186
187
  df_copy = df.copy()
@@ -305,8 +306,7 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
305
306
  existing_fields = _get_bitable_fields(app_token, table_id, header)
306
307
 
307
308
  if not existing_fields:
308
- logger.error('could not fetch table fields or table has no fields')
309
- return None
309
+ raise ValueError('could not fetch table fields or table has no fields')
310
310
 
311
311
  logger.info('table has %s fields', len(existing_fields))
312
312
 
@@ -323,8 +323,7 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
323
323
  logger.warning('skip column: %s', field)
324
324
 
325
325
  if not valid_fields:
326
- logger.error('no valid fields to write, all dataframe columns are missing in bitable')
327
- return None
326
+ raise ValueError('no valid fields to write, all dataframe columns are missing in bitable')
328
327
 
329
328
  logger.info('will write %s valid fields', len(valid_fields))
330
329
 
@@ -360,8 +359,9 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
360
359
  _request_with_retry("post", delete_url, headers=header, json_data=delete_data)
361
360
  logger.info('deleted %s records', len(record_ids))
362
361
 
363
- except Exception as e:
364
- logger.warning('failed to clear table: %s', e)
362
+ except Exception as e:
363
+ logger.exception('failed to clear table: %s', e)
364
+ raise
365
365
 
366
366
  # 处理 DataFrame - 只保留有效字段
367
367
  df_copy = df[list(valid_fields)].copy()
@@ -444,7 +444,7 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
444
444
  str_val = str(value)
445
445
  if str_val and str_val != 'None' and str_val != 'nan':
446
446
  fields[col] = str_val
447
- except:
447
+ except Exception:
448
448
  if col not in skipped_cols:
449
449
  skipped_cols.add(col)
450
450
  continue
@@ -457,6 +457,7 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
457
457
  # 批量写入(每次最多500条)
458
458
  batch_size = 500
459
459
  all_responses = []
460
+ failed_batches = []
460
461
 
461
462
  for i in range(0, len(records), batch_size):
462
463
  batch = records[i:i + batch_size]
@@ -473,9 +474,11 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
473
474
  logger.info('batch %s wrote %s records', i // batch_size + 1, len(batch))
474
475
  else:
475
476
  logger.error('failed to write batch: %s', response.get('msg', 'Unknown error'))
477
+ failed_batches.append((i // batch_size + 1, response.get('msg', 'Unknown error')))
476
478
 
477
479
  except Exception as e:
478
480
  logger.exception('failed to write batch: %s', e)
481
+ failed_batches.append((i // batch_size + 1, str(e)))
479
482
 
480
483
  logger.info('write summary total records: %s', len(records))
481
484
  logger.info('write summary fields written: %s', len(valid_fields))
@@ -483,6 +486,8 @@ def fs_write_base(sheet_address, df, feishu_cfg, clear_table=False):
483
486
  logger.info('write summary fields skipped: %s', len(missing_fields))
484
487
  for field in sorted(missing_fields):
485
488
  logger.info('skip field: %s', field)
489
+ if failed_batches:
490
+ raise RuntimeError(f'bitable write failed for {len(failed_batches)} batch(es): {failed_batches}')
486
491
  logger.info('data is written')
487
492
 
488
493
  return all_responses
@@ -68,14 +68,16 @@ def gs_read_df(address, cfg=None, service_account_path=None):
68
68
  return df
69
69
 
70
70
  except gspread.exceptions.SpreadsheetNotFound:
71
- logger.error("spreadsheet '%s' not found", spreadsheet_identifier)
72
- return None
71
+ message = f"spreadsheet '{spreadsheet_identifier}' not found"
72
+ logger.error(message)
73
+ raise ValueError(message)
73
74
  except gspread.exceptions.WorksheetNotFound:
74
- logger.error("worksheet '%s' not found in spreadsheet", worksheet_name)
75
- return None
75
+ message = f"worksheet '{worksheet_name}' not found in spreadsheet"
76
+ logger.error(message)
77
+ raise ValueError(message)
76
78
  except Exception as e:
77
79
  logger.exception('an unexpected error occurred: %s', e)
78
- return None
80
+ raise
79
81
 
80
82
 
81
83
  def gs_write_df(address, df, cfg=None, loc='A1', service_account_path=None):
@@ -105,8 +107,9 @@ def gs_write_df(address, df, cfg=None, loc='A1', service_account_path=None):
105
107
 
106
108
  except gspread.exceptions.SpreadsheetNotFound:
107
109
  if is_id:
108
- logger.error("spreadsheet ID '%s' not found, cannot create with specific ID", spreadsheet_identifier)
109
- return
110
+ message = f"spreadsheet ID '{spreadsheet_identifier}' not found, cannot create with specific ID"
111
+ logger.error(message)
112
+ raise ValueError(message)
110
113
  else:
111
114
  logger.info("spreadsheet '%s' not found, creating one", spreadsheet_identifier)
112
115
  sh = gc.create(spreadsheet_identifier)
@@ -128,3 +131,4 @@ def gs_write_df(address, df, cfg=None, loc='A1', service_account_path=None):
128
131
  logger.info('data is written')
129
132
  except Exception as e:
130
133
  logger.exception('failed to update worksheet: %s', e)
134
+ raise
@@ -8,6 +8,17 @@ import yaml
8
8
  from .models import ActualTable
9
9
 
10
10
 
11
+ class _QuotedStringDumper(yaml.SafeDumper):
12
+ pass
13
+
14
+
15
+ def _quoted_string_representer(dumper, value):
16
+ return dumper.represent_scalar('tag:yaml.org,2002:str', value, style='"')
17
+
18
+
19
+ _QuotedStringDumper.add_representer(str, _quoted_string_representer)
20
+
21
+
11
22
  def _table_to_payload(table: ActualTable) -> dict:
12
23
  payload: dict = {
13
24
  'table': table.table,
@@ -51,7 +62,13 @@ def write_pulled_schema(
51
62
  target = out / f'{table.table}.yaml'
52
63
  payload = _table_to_payload(table)
53
64
  with target.open('w', encoding='utf-8') as f:
54
- yaml.safe_dump(payload, f, sort_keys=False, allow_unicode=True)
65
+ yaml.dump(
66
+ payload,
67
+ f,
68
+ Dumper=_QuotedStringDumper,
69
+ sort_keys=False,
70
+ allow_unicode=True,
71
+ )
55
72
  written.append(target)
56
73
  return written
57
74
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tablemaster
3
- Version: 2.1.0
3
+ Version: 2.1.2
4
4
  Summary: tablemaster is a Python toolkit for moving and managing tabular data across databases, Feishu/Lark, Google Sheets, and local files with one consistent API.
5
5
  Author-email: Livid <livid.su@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/ilivid/tablemaster
@@ -31,4 +31,5 @@ tablemaster/schema/dialects/base.py
31
31
  tablemaster/schema/dialects/mysql.py
32
32
  tablemaster/schema/dialects/postgresql.py
33
33
  tablemaster/schema/dialects/tidb.py
34
+ tests/test_error_visibility.py
34
35
  tests/test_schema_core.py
@@ -0,0 +1,54 @@
1
+ from types import SimpleNamespace
2
+ from unittest import TestCase
3
+ from unittest.mock import patch
4
+
5
+ import pandas as pd
6
+
7
+ from tablemaster.database import ManageTable
8
+ from tablemaster.feishu import fs_write_base
9
+
10
+
11
+ class _DummyResponse:
12
+ def __init__(self, body, status_code=200):
13
+ self._body = body
14
+ self.status_code = status_code
15
+
16
+ def json(self):
17
+ return self._body
18
+
19
+
20
+ class ErrorVisibilityTests(TestCase):
21
+ def setUp(self):
22
+ self.db_cfg = SimpleNamespace(
23
+ name='test_db',
24
+ user='u',
25
+ password='p',
26
+ host='127.0.0.1',
27
+ database='d',
28
+ db_type='mysql',
29
+ )
30
+ self.feishu_cfg = SimpleNamespace(feishu_app_id='id', feishu_app_secret='secret')
31
+
32
+ def test_manage_table_exists_propagates_errors(self):
33
+ table = ManageTable('orders', self.db_cfg)
34
+ with patch('tablemaster.database._resolve_engine', side_effect=RuntimeError('db unavailable')):
35
+ with self.assertRaises(RuntimeError):
36
+ table.exists()
37
+
38
+ def test_delete_table_propagates_errors(self):
39
+ table = ManageTable('orders', self.db_cfg)
40
+ with patch('tablemaster.database.opt', side_effect=RuntimeError('drop failed')):
41
+ with self.assertRaises(RuntimeError):
42
+ table.delete_table()
43
+
44
+ def test_fs_write_base_raises_when_batch_write_failed(self):
45
+ df = pd.DataFrame({'a': [1]})
46
+
47
+ with patch('tablemaster.feishu._get_tenant_access_token', return_value='token'):
48
+ with patch('tablemaster.feishu._get_bitable_fields', return_value={'a'}):
49
+ with patch(
50
+ 'tablemaster.feishu._request_with_retry',
51
+ return_value=_DummyResponse({'code': 1001, 'msg': 'bad request'}),
52
+ ):
53
+ with self.assertRaises(RuntimeError):
54
+ fs_write_base(['app_token', 'table_id'], df, self.feishu_cfg)
@@ -102,8 +102,35 @@ class SchemaCoreTests(unittest.TestCase):
102
102
  paths = write_pulled_schema(tables, root / 'schema' / 'mydb')
103
103
  self.assertEqual(1, len(paths))
104
104
  content = paths[0].read_text(encoding='utf-8')
105
- self.assertIn('table: orders', content)
106
- self.assertIn('primary_key: true', content)
105
+ self.assertIn('"table": "orders"', content)
106
+ self.assertIn('"primary_key": true', content)
107
+
108
+ def test_pull_quotes_comment_with_colon(self):
109
+ with TemporaryDirectory() as td:
110
+ root = Path(td)
111
+ tables = [
112
+ ActualTable(
113
+ table='orders',
114
+ columns=[
115
+ ActualColumn(
116
+ name='id',
117
+ type='BIGINT',
118
+ nullable=False,
119
+ default=None,
120
+ comment='主键:业务单号',
121
+ primary_key=True,
122
+ )
123
+ ],
124
+ indexes=[],
125
+ comment='订单:主表',
126
+ )
127
+ ]
128
+ paths = write_pulled_schema(tables, root / 'schema' / 'mydb')
129
+ content = paths[0].read_text(encoding='utf-8')
130
+ self.assertIn('"comment": "订单:主表"', content)
131
+ loaded = load_schema_definitions(connection='mydb', root_dir=root / 'schema')
132
+ self.assertEqual('订单:主表', loaded[0].comment)
133
+ self.assertEqual('主键:业务单号', loaded[0].columns[0].comment)
107
134
 
108
135
 
109
136
  if __name__ == '__main__':
@@ -1,286 +0,0 @@
1
- import json
2
- import logging
3
- import re
4
- import warnings
5
- from functools import lru_cache
6
-
7
- from sqlalchemy import create_engine, pool, text
8
- import pandas as pd
9
- from datetime import datetime
10
- from tqdm import tqdm
11
- from urllib.parse import quote_plus
12
-
13
- logger = logging.getLogger(__name__)
14
-
15
-
16
- def get_connect_args(configs):
17
- """
18
- 获取数据库连接参数,支持SSL和其他通用配置
19
-
20
- Args:
21
- configs: 配置对象,可以包含以下属性:
22
- - use_ssl: 是否使用SSL (bool)
23
- - ssl_ca: SSL证书路径 (str)
24
- - connect_args: 自定义连接参数 (dict)
25
- - db_type: 数据库类型 ('tidb', 'mysql' 等)
26
-
27
- Returns:
28
- dict: 连接参数字典
29
- """
30
- connect_args = {}
31
-
32
- if hasattr(configs, 'connect_args') and configs.connect_args:
33
- connect_args = configs.connect_args.copy()
34
- else:
35
- use_ssl = getattr(configs, 'use_ssl', False)
36
- db_type = getattr(configs, 'db_type', 'mysql').lower()
37
-
38
- if db_type == 'tidb' or use_ssl:
39
- ssl_ca = getattr(configs, 'ssl_ca', '/etc/ssl/cert.pem')
40
- connect_args = {
41
- 'ssl': {
42
- 'ca': ssl_ca,
43
- 'check_hostname': False,
44
- 'verify_identity': False
45
- }
46
- }
47
-
48
- return connect_args
49
-
50
-
51
- def _build_conn_str(configs):
52
- db_type = getattr(configs, 'db_type', 'mysql').lower()
53
- password_encoded = quote_plus(configs.password)
54
- match db_type:
55
- case 'mysql' | 'tidb':
56
- cf_port = getattr(configs, 'port', 3306)
57
- return f'mysql+pymysql://{configs.user}:{password_encoded}@{configs.host}:{cf_port}/{configs.database}'
58
- case 'postgresql':
59
- cf_port = getattr(configs, 'port', 5432)
60
- return f'postgresql+psycopg2://{configs.user}:{password_encoded}@{configs.host}:{cf_port}/{configs.database}'
61
- case _:
62
- raise ValueError(f'Unsupported db_type: {configs.db_type}')
63
-
64
-
65
- @lru_cache(maxsize=16)
66
- def _get_engine(conn_str, connect_args_json='{}', autocommit=False):
67
- connect_args = json.loads(connect_args_json) if connect_args_json else {}
68
- engine_kwargs = {
69
- 'connect_args': connect_args,
70
- 'poolclass': pool.QueuePool,
71
- 'pool_size': 5,
72
- 'max_overflow': 10,
73
- 'pool_pre_ping': True,
74
- }
75
- if autocommit:
76
- engine_kwargs['isolation_level'] = 'AUTOCOMMIT'
77
- return create_engine(conn_str, **engine_kwargs)
78
-
79
-
80
- def _resolve_engine(configs, autocommit=False):
81
- connection_string = _build_conn_str(configs)
82
- connect_args = get_connect_args(configs)
83
- connect_args_json = json.dumps(connect_args, sort_keys=True, default=str)
84
- return _get_engine(connection_string, connect_args_json, autocommit)
85
-
86
-
87
- def _safe_identifier(identifier):
88
- if not re.match(r'^[A-Za-z_][A-Za-z0-9_]*$', identifier):
89
- raise ValueError(f'Invalid identifier: {identifier}')
90
- return identifier
91
-
92
-
93
- def _safe_mysql_type(data_type):
94
- normalized = data_type.strip()
95
- if not re.match(r'^[A-Za-z0-9_,()\s]+$', normalized):
96
- raise ValueError(f'Invalid data type expression: {data_type}')
97
- return normalized
98
-
99
-
100
- def query(sql, configs, params=None):
101
- logger.info('try to connect to %s...', getattr(configs, 'name', 'database'))
102
- engine = _resolve_engine(configs, autocommit=False)
103
- with engine.connect() as conn:
104
- statement = text(sql) if isinstance(sql, str) else sql
105
- df = pd.read_sql(statement, conn, params=params)
106
- logger.debug('query preview: %s', df.head())
107
- return df
108
-
109
-
110
- def opt(sql, configs, params=None):
111
- logger.info('try to connect to %s...', getattr(configs, 'name', 'database'))
112
- engine = _resolve_engine(configs, autocommit=True)
113
- with engine.connect() as conn:
114
- statement = text(sql) if isinstance(sql, str) else sql
115
- conn.execute(statement, params or {})
116
- logger.info('database execute success')
117
-
118
-
119
- class ManageTable:
120
- def __init__(self, table, configs, verify=False):
121
- self.port = getattr(configs, 'port', 3306)
122
- self.table = table
123
- self.name = configs.name
124
- self.user = configs.user
125
- self.password = configs.password
126
- self.host = configs.host
127
- self.database = configs.database
128
- self.configs = configs
129
- if verify:
130
- self._check_exists()
131
-
132
- def _check_exists(self):
133
- if not self.exists():
134
- raise ValueError(f'table not found: {self.table}')
135
- logger.info('table exists: %s', self.table)
136
-
137
- def exists(self):
138
- safe_table = _safe_identifier(self.table)
139
- check_sql = text(f'SELECT 1 FROM `{safe_table}` LIMIT 1')
140
- try:
141
- opt(check_sql, self)
142
- return True
143
- except Exception:
144
- return False
145
-
146
- def delete_table(self):
147
- safe_table = _safe_identifier(self.table)
148
- try:
149
- opt(text(f'DROP TABLE `{safe_table}`'), self)
150
- logger.info('%s deleted', self.table)
151
- except Exception:
152
- logger.exception('table was not deleted')
153
-
154
- def par_del(self, clause, params=None):
155
- safe_table = _safe_identifier(self.table)
156
- del_clause = text(f'DELETE FROM `{safe_table}` WHERE {clause}')
157
- opt(del_clause, self, params=params)
158
- logger.info('records deleted by clause: %s', clause)
159
-
160
- def change_data_type(self, cols_name, data_type):
161
- safe_table = _safe_identifier(self.table)
162
- safe_col = _safe_identifier(cols_name)
163
- safe_type = _safe_mysql_type(data_type)
164
- change_clause = text(f'ALTER TABLE `{safe_table}` MODIFY COLUMN `{safe_col}` {safe_type}')
165
- opt(change_clause, self)
166
- logger.info('%s changed to %s successfully', cols_name, data_type)
167
-
168
-
169
- def upload_data(self, df, chunk_size=10000, add_date=False):
170
- engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
171
-
172
- with engine.begin() as connection:
173
- if add_date:
174
- df_copy = df.copy()
175
- df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
176
- else:
177
- df_copy = df
178
- total_chunks = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
179
- logger.info('try to upload data now, chunk_size is %s', chunk_size)
180
- with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
181
- try:
182
- for start in range(0, len(df_copy), chunk_size):
183
- end = min(start + chunk_size, len(df_copy))
184
- chunk = df_copy.iloc[start:end]
185
- chunk.to_sql(name=self.table, con=connection, if_exists='append', index=False)
186
- pbar.update(1)
187
- except Exception as e:
188
- logger.exception('an error occurred during upload: %s', e)
189
-
190
- def upsert_data(self, df, chunk_size=10000, add_date=False, ignore=False, key=None):
191
- engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
192
- db_type = getattr(self.configs if hasattr(self, 'configs') else self, 'db_type', 'mysql').lower()
193
-
194
- with engine.begin() as connection:
195
- if add_date:
196
- df_copy = df.copy()
197
- df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
198
- else:
199
- df_copy = df
200
-
201
- total_chunks = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
202
- logger.info('trying to upload data now, chunk_size is %s', chunk_size)
203
-
204
- with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
205
- for start in range(0, len(df_copy), chunk_size):
206
- end = min(start + chunk_size, len(df_copy))
207
- chunk = df_copy.iloc[start:end]
208
- columns = chunk.columns.tolist()
209
- value_placeholders = ', '.join([f':{col}' for col in columns])
210
-
211
- try:
212
- if ignore == False:
213
- if db_type in ('mysql', 'tidb'):
214
- update_columns = ', '.join([f"`{col}`=VALUES(`{col}`)" for col in columns])
215
- insert_sql = f"""
216
- INSERT INTO {self.table} ({', '.join([f'`{col}`' for col in columns])})
217
- VALUES ({value_placeholders})
218
- ON DUPLICATE KEY UPDATE {update_columns}
219
- """
220
- elif db_type == 'postgresql':
221
- if not key:
222
- raise ValueError('key is required for postgresql upsert')
223
- safe_key = _safe_identifier(key)
224
- safe_columns = [_safe_identifier(col) for col in columns]
225
- quoted_columns = ', '.join([f'"{col}"' for col in safe_columns])
226
- update_columns = ', '.join(
227
- [f'"{col}"=EXCLUDED."{col}"' for col in safe_columns if col != safe_key]
228
- )
229
- if update_columns:
230
- insert_sql = f"""
231
- INSERT INTO {self.table} ({quoted_columns})
232
- VALUES ({value_placeholders})
233
- ON CONFLICT ("{safe_key}") DO UPDATE SET {update_columns}
234
- """
235
- else:
236
- insert_sql = f"""
237
- INSERT INTO {self.table} ({quoted_columns})
238
- VALUES ({value_placeholders})
239
- ON CONFLICT ("{safe_key}") DO NOTHING
240
- """
241
- else:
242
- raise ValueError(f'Unsupported db_type for upsert: {db_type}')
243
- else:
244
- insert_sql = f"""
245
- INSERT IGNORE INTO {self.table} ({', '.join([f'`{col}`' for col in columns])})
246
- VALUES ({value_placeholders})
247
- """
248
-
249
- data = chunk.where(pd.notna(chunk), None).to_dict(orient='records')
250
- connection.execute(text(insert_sql), data)
251
- pbar.update(1)
252
- except Exception as e:
253
- logger.exception('an error occurred during upsert: %s', e)
254
-
255
- class Manage_table(ManageTable):
256
- def __init__(self, table, configs, verify=False):
257
- warnings.warn(
258
- 'Manage_table is deprecated and will be removed in v2.0.0; use ManageTable instead.',
259
- DeprecationWarning,
260
- stacklevel=2,
261
- )
262
- super().__init__(table, configs, verify=verify)
263
-
264
- def delete_table(self):
265
- super().delete_table()
266
-
267
- def upload_data(self, df, chunk_size=10000, add_date=True):
268
- engine = _resolve_engine(self.configs if hasattr(self, 'configs') else self, autocommit=False)
269
-
270
- with engine.begin() as connection:
271
- if add_date:
272
- df_copy = df.copy()
273
- df_copy['rundate'] = datetime.now().strftime('%Y-%m-%d')
274
- else:
275
- df_copy = df
276
- total_chunks = (len(df_copy) // chunk_size) + (0 if len(df_copy) % chunk_size == 0 else 1)
277
- logger.info('try to upload data now, chunk_size is %s', chunk_size)
278
- with tqdm(total=total_chunks, desc="Uploading Chunks", unit="chunk") as pbar:
279
- try:
280
- for start in range(0, len(df_copy), chunk_size):
281
- end = min(start + chunk_size, len(df_copy))
282
- chunk = df_copy.iloc[start:end]
283
- chunk.to_sql(name=self.table, con=connection, if_exists='append', index=False)
284
- pbar.update(1)
285
- except Exception as e:
286
- logger.exception('an error occurred during upload: %s', e)
File without changes
File without changes
File without changes