sqlServerConnector 0.1.3__tar.gz → 0.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/PKG-INFO +1 -1
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/pyproject.toml +1 -1
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/connector.py +62 -24
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/PKG-INFO +1 -1
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/README.md +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/setup.cfg +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/__init__.py +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/SOURCES.txt +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/dependency_links.txt +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/requires.txt +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sqlServerConnector
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: A custom SQL Server Connector for ETL processes with Pandas
|
|
5
5
|
Author-email: Nguyen Minh Son <nguyen.minhson1511@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/johnnyb1509/sqlServerConnector
|
|
@@ -67,38 +67,34 @@ class SQLServerConnector:
|
|
|
67
67
|
# CORE ETL METHODS
|
|
68
68
|
# ========================================================
|
|
69
69
|
|
|
70
|
-
def upsert_data(self,
|
|
71
|
-
|
|
72
|
-
target_table: str,
|
|
73
|
-
primary_key: str = None,
|
|
74
|
-
match_columns: Optional[List[str]] = None,
|
|
75
|
-
auto_evolve_schema: bool = True,
|
|
70
|
+
def upsert_data(self, df: pd.DataFrame, target_table: str, primary_key: str = None,
|
|
71
|
+
match_columns: Optional[List[str]] = None, auto_evolve_schema: bool = True,
|
|
76
72
|
conflict_strategy: str = 'sum'):
|
|
77
|
-
"""
|
|
78
|
-
Upsert data into SQL Server with generalized conflict handling.
|
|
79
|
-
Args:
|
|
80
|
-
conflict_strategy: 'sum' (aggregates numeric rows), 'last' (keeps most recent row).
|
|
81
|
-
"""
|
|
82
73
|
if df.empty: return
|
|
83
74
|
|
|
84
75
|
join_keys = match_columns if match_columns else ([primary_key] if primary_key else [])
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
# 1. Sanitize Data
|
|
76
|
+
|
|
77
|
+
# 1. Sanitize & lọc lấy các cột cần thiết
|
|
78
|
+
# Chỉ giữ lại join_keys và các cột có dữ liệu để tránh "phân mảnh" dữ liệu khi gộp
|
|
89
79
|
df_clean = self._sanitize_dataframe(df, exclude_cols=join_keys)
|
|
90
80
|
|
|
91
|
-
# 2.
|
|
81
|
+
# 2. Xử lý trùng lặp triệt để
|
|
92
82
|
initial_len = len(df_clean)
|
|
93
83
|
if conflict_strategy == 'sum':
|
|
94
|
-
#
|
|
84
|
+
# Xác định cột số để cộng dồn
|
|
95
85
|
num_cols = df_clean.select_dtypes(include=[np.number]).columns.tolist()
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
86
|
+
num_cols = [c for c in num_cols if c not in join_keys]
|
|
87
|
+
|
|
88
|
+
# Chỉ gộp trên các cột số, các cột text khác key sẽ bị loại bỏ hoặc lấy dòng đầu
|
|
89
|
+
# Điều này đảm bảo kết quả trả về CHỈ CÓ 1 DÒNG cho mỗi cặp Key
|
|
90
|
+
agg_logic = {col: 'sum' for col in num_cols}
|
|
91
|
+
|
|
92
|
+
# Đối với các cột không phải số và không phải key, chúng ta lấy dòng đầu tiên
|
|
93
|
+
other_cols = [c for c in df_clean.columns if c not in join_keys and c not in num_cols]
|
|
94
|
+
for c in other_cols:
|
|
95
|
+
agg_logic[c] = 'first'
|
|
96
|
+
|
|
97
|
+
df_clean = df_clean.groupby(join_keys, as_index=False).agg(agg_logic)
|
|
102
98
|
else:
|
|
103
99
|
df_clean = df_clean.drop_duplicates(subset=join_keys, keep='last')
|
|
104
100
|
|
|
@@ -181,4 +177,46 @@ class SQLServerConnector:
|
|
|
181
177
|
for col in new_cols:
|
|
182
178
|
sql_type = "NVARCHAR(MAX)" if df[col].dtype == 'object' else "FLOAT"
|
|
183
179
|
conn.execute(text(f"ALTER TABLE [{table_name}] ADD [{col}] {sql_type} NULL"))
|
|
184
|
-
conn.commit()
|
|
180
|
+
conn.commit()
|
|
181
|
+
|
|
182
|
+
# ========================================================
|
|
183
|
+
# DATA RETRIEVAL METHODS
|
|
184
|
+
# ========================================================
|
|
185
|
+
def get_data(self, query: str, params: Optional[Dict[str, Any]] = None, chunksize: Optional[int] = None) -> Union[pd.DataFrame, Any]:
|
|
186
|
+
"""
|
|
187
|
+
Executes a SQL query and returns a Pandas DataFrame.
|
|
188
|
+
|
|
189
|
+
Args:
|
|
190
|
+
query (str): The SQL query string. Use :param_name for parameters.
|
|
191
|
+
params (dict, optional): Dictionary of parameters to bind to the query.
|
|
192
|
+
chunksize (int, optional): If specified, returns an iterator where each chunk is the given size.
|
|
193
|
+
|
|
194
|
+
Returns:
|
|
195
|
+
pd.DataFrame or Iterator[pd.DataFrame]
|
|
196
|
+
"""
|
|
197
|
+
try:
|
|
198
|
+
with self.engine.connect() as conn:
|
|
199
|
+
# Use text() explicitly for SQLAlchemy 2.0 compatibility
|
|
200
|
+
sql_query = text(query)
|
|
201
|
+
|
|
202
|
+
# If chunksize is provided, read_sql returns a generator
|
|
203
|
+
result = pd.read_sql(
|
|
204
|
+
sql_query,
|
|
205
|
+
conn,
|
|
206
|
+
params=params,
|
|
207
|
+
chunksize=chunksize
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
if chunksize is None:
|
|
211
|
+
logger.info(f"Successfully retrieved {len(result)} rows.")
|
|
212
|
+
else:
|
|
213
|
+
logger.info(f"Retrieving data in chunks of {chunksize} rows.")
|
|
214
|
+
|
|
215
|
+
return result
|
|
216
|
+
|
|
217
|
+
except SQLAlchemyError as e:
|
|
218
|
+
logger.error(f"Failed to retrieve data: {e}")
|
|
219
|
+
raise
|
|
220
|
+
except Exception as e:
|
|
221
|
+
logger.error(f"An unexpected error occurred during get_data: {e}")
|
|
222
|
+
raise
|
{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sqlServerConnector
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: A custom SQL Server Connector for ETL processes with Pandas
|
|
5
5
|
Author-email: Nguyen Minh Son <nguyen.minhson1511@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/johnnyb1509/sqlServerConnector
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/requires.txt
RENAMED
|
File without changes
|
{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/top_level.txt
RENAMED
|
File without changes
|