sqlServerConnector 0.1.3__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/PKG-INFO +1 -1
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/pyproject.toml +1 -1
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/src/connector.py +19 -23
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/src/sqlServerConnector.egg-info/PKG-INFO +1 -1
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/README.md +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/setup.cfg +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/src/__init__.py +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/src/sqlServerConnector.egg-info/SOURCES.txt +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/src/sqlServerConnector.egg-info/dependency_links.txt +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/src/sqlServerConnector.egg-info/requires.txt +0 -0
- {sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/src/sqlServerConnector.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sqlServerConnector
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: A custom SQL Server Connector for ETL processes with Pandas
|
|
5
5
|
Author-email: Nguyen Minh Son <nguyen.minhson1511@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/johnnyb1509/sqlServerConnector
|
|
@@ -67,38 +67,34 @@ class SQLServerConnector:
|
|
|
67
67
|
# CORE ETL METHODS
|
|
68
68
|
# ========================================================
|
|
69
69
|
|
|
70
|
-
def upsert_data(self,
|
|
71
|
-
|
|
72
|
-
target_table: str,
|
|
73
|
-
primary_key: str = None,
|
|
74
|
-
match_columns: Optional[List[str]] = None,
|
|
75
|
-
auto_evolve_schema: bool = True,
|
|
70
|
+
def upsert_data(self, df: pd.DataFrame, target_table: str, primary_key: str = None,
|
|
71
|
+
match_columns: Optional[List[str]] = None, auto_evolve_schema: bool = True,
|
|
76
72
|
conflict_strategy: str = 'sum'):
|
|
77
|
-
"""
|
|
78
|
-
Upsert data into SQL Server with generalized conflict handling.
|
|
79
|
-
Args:
|
|
80
|
-
conflict_strategy: 'sum' (aggregates numeric rows), 'last' (keeps most recent row).
|
|
81
|
-
"""
|
|
82
73
|
if df.empty: return
|
|
83
74
|
|
|
84
75
|
join_keys = match_columns if match_columns else ([primary_key] if primary_key else [])
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
# 1. Sanitize Data
|
|
76
|
+
|
|
77
|
+
# 1. Sanitize & lọc lấy các cột cần thiết
|
|
78
|
+
# Chỉ giữ lại join_keys và các cột có dữ liệu để tránh "phân mảnh" dữ liệu khi gộp
|
|
89
79
|
df_clean = self._sanitize_dataframe(df, exclude_cols=join_keys)
|
|
90
80
|
|
|
91
|
-
# 2.
|
|
81
|
+
# 2. Xử lý trùng lặp triệt để
|
|
92
82
|
initial_len = len(df_clean)
|
|
93
83
|
if conflict_strategy == 'sum':
|
|
94
|
-
#
|
|
84
|
+
# Xác định cột số để cộng dồn
|
|
95
85
|
num_cols = df_clean.select_dtypes(include=[np.number]).columns.tolist()
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
86
|
+
num_cols = [c for c in num_cols if c not in join_keys]
|
|
87
|
+
|
|
88
|
+
# Chỉ gộp trên các cột số, các cột text khác key sẽ bị loại bỏ hoặc lấy dòng đầu
|
|
89
|
+
# Điều này đảm bảo kết quả trả về CHỈ CÓ 1 DÒNG cho mỗi cặp Key
|
|
90
|
+
agg_logic = {col: 'sum' for col in num_cols}
|
|
91
|
+
|
|
92
|
+
# Đối với các cột không phải số và không phải key, chúng ta lấy dòng đầu tiên
|
|
93
|
+
other_cols = [c for c in df_clean.columns if c not in join_keys and c not in num_cols]
|
|
94
|
+
for c in other_cols:
|
|
95
|
+
agg_logic[c] = 'first'
|
|
96
|
+
|
|
97
|
+
df_clean = df_clean.groupby(join_keys, as_index=False).agg(agg_logic)
|
|
102
98
|
else:
|
|
103
99
|
df_clean = df_clean.drop_duplicates(subset=join_keys, keep='last')
|
|
104
100
|
|
{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/src/sqlServerConnector.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sqlServerConnector
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: A custom SQL Server Connector for ETL processes with Pandas
|
|
5
5
|
Author-email: Nguyen Minh Son <nguyen.minhson1511@gmail.com>
|
|
6
6
|
Project-URL: Homepage, https://github.com/johnnyb1509/sqlServerConnector
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/src/sqlServerConnector.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/src/sqlServerConnector.egg-info/requires.txt
RENAMED
|
File without changes
|
{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.4}/src/sqlServerConnector.egg-info/top_level.txt
RENAMED
|
File without changes
|