PyPI - sqlServerConnector - Versions diffs - 0.1.3__tar.gz → 0.1.5__tar.gz - Mend

sqlServerConnector 0.1.3__tar.gz → 0.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sqlServerConnector
-Version: 0.1.3
+Version: 0.1.5
 Summary: A custom SQL Server Connector for ETL processes with Pandas
 Author-email: Nguyen Minh Son <nguyen.minhson1511@gmail.com>
 Project-URL: Homepage, https://github.com/johnnyb1509/sqlServerConnector

{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/pyproject.toml RENAMED Viewed

@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "sqlServerConnector"
-version = "0.1.3"
+version = "0.1.5"
 description = "A custom SQL Server Connector for ETL processes with Pandas"
 readme = "README.md"
 requires-python = ">=3.8"

{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/connector.py RENAMED Viewed

@@ -67,38 +67,34 @@ class SQLServerConnector:
     # CORE ETL METHODS
     # ========================================================
-    def upsert_data(self,
-                    df: pd.DataFrame,
-                    target_table: str,
-                    primary_key: str = None,
-                    match_columns: Optional[List[str]] = None,
-                    auto_evolve_schema: bool = True,
+    def upsert_data(self, df: pd.DataFrame, target_table: str, primary_key: str = None,
+                    match_columns: Optional[List[str]] = None, auto_evolve_schema: bool = True,
                     conflict_strategy: str = 'sum'):
-        """
-        Upsert data into SQL Server with generalized conflict handling.
-        Args:
-            conflict_strategy: 'sum' (aggregates numeric rows), 'last' (keeps most recent row).
-        """
         if df.empty: return
         join_keys = match_columns if match_columns else ([primary_key] if primary_key else [])
-        if not join_keys:
-            raise ValueError("Upsert requires match_columns or primary_key.")
-        # 1. Sanitize Data
+        # 1. Sanitize & lọc lấy các cột cần thiết
+        # Chỉ giữ lại join_keys và các cột có dữ liệu để tránh "phân mảnh" dữ liệu khi gộp
         df_clean = self._sanitize_dataframe(df, exclude_cols=join_keys)
-        # 2. Generalization: Handle Duplicates/Conflicts in Source
+        # 2. Xử lý trùng lặp triệt để
         initial_len = len(df_clean)
         if conflict_strategy == 'sum':
-            # Logic: Group by keys and SUM all numeric columns to prevent MERGE errors
+            # Xác định cột số để cộng dồn
             num_cols = df_clean.select_dtypes(include=[np.number]).columns.tolist()
-            agg_map = {col: 'sum' for col in num_cols if col not in join_keys}
-            # For non-numeric columns, just take the last record
-            for col in df_clean.columns:
-                if col not in join_keys and col not in agg_map:
-                    agg_map[col] = 'last'
-            df_clean = df_clean.groupby(join_keys, as_index=False).agg(agg_map)
+            num_cols = [c for c in num_cols if c not in join_keys]
+            # Chỉ gộp trên các cột số, các cột text khác key sẽ bị loại bỏ hoặc lấy dòng đầu
+            # Điều này đảm bảo kết quả trả về CHỈ CÓ 1 DÒNG cho mỗi cặp Key
+            agg_logic = {col: 'sum' for col in num_cols}
+            # Đối với các cột không phải số và không phải key, chúng ta lấy dòng đầu tiên
+            other_cols = [c for c in df_clean.columns if c not in join_keys and c not in num_cols]
+            for c in other_cols:
+                agg_logic[c] = 'first'
+            df_clean = df_clean.groupby(join_keys, as_index=False).agg(agg_logic)
         else:
             df_clean = df_clean.drop_duplicates(subset=join_keys, keep='last')
@@ -181,4 +177,46 @@ class SQLServerConnector:
                 for col in new_cols:
                     sql_type = "NVARCHAR(MAX)" if df[col].dtype == 'object' else "FLOAT"
                     conn.execute(text(f"ALTER TABLE [{table_name}] ADD [{col}] {sql_type} NULL"))
-                conn.commit()
+                conn.commit()
+    # ========================================================
+    # DATA RETRIEVAL METHODS
+    # ========================================================
+    def get_data(self,  query: str, params: Optional[Dict[str, Any]] = None, chunksize: Optional[int] = None) -> Union[pd.DataFrame, Any]:
+        """
+        Executes a SQL query and returns a Pandas DataFrame.
+        Args:
+            query (str): The SQL query string. Use :param_name for parameters.
+            params (dict, optional): Dictionary of parameters to bind to the query.
+            chunksize (int, optional): If specified, returns an iterator where each chunk is the given size.
+        Returns:
+            pd.DataFrame or Iterator[pd.DataFrame]
+        """
+        try:
+            with self.engine.connect() as conn:
+                # Use text() explicitly for SQLAlchemy 2.0 compatibility
+                sql_query = text(query)
+                # If chunksize is provided, read_sql returns a generator
+                result = pd.read_sql(
+                    sql_query,
+                    conn,
+                    params=params,
+                    chunksize=chunksize
+                )
+                if chunksize is None:
+                    logger.info(f"Successfully retrieved {len(result)} rows.")
+                else:
+                    logger.info(f"Retrieving data in chunks of {chunksize} rows.")
+                return result
+        except SQLAlchemyError as e:
+            logger.error(f"Failed to retrieve data: {e}")
+            raise
+        except Exception as e:
+            logger.error(f"An unexpected error occurred during get_data: {e}")
+            raise

{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sqlServerConnector
-Version: 0.1.3
+Version: 0.1.5
 Summary: A custom SQL Server Connector for ETL processes with Pandas
 Author-email: Nguyen Minh Son <nguyen.minhson1511@gmail.com>
 Project-URL: Homepage, https://github.com/johnnyb1509/sqlServerConnector

{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/README.md RENAMED Viewed

File without changes

{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/setup.cfg RENAMED Viewed

File without changes

{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/__init__.py RENAMED Viewed

File without changes

{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/requires.txt RENAMED Viewed

File without changes

{sqlserverconnector-0.1.3 → sqlserverconnector-0.1.5}/src/sqlServerConnector.egg-info/top_level.txt RENAMED Viewed

File without changes

sqlServerConnector 0.1.3__tar.gz → 0.1.5__tar.gz

sqlServerConnector 0.1.3__tar.gz → 0.1.5__tar.gz