PyPI - snowflake-ml-python - Versions diffs - 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl - Mend

snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (207) hide show

snowflake/ml/modeling/lightgbm/lgbm_classifier.py CHANGED Viewed

@@ -262,7 +262,7 @@ class LGBMClassifier(BaseTransformer):
                         inspect.currentframe(), LGBMClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -595,7 +595,14 @@ class LGBMClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/lightgbm/lgbm_regressor.py CHANGED Viewed

@@ -262,7 +262,7 @@ class LGBMRegressor(BaseTransformer):
                         inspect.currentframe(), LGBMRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -595,7 +595,14 @@ class LGBMRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/ard_regression.py CHANGED Viewed

@@ -287,7 +287,7 @@ class ARDRegression(BaseTransformer):
                         inspect.currentframe(), ARDRegression.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -620,7 +620,14 @@ class ARDRegression(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/bayesian_ridge.py CHANGED Viewed

@@ -298,7 +298,7 @@ class BayesianRidge(BaseTransformer):
                         inspect.currentframe(), BayesianRidge.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -631,7 +631,14 @@ class BayesianRidge(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/elastic_net.py CHANGED Viewed

@@ -297,7 +297,7 @@ class ElasticNet(BaseTransformer):
                         inspect.currentframe(), ElasticNet.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -630,7 +630,14 @@ class ElasticNet(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/elastic_net_cv.py CHANGED Viewed

@@ -333,7 +333,7 @@ class ElasticNetCV(BaseTransformer):
                         inspect.currentframe(), ElasticNetCV.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -666,7 +666,14 @@ class ElasticNetCV(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/gamma_regressor.py CHANGED Viewed

@@ -278,7 +278,7 @@ class GammaRegressor(BaseTransformer):
                         inspect.currentframe(), GammaRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -611,7 +611,14 @@ class GammaRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/huber_regressor.py CHANGED Viewed

@@ -261,7 +261,7 @@ class HuberRegressor(BaseTransformer):
                         inspect.currentframe(), HuberRegressor.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -594,7 +594,14 @@ class HuberRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/lars.py CHANGED Viewed

@@ -290,7 +290,7 @@ class Lars(BaseTransformer):
                         inspect.currentframe(), Lars.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -623,7 +623,14 @@ class Lars(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/lars_cv.py CHANGED Viewed

@@ -298,7 +298,7 @@ class LarsCV(BaseTransformer):
                         inspect.currentframe(), LarsCV.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -631,7 +631,14 @@ class LarsCV(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/lasso.py CHANGED Viewed

@@ -291,7 +291,7 @@ class Lasso(BaseTransformer):
                         inspect.currentframe(), Lasso.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -624,7 +624,14 @@ class Lasso(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/lasso_cv.py CHANGED Viewed

@@ -319,7 +319,7 @@ class LassoCV(BaseTransformer):
                         inspect.currentframe(), LassoCV.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -652,7 +652,14 @@ class LassoCV(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/lasso_lars.py CHANGED Viewed

@@ -311,7 +311,7 @@ class LassoLars(BaseTransformer):
                         inspect.currentframe(), LassoLars.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -644,7 +644,14 @@ class LassoLars(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/lasso_lars_cv.py CHANGED Viewed

@@ -312,7 +312,7 @@ class LassoLarsCV(BaseTransformer):
                         inspect.currentframe(), LassoLarsCV.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -645,7 +645,14 @@ class LassoLarsCV(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/lasso_lars_ic.py CHANGED Viewed

@@ -295,7 +295,7 @@ class LassoLarsIC(BaseTransformer):
                         inspect.currentframe(), LassoLarsIC.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -628,7 +628,14 @@ class LassoLarsIC(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/linear_regression.py CHANGED Viewed

@@ -248,7 +248,7 @@ class LinearRegression(BaseTransformer):
                         inspect.currentframe(), LinearRegression.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -581,7 +581,14 @@ class LinearRegression(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/logistic_regression.py CHANGED Viewed

@@ -362,7 +362,7 @@ class LogisticRegression(BaseTransformer):
                         inspect.currentframe(), LogisticRegression.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -695,7 +695,14 @@ class LogisticRegression(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/logistic_regression_cv.py CHANGED Viewed

@@ -383,7 +383,7 @@ class LogisticRegressionCV(BaseTransformer):
                         inspect.currentframe(), LogisticRegressionCV.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -716,7 +716,14 @@ class LogisticRegressionCV(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/multi_task_elastic_net.py CHANGED Viewed

@@ -281,7 +281,7 @@ class MultiTaskElasticNet(BaseTransformer):
                         inspect.currentframe(), MultiTaskElasticNet.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -614,7 +614,14 @@ class MultiTaskElasticNet(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py CHANGED Viewed

@@ -322,7 +322,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
                         inspect.currentframe(), MultiTaskElasticNetCV.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -655,7 +655,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/multi_task_lasso.py CHANGED Viewed

@@ -273,7 +273,7 @@ class MultiTaskLasso(BaseTransformer):
                         inspect.currentframe(), MultiTaskLasso.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -606,7 +606,14 @@ class MultiTaskLasso(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py CHANGED Viewed

@@ -308,7 +308,7 @@ class MultiTaskLassoCV(BaseTransformer):
                         inspect.currentframe(), MultiTaskLassoCV.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -641,7 +641,14 @@ class MultiTaskLassoCV(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py CHANGED Viewed

@@ -256,7 +256,7 @@ class OrthogonalMatchingPursuit(BaseTransformer):
                         inspect.currentframe(), OrthogonalMatchingPursuit.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -589,7 +589,14 @@ class OrthogonalMatchingPursuit(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py CHANGED Viewed

@@ -330,7 +330,7 @@ class PassiveAggressiveClassifier(BaseTransformer):
                         inspect.currentframe(), PassiveAggressiveClassifier.__class__.__name__
                     ),
                     api_calls=[Session.call],
-                    custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+                    custom_tags={"autogen": True} if self._autogenerated else None,
                 )
                 pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
                 pd_df.columns = dataset.columns
@@ -663,7 +663,14 @@ class PassiveAggressiveClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl

snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl