PyPI - snowflake-ml-python - Versions diffs - 1.5.1__py3-none-any.whl → 1.5.2__py3-none-any.whl - Mend

snowflake-ml-python 1.5.1 (py3-none-any.whl) → 1.5.2 (py3-none-any.whl)

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (174) hide show

snowflake/ml/modeling/ensemble/stacking_regressor.py CHANGED Viewed

@@ -621,7 +621,14 @@ class StackingRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/ensemble/voting_classifier.py CHANGED Viewed

@@ -603,7 +603,14 @@ class VotingClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/ensemble/voting_regressor.py CHANGED Viewed

@@ -585,7 +585,14 @@ class VotingRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/feature_selection/generic_univariate_select.py CHANGED Viewed

@@ -573,7 +573,14 @@ class GenericUnivariateSelect(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/feature_selection/select_fdr.py CHANGED Viewed

@@ -569,7 +569,14 @@ class SelectFdr(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/feature_selection/select_fpr.py CHANGED Viewed

@@ -569,7 +569,14 @@ class SelectFpr(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/feature_selection/select_fwe.py CHANGED Viewed

@@ -569,7 +569,14 @@ class SelectFwe(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/feature_selection/select_k_best.py CHANGED Viewed

@@ -570,7 +570,14 @@ class SelectKBest(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/feature_selection/select_percentile.py CHANGED Viewed

@@ -569,7 +569,14 @@ class SelectPercentile(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/feature_selection/sequential_feature_selector.py CHANGED Viewed

@@ -627,7 +627,14 @@ class SequentialFeatureSelector(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/feature_selection/variance_threshold.py CHANGED Viewed

@@ -560,7 +560,14 @@ class VarianceThreshold(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py CHANGED Viewed

@@ -653,7 +653,14 @@ class GaussianProcessClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py CHANGED Viewed

@@ -644,7 +644,14 @@ class GaussianProcessRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/impute/iterative_imputer.py CHANGED Viewed

@@ -688,7 +688,14 @@ class IterativeImputer(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/impute/knn_imputer.py CHANGED Viewed

@@ -614,7 +614,14 @@ class KNNImputer(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/impute/missing_indicator.py CHANGED Viewed

@@ -588,7 +588,14 @@ class MissingIndicator(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/impute/simple_imputer.py CHANGED Viewed

@@ -158,6 +158,7 @@ class SimpleImputer(base.BaseTransformer):
         self.fill_value = fill_value
         self.missing_values = missing_values
+        self.statistics_: Dict[str, Any] = {}
         # TODO(hayu): [SNOW-752265] Support SimpleImputer keep_empty_features.
         #  Add back when `keep_empty_features` is supported.
         # self.keep_empty_features = keep_empty_features
@@ -229,8 +230,27 @@ class SimpleImputer(base.BaseTransformer):
         return input_col_datatypes
+    def fit(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> "SimpleImputer":
+        if isinstance(dataset, snowpark.DataFrame):
+            return self._fit_snowpark(dataset)
+        else:
+            return self._fit_sklearn(dataset)
+    def _fit_sklearn(self, dataset: pd.DataFrame) -> "SimpleImputer":
+        dataset = self._use_input_cols_only(dataset)
+        sklearn_simple_imputer = self._create_sklearn_object()
+        sklearn_simple_imputer = sklearn_simple_imputer.fit(dataset)
+        self._sklearn_object = sklearn_simple_imputer
+        for input_col, fill_value in zip(self.input_cols, sklearn_simple_imputer.statistics_.tolist()):
+            self.statistics_[input_col] = fill_value
+        self._sklearn_fit_dtype = sklearn_simple_imputer._fit_dtype
+        self.n_features_in_ = len(self.input_cols)
+        self.feature_names_in_ = self.input_cols
+        self._is_fitted = True
+        return self
     @telemetry.send_api_usage_telemetry(project=base.PROJECT, subproject=_SUBPROJECT)
-    def fit(self, dataset: snowpark.DataFrame) -> "SimpleImputer":
+    def _fit_snowpark(self, dataset: snowpark.DataFrame) -> "SimpleImputer":
         """
         Compute values to impute for the dataset according to the strategy.
@@ -245,7 +265,6 @@ class SimpleImputer(base.BaseTransformer):
         # In order to fit, the input columns should have the same type.
         input_col_datatypes = self._get_dataset_input_col_datatypes(dataset)
-        self.statistics_: Dict[str, Any] = {}
         statement_params = telemetry.get_statement_params(base.PROJECT, _SUBPROJECT, self.__class__.__name__)
         if self.strategy == "constant":

snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py CHANGED Viewed

@@ -563,7 +563,14 @@ class AdditiveChi2Sampler(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/kernel_approximation/nystroem.py CHANGED Viewed

@@ -611,7 +611,14 @@ class Nystroem(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py CHANGED Viewed

@@ -587,7 +587,14 @@ class PolynomialCountSketch(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/kernel_approximation/rbf_sampler.py CHANGED Viewed

@@ -574,7 +574,14 @@ class RBFSampler(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py CHANGED Viewed

@@ -572,7 +572,14 @@ class SkewedChi2Sampler(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/kernel_ridge/kernel_ridge.py CHANGED Viewed

@@ -606,7 +606,14 @@ class KernelRidge(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/lightgbm/lgbm_classifier.py CHANGED Viewed

@@ -595,7 +595,14 @@ class LGBMClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/lightgbm/lgbm_regressor.py CHANGED Viewed

@@ -595,7 +595,14 @@ class LGBMRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/ard_regression.py CHANGED Viewed

@@ -620,7 +620,14 @@ class ARDRegression(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/bayesian_ridge.py CHANGED Viewed

@@ -631,7 +631,14 @@ class BayesianRidge(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/elastic_net.py CHANGED Viewed

@@ -630,7 +630,14 @@ class ElasticNet(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/elastic_net_cv.py CHANGED Viewed

@@ -666,7 +666,14 @@ class ElasticNetCV(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/gamma_regressor.py CHANGED Viewed

@@ -611,7 +611,14 @@ class GammaRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/huber_regressor.py CHANGED Viewed

@@ -594,7 +594,14 @@ class HuberRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/lars.py CHANGED Viewed

@@ -623,7 +623,14 @@ class Lars(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/lars_cv.py CHANGED Viewed

@@ -631,7 +631,14 @@ class LarsCV(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake/ml/modeling/linear_model/lasso.py CHANGED Viewed

@@ -624,7 +624,14 @@ class Lasso(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:

snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.2__py3-none-any.whl

snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.2__py3-none-any.whl