snowflake-ml-python 1.5.0__py3-none-any.whl → 1.5.2__py3-none-any.whl

This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (197)
  1. snowflake/cortex/_sentiment.py +7 -4
  2. snowflake/ml/_internal/env_utils.py +6 -0
  3. snowflake/ml/_internal/lineage/lineage_utils.py +95 -0
  4. snowflake/ml/_internal/telemetry.py +1 -0
  5. snowflake/ml/_internal/utils/identifier.py +1 -1
  6. snowflake/ml/_internal/utils/sql_identifier.py +14 -1
  7. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  8. snowflake/ml/dataset/__init__.py +2 -1
  9. snowflake/ml/dataset/dataset.py +4 -3
  10. snowflake/ml/dataset/dataset_reader.py +5 -8
  11. snowflake/ml/feature_store/__init__.py +6 -0
  12. snowflake/ml/feature_store/access_manager.py +283 -0
  13. snowflake/ml/feature_store/feature_store.py +160 -100
  14. snowflake/ml/feature_store/feature_view.py +30 -19
  15. snowflake/ml/fileset/embedded_stage_fs.py +15 -12
  16. snowflake/ml/fileset/snowfs.py +2 -30
  17. snowflake/ml/fileset/stage_fs.py +25 -7
  18. snowflake/ml/model/_client/model/model_impl.py +46 -39
  19. snowflake/ml/model/_client/model/model_version_impl.py +24 -2
  20. snowflake/ml/model/_client/ops/metadata_ops.py +27 -4
  21. snowflake/ml/model/_client/ops/model_ops.py +174 -16
  22. snowflake/ml/model/_client/sql/_base.py +34 -0
  23. snowflake/ml/model/_client/sql/model.py +32 -39
  24. snowflake/ml/model/_client/sql/model_version.py +111 -42
  25. snowflake/ml/model/_client/sql/stage.py +6 -32
  26. snowflake/ml/model/_client/sql/tag.py +32 -56
  27. snowflake/ml/model/_model_composer/model_composer.py +8 -4
  28. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -1
  29. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  30. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  31. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +90 -142
  32. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +159 -0
  33. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +81 -3
  34. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +8 -1
  35. snowflake/ml/modeling/cluster/affinity_propagation.py +8 -1
  36. snowflake/ml/modeling/cluster/agglomerative_clustering.py +8 -1
  37. snowflake/ml/modeling/cluster/birch.py +8 -1
  38. snowflake/ml/modeling/cluster/bisecting_k_means.py +8 -1
  39. snowflake/ml/modeling/cluster/dbscan.py +8 -1
  40. snowflake/ml/modeling/cluster/feature_agglomeration.py +8 -1
  41. snowflake/ml/modeling/cluster/k_means.py +8 -1
  42. snowflake/ml/modeling/cluster/mean_shift.py +8 -1
  43. snowflake/ml/modeling/cluster/mini_batch_k_means.py +8 -1
  44. snowflake/ml/modeling/cluster/optics.py +8 -1
  45. snowflake/ml/modeling/cluster/spectral_biclustering.py +8 -1
  46. snowflake/ml/modeling/cluster/spectral_clustering.py +8 -1
  47. snowflake/ml/modeling/cluster/spectral_coclustering.py +8 -1
  48. snowflake/ml/modeling/compose/column_transformer.py +8 -1
  49. snowflake/ml/modeling/compose/transformed_target_regressor.py +8 -1
  50. snowflake/ml/modeling/covariance/elliptic_envelope.py +8 -1
  51. snowflake/ml/modeling/covariance/empirical_covariance.py +8 -1
  52. snowflake/ml/modeling/covariance/graphical_lasso.py +8 -1
  53. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +8 -1
  54. snowflake/ml/modeling/covariance/ledoit_wolf.py +8 -1
  55. snowflake/ml/modeling/covariance/min_cov_det.py +8 -1
  56. snowflake/ml/modeling/covariance/oas.py +8 -1
  57. snowflake/ml/modeling/covariance/shrunk_covariance.py +8 -1
  58. snowflake/ml/modeling/decomposition/dictionary_learning.py +8 -1
  59. snowflake/ml/modeling/decomposition/factor_analysis.py +8 -1
  60. snowflake/ml/modeling/decomposition/fast_ica.py +8 -1
  61. snowflake/ml/modeling/decomposition/incremental_pca.py +8 -1
  62. snowflake/ml/modeling/decomposition/kernel_pca.py +8 -1
  63. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +8 -1
  64. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +8 -1
  65. snowflake/ml/modeling/decomposition/pca.py +8 -1
  66. snowflake/ml/modeling/decomposition/sparse_pca.py +8 -1
  67. snowflake/ml/modeling/decomposition/truncated_svd.py +8 -1
  68. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +8 -1
  69. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +8 -1
  70. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +8 -1
  71. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +8 -1
  72. snowflake/ml/modeling/ensemble/bagging_classifier.py +8 -1
  73. snowflake/ml/modeling/ensemble/bagging_regressor.py +8 -1
  74. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +8 -1
  75. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +8 -1
  76. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +8 -1
  77. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +8 -1
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +8 -1
  79. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +8 -1
  80. snowflake/ml/modeling/ensemble/isolation_forest.py +8 -1
  81. snowflake/ml/modeling/ensemble/random_forest_classifier.py +8 -1
  82. snowflake/ml/modeling/ensemble/random_forest_regressor.py +8 -1
  83. snowflake/ml/modeling/ensemble/stacking_regressor.py +8 -1
  84. snowflake/ml/modeling/ensemble/voting_classifier.py +8 -1
  85. snowflake/ml/modeling/ensemble/voting_regressor.py +8 -1
  86. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +8 -1
  87. snowflake/ml/modeling/feature_selection/select_fdr.py +8 -1
  88. snowflake/ml/modeling/feature_selection/select_fpr.py +8 -1
  89. snowflake/ml/modeling/feature_selection/select_fwe.py +8 -1
  90. snowflake/ml/modeling/feature_selection/select_k_best.py +8 -1
  91. snowflake/ml/modeling/feature_selection/select_percentile.py +8 -1
  92. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +8 -1
  93. snowflake/ml/modeling/feature_selection/variance_threshold.py +8 -1
  94. snowflake/ml/modeling/framework/base.py +4 -3
  95. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +8 -1
  96. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +8 -1
  97. snowflake/ml/modeling/impute/iterative_imputer.py +8 -1
  98. snowflake/ml/modeling/impute/knn_imputer.py +8 -1
  99. snowflake/ml/modeling/impute/missing_indicator.py +8 -1
  100. snowflake/ml/modeling/impute/simple_imputer.py +21 -2
  101. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +8 -1
  102. snowflake/ml/modeling/kernel_approximation/nystroem.py +8 -1
  103. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +8 -1
  104. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +8 -1
  105. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +8 -1
  106. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +8 -1
  107. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +8 -1
  108. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +8 -1
  109. snowflake/ml/modeling/linear_model/ard_regression.py +8 -1
  110. snowflake/ml/modeling/linear_model/bayesian_ridge.py +8 -1
  111. snowflake/ml/modeling/linear_model/elastic_net.py +8 -1
  112. snowflake/ml/modeling/linear_model/elastic_net_cv.py +8 -1
  113. snowflake/ml/modeling/linear_model/gamma_regressor.py +8 -1
  114. snowflake/ml/modeling/linear_model/huber_regressor.py +8 -1
  115. snowflake/ml/modeling/linear_model/lars.py +8 -1
  116. snowflake/ml/modeling/linear_model/lars_cv.py +8 -1
  117. snowflake/ml/modeling/linear_model/lasso.py +8 -1
  118. snowflake/ml/modeling/linear_model/lasso_cv.py +8 -1
  119. snowflake/ml/modeling/linear_model/lasso_lars.py +8 -1
  120. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +8 -1
  121. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +8 -1
  122. snowflake/ml/modeling/linear_model/linear_regression.py +8 -1
  123. snowflake/ml/modeling/linear_model/logistic_regression.py +8 -1
  124. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +8 -1
  125. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +8 -1
  126. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +8 -1
  127. snowflake/ml/modeling/linear_model/multi_task_lasso.py +8 -1
  128. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +8 -1
  129. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +8 -1
  130. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +8 -1
  131. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +8 -1
  132. snowflake/ml/modeling/linear_model/perceptron.py +8 -1
  133. snowflake/ml/modeling/linear_model/poisson_regressor.py +8 -1
  134. snowflake/ml/modeling/linear_model/ransac_regressor.py +8 -1
  135. snowflake/ml/modeling/linear_model/ridge.py +8 -1
  136. snowflake/ml/modeling/linear_model/ridge_classifier.py +8 -1
  137. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +8 -1
  138. snowflake/ml/modeling/linear_model/ridge_cv.py +8 -1
  139. snowflake/ml/modeling/linear_model/sgd_classifier.py +8 -1
  140. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +8 -1
  141. snowflake/ml/modeling/linear_model/sgd_regressor.py +8 -1
  142. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +8 -1
  143. snowflake/ml/modeling/linear_model/tweedie_regressor.py +8 -1
  144. snowflake/ml/modeling/manifold/isomap.py +8 -1
  145. snowflake/ml/modeling/manifold/mds.py +8 -1
  146. snowflake/ml/modeling/manifold/spectral_embedding.py +8 -1
  147. snowflake/ml/modeling/manifold/tsne.py +8 -1
  148. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +8 -1
  149. snowflake/ml/modeling/mixture/gaussian_mixture.py +8 -1
  150. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +8 -1
  151. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +8 -1
  152. snowflake/ml/modeling/multiclass/output_code_classifier.py +8 -1
  153. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +8 -1
  154. snowflake/ml/modeling/naive_bayes/categorical_nb.py +8 -1
  155. snowflake/ml/modeling/naive_bayes/complement_nb.py +8 -1
  156. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +8 -1
  157. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +8 -1
  158. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +8 -1
  159. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +8 -1
  160. snowflake/ml/modeling/neighbors/kernel_density.py +8 -1
  161. snowflake/ml/modeling/neighbors/local_outlier_factor.py +8 -1
  162. snowflake/ml/modeling/neighbors/nearest_centroid.py +8 -1
  163. snowflake/ml/modeling/neighbors/nearest_neighbors.py +8 -1
  164. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +8 -1
  165. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +8 -1
  166. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +8 -1
  167. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +8 -1
  168. snowflake/ml/modeling/neural_network/mlp_classifier.py +8 -1
  169. snowflake/ml/modeling/neural_network/mlp_regressor.py +8 -1
  170. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  171. snowflake/ml/modeling/pipeline/pipeline.py +27 -7
  172. snowflake/ml/modeling/preprocessing/polynomial_features.py +8 -1
  173. snowflake/ml/modeling/semi_supervised/label_propagation.py +8 -1
  174. snowflake/ml/modeling/semi_supervised/label_spreading.py +8 -1
  175. snowflake/ml/modeling/svm/linear_svc.py +8 -1
  176. snowflake/ml/modeling/svm/linear_svr.py +8 -1
  177. snowflake/ml/modeling/svm/nu_svc.py +8 -1
  178. snowflake/ml/modeling/svm/nu_svr.py +8 -1
  179. snowflake/ml/modeling/svm/svc.py +8 -1
  180. snowflake/ml/modeling/svm/svr.py +8 -1
  181. snowflake/ml/modeling/tree/decision_tree_classifier.py +8 -1
  182. snowflake/ml/modeling/tree/decision_tree_regressor.py +8 -1
  183. snowflake/ml/modeling/tree/extra_tree_classifier.py +8 -1
  184. snowflake/ml/modeling/tree/extra_tree_regressor.py +8 -1
  185. snowflake/ml/modeling/xgboost/xgb_classifier.py +8 -1
  186. snowflake/ml/modeling/xgboost/xgb_regressor.py +8 -1
  187. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +8 -1
  188. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +8 -1
  189. snowflake/ml/registry/_manager/model_manager.py +95 -8
  190. snowflake/ml/registry/registry.py +10 -1
  191. snowflake/ml/version.py +1 -1
  192. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/METADATA +66 -10
  193. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/RECORD +196 -192
  194. snowflake/ml/_internal/lineage/dataset_dataframe.py +0 -44
  195. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/LICENSE.txt +0 -0
  196. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/WHEEL +0 -0
  197. {snowflake_ml_python-1.5.0.dist-info → snowflake_ml_python-1.5.2.dist-info}/top_level.txt +0 -0
@@ -618,7 +618,14 @@ class TheilSenRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
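
This same seven-line replacement is stamped into every generated estimator class below (and into the preprocessing and semi-supervised hunks at the end): the one-row sample is now restricted to the fitted input columns, and its pandas column labels are reset to the Snowpark identifiers before the local sklearn call. A minimal sketch of the intent, assuming a Snowpark `dataset` and a fitted `transformer` exposing `input_cols`; the free-standing helper is hypothetical and only restates the hunk's logic outside the generated class:

    def infer_output_sample(transformer, dataset, method: str, output_cols_prefix: str):
        # Keep only the fitted input columns so extra columns in `dataset`
        # never reach the local sklearn estimator.
        sample_pd_df = dataset.select(transformer.input_cols).limit(1).to_pandas()
        # to_pandas() may not preserve the Snowflake identifier spelling, so
        # copy the Snowpark-side column names back onto the pandas frame;
        # the select() above already fixed the column order seen during fit.
        sample_pd_df.columns = dataset.select(transformer.input_cols).columns
        # Dispatch to e.g. transformer.predict or transformer.transform.
        return getattr(transformer, method)(sample_pd_df, output_cols_prefix)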
@@ -644,7 +644,14 @@ class TweedieRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -642,7 +642,14 @@ class Isomap(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -623,7 +623,14 @@ class MDS(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -625,7 +625,14 @@ class SpectralEmbedding(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -684,7 +684,14 @@ class TSNE(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -689,7 +689,14 @@ class BayesianGaussianMixture(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -662,7 +662,14 @@ class GaussianMixture(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -572,7 +572,14 @@ class OneVsOneClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -581,7 +581,14 @@ class OneVsRestClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -584,7 +584,14 @@ class OutputCodeClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -584,7 +584,14 @@ class BernoulliNB(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -590,7 +590,14 @@ class CategoricalNB(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -584,7 +584,14 @@ class ComplementNB(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -565,7 +565,14 @@ class GaussianNB(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -578,7 +578,14 @@ class MultinomialNB(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
        # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -635,7 +635,14 @@ class KNeighborsClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -637,7 +637,14 @@ class KNeighborsRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -612,7 +612,14 @@ class KernelDensity(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -644,7 +644,14 @@ class LocalOutlierFactor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -575,7 +575,14 @@ class NearestCentroid(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -623,7 +623,14 @@ class NearestNeighbors(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -648,7 +648,14 @@ class NeighborhoodComponentsAnalysis(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -647,7 +647,14 @@ class RadiusNeighborsClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -637,7 +637,14 @@ class RadiusNeighborsRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -596,7 +596,14 @@ class BernoulliRBM(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -749,7 +749,14 @@ class MLPClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -745,7 +745,14 @@ class MLPRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -0,0 +1,5 @@
+"""Enables the anonymous stored procedures for running modeling fit"""
+
+from snowflake.ml.modeling._internal.snowpark_implementations import snowpark_trainer
+
+snowpark_trainer._ENABLE_ANONYMOUS_SPROC = True
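
The new module is activated purely by import side effect. A hedged usage sketch (the estimator and column names are illustrative, not part of the diff):

    # Importing the module flips the module-level flag for this process;
    # it must run before any fit() call that should use anonymous sprocs.
    import snowflake.ml.modeling.parameters.enable_anonymous_sproc  # noqa: F401

    from snowflake.ml.modeling.linear_model import LinearRegression

    lr = LinearRegression(input_cols=["X1", "X2"], label_cols=["Y"])
    # lr.fit(snowpark_df) would now train via an anonymous stored procedure,
    # per snowpark_trainer._ENABLE_ANONYMOUS_SPROC set above.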
@@ -115,7 +115,7 @@ class Pipeline(base.BaseTransformer):
         self._feature_names_in: List[np.ndarray[Any, np.dtype[Any]]] = []
         self._n_features_in: List[int] = []
         self._transformers_to_input_indices: Dict[str, List[int]] = {}
-        self._is_convertible_to_sklearn = True
+        self._modifies_label_or_sample_weight = True

         self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None

@@ -126,6 +126,9 @@ class Pipeline(base.BaseTransformer):
         self._deps = list(deps)
         self._sklearn_object = None
         self.label_cols = self._get_label_cols()
+        self._is_convertible_to_sklearn = self._is_convertible_to_sklearn_object()
+
+        self._send_pipeline_configuration_telemetry()

     @staticmethod
     def _is_estimator(obj: object) -> bool:
@@ -228,7 +231,7 @@ class Pipeline(base.BaseTransformer):
         return [c for c in columns if c not in target_cols]

     def _append_step_feature_consumption_info(self, step_name: str, all_cols: List[str], input_cols: List[str]) -> None:
-        if self._is_convertible_to_sklearn:
+        if self._modifies_label_or_sample_weight:
             all_cols = self._get_sanitized_list_of_columns(all_cols)
             self._feature_names_in.append(np.asarray(all_cols, dtype=object))
             self._n_features_in.append(len(all_cols))
@@ -248,7 +251,7 @@ class Pipeline(base.BaseTransformer):
        self, dataset: Union[snowpark.DataFrame, pd.DataFrame]
    ) -> Union[snowpark.DataFrame, pd.DataFrame]:
        self._reset()
-        self._is_convertible_to_sklearn = not self._is_pipeline_modifying_label_or_sample_weight()
+        self._modifies_label_or_sample_weight = not self._is_pipeline_modifying_label_or_sample_weight()
        transformed_dataset = dataset
        for name, trans in self._get_transformers():
            self._append_step_feature_consumption_info(
@@ -425,7 +428,7 @@ class Pipeline(base.BaseTransformer):
         )

         if self._can_be_trained_in_ml_runtime(dataset):
-            if not self._is_convertible_to_sklearn_object():
+            if not self._is_convertible_to_sklearn:
                 raise ValueError("This pipeline cannot be converted to an sklearn pipeline.")
             self._fit_ml_runtime(dataset)

@@ -947,7 +950,7 @@ class Pipeline(base.BaseTransformer):
        if not os.environ.get(IN_ML_RUNTIME_ENV_VAR):
            return False

-        return self._is_convertible_to_sklearn_object()
+        return self._is_convertible_to_sklearn

    @staticmethod
    def _wrap_transformer_in_column_transformer(
@@ -1003,7 +1006,7 @@ class Pipeline(base.BaseTransformer):
        if not self._is_fitted:
            return self._create_unfitted_sklearn_object()

-        if not self._is_convertible_to_sklearn:
+        if not self._modifies_label_or_sample_weight:
            raise exceptions.SnowflakeMLException(
                error_code=error_codes.METHOD_NOT_ALLOWED,
                original_exception=ValueError(
@@ -1109,7 +1112,24 @@ class Pipeline(base.BaseTransformer):
            else:
                return self._create_sklearn_object()
        else:
-            if self._is_convertible_to_sklearn_object():
+            if self._is_convertible_to_sklearn:
                return self._create_unfitted_sklearn_object()
            else:
                raise ValueError("This pipeline can not be converted to an sklearn pipeline.")
+
+    def _send_pipeline_configuration_telemetry(self) -> None:
+        """Track information about the pipeline setup. Currently, we want to track:
+        - Whether the pipeline is convertible to an sklearn pipeline
+        - Whether the pipeline is being used in the SPCS ml runtime.
+        """
+
+        telemetry_data = {
+            "pipeline_is_convertible_to_sklearn": self._is_convertible_to_sklearn,
+            "in_spcs_ml_runtime": bool(os.environ.get(IN_ML_RUNTIME_ENV_VAR)),
+        }
+        telemetry.send_custom_usage(
+            project=_PROJECT,
+            subproject=_SUBPROJECT,
+            telemetry_type=telemetry.TelemetryField.TYPE_SNOWML_PIPELINE_USAGE.value,
+            data=telemetry_data,
+        )
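
For reference, a self-contained sketch of the payload that the new _send_pipeline_configuration_telemetry method assembles; the environment-variable value is a stand-in, since the hunk only shows the constant IN_ML_RUNTIME_ENV_VAR:

    import os

    IN_ML_RUNTIME_ENV_VAR = "IN_SPCS_ML_RUNTIME"  # assumed value, for illustration only

    def build_pipeline_telemetry(is_convertible_to_sklearn: bool) -> dict:
        # Mirrors the dict passed to telemetry.send_custom_usage in the hunk above.
        return {
            "pipeline_is_convertible_to_sklearn": is_convertible_to_sklearn,
            "in_spcs_ml_runtime": bool(os.environ.get(IN_ML_RUNTIME_ENV_VAR)),
        }

    print(build_pipeline_telemetry(True))
    # e.g. {'pipeline_is_convertible_to_sklearn': True, 'in_spcs_ml_runtime': False}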
@@ -586,7 +586,14 @@ class PolynomialFeatures(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -590,7 +590,14 @@ class LabelPropagation(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
@@ -599,7 +599,14 @@ class LabelSpreading(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col: