snowflake-ml-python 1.5.1__py3-none-any.whl → 1.5.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (207)
  1. snowflake/cortex/_complete.py +26 -5
  2. snowflake/cortex/_sentiment.py +7 -4
  3. snowflake/cortex/_sse_client.py +81 -0
  4. snowflake/cortex/_util.py +105 -8
  5. snowflake/ml/_internal/lineage/lineage_utils.py +34 -25
  6. snowflake/ml/_internal/utils/temp_file_utils.py +5 -2
  7. snowflake/ml/dataset/dataset.py +15 -12
  8. snowflake/ml/dataset/dataset_factory.py +3 -4
  9. snowflake/ml/feature_store/access_manager.py +34 -30
  10. snowflake/ml/feature_store/feature_store.py +3 -3
  11. snowflake/ml/feature_store/feature_view.py +12 -11
  12. snowflake/ml/fileset/snowfs.py +2 -31
  13. snowflake/ml/model/_client/ops/model_ops.py +43 -0
  14. snowflake/ml/model/_client/sql/model_version.py +55 -3
  15. snowflake/ml/model/_model_composer/model_composer.py +7 -3
  16. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +3 -1
  17. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  18. snowflake/ml/model/_packager/model_meta/model_meta.py +1 -3
  19. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  20. snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -27
  21. snowflake/ml/model/_signatures/builtins_handler.py +2 -1
  22. snowflake/ml/model/_signatures/core.py +13 -1
  23. snowflake/ml/model/_signatures/pandas_handler.py +2 -0
  24. snowflake/ml/model/_signatures/snowpark_handler.py +3 -3
  25. snowflake/ml/model/model_signature.py +2 -0
  26. snowflake/ml/model/type_hints.py +1 -0
  27. snowflake/ml/modeling/_internal/estimator_utils.py +58 -1
  28. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +196 -242
  29. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +161 -0
  30. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +38 -18
  31. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +82 -134
  32. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +21 -17
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -2
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +9 -2
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -2
  36. snowflake/ml/modeling/cluster/birch.py +9 -2
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -2
  38. snowflake/ml/modeling/cluster/dbscan.py +9 -2
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -2
  40. snowflake/ml/modeling/cluster/k_means.py +9 -2
  41. snowflake/ml/modeling/cluster/mean_shift.py +9 -2
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -2
  43. snowflake/ml/modeling/cluster/optics.py +9 -2
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -2
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +9 -2
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -2
  47. snowflake/ml/modeling/compose/column_transformer.py +9 -2
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -2
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -2
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +9 -2
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +9 -2
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -2
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -2
  54. snowflake/ml/modeling/covariance/min_cov_det.py +9 -2
  55. snowflake/ml/modeling/covariance/oas.py +9 -2
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -2
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -2
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +9 -2
  59. snowflake/ml/modeling/decomposition/fast_ica.py +9 -2
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +9 -2
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +9 -2
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -2
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -2
  64. snowflake/ml/modeling/decomposition/pca.py +9 -2
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +9 -2
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +9 -2
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -2
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -2
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -2
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -2
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -2
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -2
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -2
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -2
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -2
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -2
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -2
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -2
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +9 -2
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -2
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -2
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -2
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +9 -2
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +9 -2
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -2
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +9 -2
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +9 -2
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +9 -2
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +9 -2
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +9 -2
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -2
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -2
  93. snowflake/ml/modeling/framework/base.py +3 -8
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -2
  95. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -2
  96. snowflake/ml/modeling/impute/iterative_imputer.py +9 -2
  97. snowflake/ml/modeling/impute/knn_imputer.py +9 -2
  98. snowflake/ml/modeling/impute/missing_indicator.py +9 -2
  99. snowflake/ml/modeling/impute/simple_imputer.py +28 -5
  100. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -2
  101. snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -2
  102. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -2
  103. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -2
  104. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -2
  105. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -2
  106. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -2
  107. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -2
  108. snowflake/ml/modeling/linear_model/ard_regression.py +9 -2
  109. snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -2
  110. snowflake/ml/modeling/linear_model/elastic_net.py +9 -2
  111. snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -2
  112. snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -2
  113. snowflake/ml/modeling/linear_model/huber_regressor.py +9 -2
  114. snowflake/ml/modeling/linear_model/lars.py +9 -2
  115. snowflake/ml/modeling/linear_model/lars_cv.py +9 -2
  116. snowflake/ml/modeling/linear_model/lasso.py +9 -2
  117. snowflake/ml/modeling/linear_model/lasso_cv.py +9 -2
  118. snowflake/ml/modeling/linear_model/lasso_lars.py +9 -2
  119. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -2
  120. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -2
  121. snowflake/ml/modeling/linear_model/linear_regression.py +9 -2
  122. snowflake/ml/modeling/linear_model/logistic_regression.py +9 -2
  123. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -2
  124. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -2
  125. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -2
  126. snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -2
  127. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -2
  128. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -2
  129. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -2
  130. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -2
  131. snowflake/ml/modeling/linear_model/perceptron.py +9 -2
  132. snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -2
  133. snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -2
  134. snowflake/ml/modeling/linear_model/ridge.py +9 -2
  135. snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -2
  136. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -2
  137. snowflake/ml/modeling/linear_model/ridge_cv.py +9 -2
  138. snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -2
  139. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -2
  140. snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -2
  141. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -2
  142. snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -2
  143. snowflake/ml/modeling/manifold/isomap.py +9 -2
  144. snowflake/ml/modeling/manifold/mds.py +9 -2
  145. snowflake/ml/modeling/manifold/spectral_embedding.py +9 -2
  146. snowflake/ml/modeling/manifold/tsne.py +9 -2
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -2
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -2
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +1 -5
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +1 -5
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -2
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -2
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -2
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -2
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -2
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -2
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -2
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -2
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -2
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -2
  161. snowflake/ml/modeling/neighbors/kernel_density.py +9 -2
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -2
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -2
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -2
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -2
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -2
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -2
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -2
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -2
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -2
  171. snowflake/ml/modeling/parameters/enable_anonymous_sproc.py +5 -0
  172. snowflake/ml/modeling/pipeline/pipeline.py +5 -0
  173. snowflake/ml/modeling/preprocessing/binarizer.py +7 -3
  174. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +7 -2
  175. snowflake/ml/modeling/preprocessing/label_encoder.py +8 -7
  176. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +7 -3
  177. snowflake/ml/modeling/preprocessing/min_max_scaler.py +7 -4
  178. snowflake/ml/modeling/preprocessing/normalizer.py +7 -3
  179. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +10 -2
  180. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +8 -5
  181. snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -2
  182. snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -4
  183. snowflake/ml/modeling/preprocessing/standard_scaler.py +7 -3
  184. snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -2
  185. snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -2
  186. snowflake/ml/modeling/svm/linear_svc.py +9 -2
  187. snowflake/ml/modeling/svm/linear_svr.py +9 -2
  188. snowflake/ml/modeling/svm/nu_svc.py +9 -2
  189. snowflake/ml/modeling/svm/nu_svr.py +9 -2
  190. snowflake/ml/modeling/svm/svc.py +9 -2
  191. snowflake/ml/modeling/svm/svr.py +9 -2
  192. snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -2
  193. snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -2
  194. snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -2
  195. snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -2
  196. snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -2
  197. snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -2
  198. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -2
  199. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -2
  200. snowflake/ml/registry/_manager/model_manager.py +59 -1
  201. snowflake/ml/registry/registry.py +10 -1
  202. snowflake/ml/version.py +1 -1
  203. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/METADATA +32 -4
  204. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/RECORD +207 -204
  205. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/LICENSE.txt +0 -0
  206. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/WHEEL +0 -0
  207. {snowflake_ml_python-1.5.1.dist-info → snowflake_ml_python-1.5.3.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/preprocessing/ordinal_encoder.py CHANGED
@@ -67,11 +67,14 @@ class OrdinalEncoder(base.BaseTransformer):
         The value to be used to encode unknown categories.
 
     input_cols: Optional[Union[str, List[str]]], default=None
-        The name(s) of one or more columns in a DataFrame containing a feature to be encoded.
+        The name(s) of one or more columns in the input DataFrame containing feature(s) to be encoded. Input
+        columns must be specified before fit with this argument or after initialization with the
+        `set_input_cols` method. This argument is optional for API consistency.
 
     output_cols: Optional[Union[str, List[str]]], default=None
-        The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-        columns specified must match the number of input columns.
+        The prefix to be used for encoded output for each input column. The number of
+        output column prefixes specified must equal the number of input columns. Output column prefixes must be
+        specified before transform with this argument or after initialization with the `set_output_cols` method.
 
     passthrough_cols: Optional[Union[str, List[str]]], default=None
         A string or a list of strings indicating column names to be excluded from any
@@ -247,7 +250,7 @@ class OrdinalEncoder(base.BaseTransformer):
         # columns: COLUMN_NAME, CATEGORY, INDEX
         state_df = self._get_category_index_state_df(dataset)
         # save the dataframe on server side so that transform doesn't need to upload
-        state_df.write.save_as_table(  # type: ignore[call-overload]
+        state_df.write.save_as_table(
             self._vocab_table_name,
             mode="overwrite",
             table_type="temporary",
@@ -520,7 +523,7 @@ class OrdinalEncoder(base.BaseTransformer):
         )
 
         batch_table_name = snowpark_utils.random_name_for_temp_object(snowpark_utils.TempObjectType.TABLE)
-        transformed_dataset.write.save_as_table(  # type: ignore[call-overload]
+        transformed_dataset.write.save_as_table(
             batch_table_name,
             mode="overwrite",
             table_type="temporary",
snowflake/ml/modeling/preprocessing/polynomial_features.py CHANGED
@@ -251,7 +251,7 @@ class PolynomialFeatures(BaseTransformer):
                 inspect.currentframe(), PolynomialFeatures.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -586,7 +586,14 @@ class PolynomialFeatures(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
        # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
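This hunk recurs in each generated estimator below: the one-row sample now contains only the input columns, with its pandas column names realigned to the Snowpark identifiers seen at fit time before the local estimator is invoked. A self-contained sketch of the failure mode this guards against, using plain pandas and scikit-learn (the column names are illustrative, not from the diff):

import pandas as pd
from sklearn.linear_model import LinearRegression

# Fit on a DataFrame so the estimator records feature names.
fit_df = pd.DataFrame({"AGE": [25.0, 40.0], "INCOME": [50.0, 90.0]})
model = LinearRegression().fit(fit_df, [0.0, 1.0])

# A sample whose columns come back renamed/reordered (as a Snowpark
# to_pandas() round-trip can produce) trips scikit-learn's feature-name
# check (a warning or ValueError, depending on the sklearn version).
sample = pd.DataFrame({"income": [50.0], "age": [25.0]})

# Realign order and names to the fit-time identifiers, as the diff does.
sample = sample[["age", "income"]]
sample.columns = ["AGE", "INCOME"]
print(model.predict(sample))  # succeeds once names and order match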
snowflake/ml/modeling/preprocessing/robust_scaler.py CHANGED
@@ -37,12 +37,15 @@ class RobustScaler(base.BaseTransformer):
         the dataset is scaled down. If less than 1, the dataset is scaled up.
 
     input_cols: Optional[Union[str, List[str]]], default=None
-        The name(s) of one or more columns in a DataFrame containing a feature to be scaled.
+        The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
+        columns must be specified before fit with this argument or after initialization with the
+        `set_input_cols` method. This argument is optional for API consistency.
 
     output_cols: Optional[Union[str, List[str]]], default=None
-        The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-        columns specified must match the number of input columns. For dense output, the column names specified are
-        used as base names for the columns created for each category.
+        The name(s) to assign output columns in the output DataFrame. The number of
+        columns specified must equal the number of input columns. Output columns must be specified before transform
+        with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+        API consistency.
 
     passthrough_cols: Optional[Union[str, List[str]]], default=None
         A string or a list of strings indicating column names to be excluded from any
snowflake/ml/modeling/preprocessing/standard_scaler.py CHANGED
@@ -26,11 +26,15 @@ class StandardScaler(base.BaseTransformer):
         If True, scale the data unit variance (i.e. unit standard deviation).
 
     input_cols: Optional[Union[str, List[str]]], default=None
-        The name(s) of one or more columns in a DataFrame containing a feature to be scaled.
+        The name(s) of one or more columns in the input DataFrame containing feature(s) to be scaled. Input
+        columns must be specified before fit with this argument or after initialization with the
+        `set_input_cols` method. This argument is optional for API consistency.
 
     output_cols: Optional[Union[str, List[str]]], default=None
-        The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
-        columns specified must match the number of input columns.
+        The name(s) to assign output columns in the output DataFrame. The number of
+        columns specified must equal the number of input columns. Output columns must be specified before transform
+        with this argument or after initialization with the `set_output_cols` method. This argument is optional for
+        API consistency.
 
     passthrough_cols: Optional[Union[str, List[str]]], default=None
         A string or a list of strings indicating column names to be excluded from any
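The docstring changes for OrdinalEncoder, RobustScaler, and StandardScaler above all document the same contract: columns may be omitted at construction, but input columns must be set before fit and output columns before transform. A minimal sketch of that usage, assuming an active Snowpark `session` and an illustrative FEATURES table (the table and column names are not from the diff):

from snowflake.ml.modeling.preprocessing import StandardScaler

df = session.table("FEATURES")  # `session` assumed to be a snowpark.Session

# Omitting input_cols/output_cols at construction is allowed for API
# consistency; supply them via the setters before fit/transform.
scaler = StandardScaler()
scaler.set_input_cols(["AGE", "INCOME"])
scaler.set_output_cols(["AGE_SCALED", "INCOME_SCALED"])  # one name per input column

scaled_df = scaler.fit(df).transform(df)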
snowflake/ml/modeling/semi_supervised/label_propagation.py CHANGED
@@ -257,7 +257,7 @@ class LabelPropagation(BaseTransformer):
                 inspect.currentframe(), LabelPropagation.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -590,7 +590,14 @@ class LabelPropagation(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/semi_supervised/label_spreading.py CHANGED
@@ -266,7 +266,7 @@ class LabelSpreading(BaseTransformer):
                 inspect.currentframe(), LabelSpreading.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -599,7 +599,14 @@ class LabelSpreading(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/svm/linear_svc.py CHANGED
@@ -322,7 +322,7 @@ class LinearSVC(BaseTransformer):
                 inspect.currentframe(), LinearSVC.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -655,7 +655,14 @@ class LinearSVC(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/svm/linear_svr.py CHANGED
@@ -294,7 +294,7 @@ class LinearSVR(BaseTransformer):
                 inspect.currentframe(), LinearSVR.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -627,7 +627,14 @@ class LinearSVR(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/svm/nu_svc.py CHANGED
@@ -328,7 +328,7 @@ class NuSVC(BaseTransformer):
                 inspect.currentframe(), NuSVC.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -661,7 +661,14 @@ class NuSVC(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/svm/nu_svr.py CHANGED
@@ -289,7 +289,7 @@ class NuSVR(BaseTransformer):
                 inspect.currentframe(), NuSVR.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -622,7 +622,14 @@ class NuSVR(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/svm/svc.py CHANGED
@@ -331,7 +331,7 @@ class SVC(BaseTransformer):
                 inspect.currentframe(), SVC.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -664,7 +664,14 @@ class SVC(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/svm/svr.py CHANGED
@@ -292,7 +292,7 @@ class SVR(BaseTransformer):
                 inspect.currentframe(), SVR.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -625,7 +625,14 @@ class SVR(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/tree/decision_tree_classifier.py CHANGED
@@ -359,7 +359,7 @@ class DecisionTreeClassifier(BaseTransformer):
                 inspect.currentframe(), DecisionTreeClassifier.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -692,7 +692,14 @@ class DecisionTreeClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/tree/decision_tree_regressor.py CHANGED
@@ -341,7 +341,7 @@ class DecisionTreeRegressor(BaseTransformer):
                 inspect.currentframe(), DecisionTreeRegressor.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -674,7 +674,14 @@ class DecisionTreeRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/tree/extra_tree_classifier.py CHANGED
@@ -351,7 +351,7 @@ class ExtraTreeClassifier(BaseTransformer):
                 inspect.currentframe(), ExtraTreeClassifier.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -684,7 +684,14 @@ class ExtraTreeClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/tree/extra_tree_regressor.py CHANGED
@@ -333,7 +333,7 @@ class ExtraTreeRegressor(BaseTransformer):
                 inspect.currentframe(), ExtraTreeRegressor.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -666,7 +666,14 @@ class ExtraTreeRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/xgboost/xgb_classifier.py CHANGED
@@ -451,7 +451,7 @@ class XGBClassifier(BaseTransformer):
                 inspect.currentframe(), XGBClassifier.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -784,7 +784,14 @@ class XGBClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/xgboost/xgb_regressor.py CHANGED
@@ -450,7 +450,7 @@ class XGBRegressor(BaseTransformer):
                 inspect.currentframe(), XGBRegressor.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -783,7 +783,14 @@ class XGBRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/xgboost/xgbrf_classifier.py CHANGED
@@ -455,7 +455,7 @@ class XGBRFClassifier(BaseTransformer):
                 inspect.currentframe(), XGBRFClassifier.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -788,7 +788,14 @@ class XGBRFClassifier(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/modeling/xgboost/xgbrf_regressor.py CHANGED
@@ -455,7 +455,7 @@ class XGBRFRegressor(BaseTransformer):
                 inspect.currentframe(), XGBRFRegressor.__class__.__name__
             ),
             api_calls=[Session.call],
-            custom_tags=dict([("autogen", True)]) if self._autogenerated else None,
+            custom_tags={"autogen": True} if self._autogenerated else None,
         )
         pd_df: pd.DataFrame = dataset.to_pandas(statement_params=statement_params)
         pd_df.columns = dataset.columns
@@ -788,7 +788,14 @@ class XGBRFRegressor(BaseTransformer):
     ) -> List[str]:
         # in case the inferred output column names dimension is different
         # we use one line of snowpark dataframe and put it into sklearn estimator using pandas
-        output_df_pd = getattr(self, method)(dataset.limit(1).to_pandas(), output_cols_prefix)
+        sample_pd_df = dataset.select(self.input_cols).limit(1).to_pandas()
+
+        # Rename the pandas df column names to snowflake identifiers and reorder columns to match the order
+        # seen during the fit.
+        snowpark_column_names = dataset.select(self.input_cols).columns
+        sample_pd_df.columns = snowpark_column_names
+
+        output_df_pd = getattr(self, method)(sample_pd_df, output_cols_prefix)
         output_df_columns = list(output_df_pd.columns)
         output_df_columns_set: Set[str] = set(output_df_columns) - set(dataset.columns)
         if self.sample_weight_col:
snowflake/ml/registry/_manager/model_manager.py CHANGED
@@ -1,5 +1,5 @@
 from types import ModuleType
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 import pandas as pd
 from absl.logging import logging
@@ -31,6 +31,64 @@ class ModelManager:
         self._hrid_generator = hrid_generator.HRID16()
 
     def log_model(
+        self,
+        *,
+        model: Union[model_types.SupportedModelType, model_version_impl.ModelVersion],
+        model_name: str,
+        version_name: Optional[str] = None,
+        comment: Optional[str] = None,
+        metrics: Optional[Dict[str, Any]] = None,
+        conda_dependencies: Optional[List[str]] = None,
+        pip_requirements: Optional[List[str]] = None,
+        python_version: Optional[str] = None,
+        signatures: Optional[Dict[str, model_signature.ModelSignature]] = None,
+        sample_input_data: Optional[model_types.SupportedDataType] = None,
+        code_paths: Optional[List[str]] = None,
+        ext_modules: Optional[List[ModuleType]] = None,
+        options: Optional[model_types.ModelSaveOption] = None,
+        statement_params: Optional[Dict[str, Any]] = None,
+    ) -> model_version_impl.ModelVersion:
+        if not version_name:
+            version_name = self._hrid_generator.generate()[1]
+
+        if isinstance(model, model_version_impl.ModelVersion):
+            (
+                source_database_name_id,
+                source_schema_name_id,
+                source_model_name_id,
+            ) = sql_identifier.parse_fully_qualified_name(model.fully_qualified_model_name)
+
+            self._model_ops.create_from_model_version(
+                source_database_name=source_database_name_id,
+                source_schema_name=source_schema_name_id,
+                source_model_name=source_model_name_id,
+                source_version_name=sql_identifier.SqlIdentifier(model.version_name),
+                database_name=None,
+                schema_name=None,
+                model_name=sql_identifier.SqlIdentifier(model_name),
+                version_name=sql_identifier.SqlIdentifier(version_name),
+                statement_params=statement_params,
+            )
+            return self.get_model(model_name=model_name, statement_params=statement_params).version(version_name)
+
+        return self._log_model(
+            model=model,
+            model_name=model_name,
+            version_name=version_name,
+            comment=comment,
+            metrics=metrics,
+            conda_dependencies=conda_dependencies,
+            pip_requirements=pip_requirements,
+            python_version=python_version,
+            signatures=signatures,
+            sample_input_data=sample_input_data,
+            code_paths=code_paths,
+            ext_modules=ext_modules,
+            options=options,
+            statement_params=statement_params,
+        )
+
+    def _log_model(
         self,
         model: model_types.SupportedModelType,
         *,
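The new log_model branch above accepts an existing ModelVersion and copies it server-side via create_from_model_version, rather than re-packaging and re-uploading a model artifact. A hedged sketch of how this surfaces through the public Registry API, assuming an active Snowpark `session` (the model and version names are illustrative, not from the diff):

from snowflake.ml.registry import Registry

reg = Registry(session=session)  # `session` assumed to be a snowpark.Session

# Look up an existing version, then log it under a new model name; the copy
# happens in Snowflake, with no local rebuild of the model artifact.
mv = reg.get_model("MY_MODEL").version("V1")
copied = reg.log_model(mv, model_name="MY_MODEL_COPY", version_name="V1")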
snowflake/ml/registry/registry.py CHANGED
@@ -71,6 +71,16 @@ class Registry:
     @telemetry.send_api_usage_telemetry(
         project=_TELEMETRY_PROJECT,
         subproject=_MODEL_TELEMETRY_SUBPROJECT,
+        func_params_to_log=[
+            "model_name",
+            "version_name",
+            "comment",
+            "metrics",
+            "conda_dependencies",
+            "pip_requirements",
+            "python_version",
+            "signatures",
+        ],
     )
     def log_model(
         self,
@@ -142,7 +152,6 @@ class Registry:
         Returns:
             ModelVersion: ModelVersion object corresponding to the model just logged.
         """
-
         statement_params = telemetry.get_statement_params(
             project=_TELEMETRY_PROJECT,
             subproject=_MODEL_TELEMETRY_SUBPROJECT,
snowflake/ml/version.py CHANGED
@@ -1 +1 @@
-VERSION="1.5.1"
+VERSION="1.5.3"