snowflake-ml-python 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Files changed (225)
  1. snowflake/cortex/_complete.py +1 -1
  2. snowflake/cortex/_extract_answer.py +1 -1
  3. snowflake/cortex/_sentiment.py +1 -1
  4. snowflake/cortex/_summarize.py +1 -1
  5. snowflake/cortex/_translate.py +1 -1
  6. snowflake/ml/_internal/env_utils.py +68 -6
  7. snowflake/ml/_internal/file_utils.py +34 -4
  8. snowflake/ml/_internal/telemetry.py +79 -91
  9. snowflake/ml/_internal/utils/identifier.py +78 -72
  10. snowflake/ml/_internal/utils/retryable_http.py +16 -4
  11. snowflake/ml/_internal/utils/spcs_attribution_utils.py +122 -0
  12. snowflake/ml/dataset/dataset.py +1 -1
  13. snowflake/ml/model/_api.py +21 -14
  14. snowflake/ml/model/_client/model/model_impl.py +176 -0
  15. snowflake/ml/model/_client/model/model_method_info.py +19 -0
  16. snowflake/ml/model/_client/model/model_version_impl.py +291 -0
  17. snowflake/ml/model/_client/ops/metadata_ops.py +107 -0
  18. snowflake/ml/model/_client/ops/model_ops.py +308 -0
  19. snowflake/ml/model/_client/sql/model.py +75 -0
  20. snowflake/ml/model/_client/sql/model_version.py +213 -0
  21. snowflake/ml/model/_client/sql/stage.py +40 -0
  22. snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -4
  23. snowflake/ml/model/_deploy_client/image_builds/templates/image_build_job_spec_template +24 -8
  24. snowflake/ml/model/_deploy_client/image_builds/templates/kaniko_shell_script_template +23 -0
  25. snowflake/ml/model/_deploy_client/snowservice/deploy.py +14 -2
  26. snowflake/ml/model/_deploy_client/utils/constants.py +1 -0
  27. snowflake/ml/model/_deploy_client/warehouse/deploy.py +2 -2
  28. snowflake/ml/model/_model_composer/model_composer.py +31 -9
  29. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +25 -10
  30. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -2
  31. snowflake/ml/model/_model_composer/model_method/infer_function.py_template +2 -1
  32. snowflake/ml/model/_model_composer/model_method/model_method.py +34 -3
  33. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +1 -1
  34. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +3 -1
  35. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +10 -28
  36. snowflake/ml/model/_packager/model_meta/model_meta.py +18 -16
  37. snowflake/ml/model/_signatures/snowpark_handler.py +1 -1
  38. snowflake/ml/model/model_signature.py +108 -53
  39. snowflake/ml/model/type_hints.py +1 -0
  40. snowflake/ml/modeling/_internal/distributed_hpo_trainer.py +554 -0
  41. snowflake/ml/modeling/_internal/estimator_protocols.py +1 -60
  42. snowflake/ml/modeling/_internal/model_specifications.py +146 -0
  43. snowflake/ml/modeling/_internal/model_trainer.py +13 -0
  44. snowflake/ml/modeling/_internal/model_trainer_builder.py +78 -0
  45. snowflake/ml/modeling/_internal/pandas_trainer.py +54 -0
  46. snowflake/ml/modeling/_internal/snowpark_handlers.py +6 -760
  47. snowflake/ml/modeling/_internal/snowpark_trainer.py +331 -0
  48. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +108 -135
  49. snowflake/ml/modeling/cluster/affinity_propagation.py +106 -135
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +106 -135
  51. snowflake/ml/modeling/cluster/birch.py +106 -135
  52. snowflake/ml/modeling/cluster/bisecting_k_means.py +106 -135
  53. snowflake/ml/modeling/cluster/dbscan.py +106 -135
  54. snowflake/ml/modeling/cluster/feature_agglomeration.py +106 -135
  55. snowflake/ml/modeling/cluster/k_means.py +105 -135
  56. snowflake/ml/modeling/cluster/mean_shift.py +106 -135
  57. snowflake/ml/modeling/cluster/mini_batch_k_means.py +105 -135
  58. snowflake/ml/modeling/cluster/optics.py +106 -135
  59. snowflake/ml/modeling/cluster/spectral_biclustering.py +106 -135
  60. snowflake/ml/modeling/cluster/spectral_clustering.py +106 -135
  61. snowflake/ml/modeling/cluster/spectral_coclustering.py +106 -135
  62. snowflake/ml/modeling/compose/column_transformer.py +106 -135
  63. snowflake/ml/modeling/compose/transformed_target_regressor.py +108 -135
  64. snowflake/ml/modeling/covariance/elliptic_envelope.py +106 -135
  65. snowflake/ml/modeling/covariance/empirical_covariance.py +99 -128
  66. snowflake/ml/modeling/covariance/graphical_lasso.py +106 -135
  67. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +106 -135
  68. snowflake/ml/modeling/covariance/ledoit_wolf.py +104 -133
  69. snowflake/ml/modeling/covariance/min_cov_det.py +106 -135
  70. snowflake/ml/modeling/covariance/oas.py +99 -128
  71. snowflake/ml/modeling/covariance/shrunk_covariance.py +103 -132
  72. snowflake/ml/modeling/decomposition/dictionary_learning.py +106 -135
  73. snowflake/ml/modeling/decomposition/factor_analysis.py +106 -135
  74. snowflake/ml/modeling/decomposition/fast_ica.py +106 -135
  75. snowflake/ml/modeling/decomposition/incremental_pca.py +106 -135
  76. snowflake/ml/modeling/decomposition/kernel_pca.py +106 -135
  77. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +106 -135
  78. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +106 -135
  79. snowflake/ml/modeling/decomposition/pca.py +106 -135
  80. snowflake/ml/modeling/decomposition/sparse_pca.py +106 -135
  81. snowflake/ml/modeling/decomposition/truncated_svd.py +106 -135
  82. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +108 -135
  83. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +108 -135
  84. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +108 -135
  85. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +108 -135
  86. snowflake/ml/modeling/ensemble/bagging_classifier.py +108 -135
  87. snowflake/ml/modeling/ensemble/bagging_regressor.py +108 -135
  88. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +108 -135
  89. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +108 -135
  90. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +108 -135
  91. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +108 -135
  92. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +108 -135
  93. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +108 -135
  94. snowflake/ml/modeling/ensemble/isolation_forest.py +106 -135
  95. snowflake/ml/modeling/ensemble/random_forest_classifier.py +108 -135
  96. snowflake/ml/modeling/ensemble/random_forest_regressor.py +108 -135
  97. snowflake/ml/modeling/ensemble/stacking_regressor.py +108 -135
  98. snowflake/ml/modeling/ensemble/voting_classifier.py +108 -135
  99. snowflake/ml/modeling/ensemble/voting_regressor.py +108 -135
  100. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +101 -128
  101. snowflake/ml/modeling/feature_selection/select_fdr.py +99 -126
  102. snowflake/ml/modeling/feature_selection/select_fpr.py +99 -126
  103. snowflake/ml/modeling/feature_selection/select_fwe.py +99 -126
  104. snowflake/ml/modeling/feature_selection/select_k_best.py +100 -127
  105. snowflake/ml/modeling/feature_selection/select_percentile.py +99 -126
  106. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +106 -135
  107. snowflake/ml/modeling/feature_selection/variance_threshold.py +95 -124
  108. snowflake/ml/modeling/framework/base.py +83 -1
  109. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +108 -135
  110. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +108 -135
  111. snowflake/ml/modeling/impute/iterative_imputer.py +106 -135
  112. snowflake/ml/modeling/impute/knn_imputer.py +106 -135
  113. snowflake/ml/modeling/impute/missing_indicator.py +106 -135
  114. snowflake/ml/modeling/impute/simple_imputer.py +9 -1
  115. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +96 -125
  116. snowflake/ml/modeling/kernel_approximation/nystroem.py +106 -135
  117. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +106 -135
  118. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +105 -134
  119. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +103 -132
  120. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +108 -135
  121. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +90 -118
  122. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +90 -118
  123. snowflake/ml/modeling/linear_model/ard_regression.py +108 -135
  124. snowflake/ml/modeling/linear_model/bayesian_ridge.py +108 -135
  125. snowflake/ml/modeling/linear_model/elastic_net.py +108 -135
  126. snowflake/ml/modeling/linear_model/elastic_net_cv.py +108 -135
  127. snowflake/ml/modeling/linear_model/gamma_regressor.py +108 -135
  128. snowflake/ml/modeling/linear_model/huber_regressor.py +108 -135
  129. snowflake/ml/modeling/linear_model/lars.py +108 -135
  130. snowflake/ml/modeling/linear_model/lars_cv.py +108 -135
  131. snowflake/ml/modeling/linear_model/lasso.py +108 -135
  132. snowflake/ml/modeling/linear_model/lasso_cv.py +108 -135
  133. snowflake/ml/modeling/linear_model/lasso_lars.py +108 -135
  134. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +108 -135
  135. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +108 -135
  136. snowflake/ml/modeling/linear_model/linear_regression.py +108 -135
  137. snowflake/ml/modeling/linear_model/logistic_regression.py +108 -135
  138. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +108 -135
  139. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +108 -135
  140. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +108 -135
  141. snowflake/ml/modeling/linear_model/multi_task_lasso.py +108 -135
  142. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +108 -135
  143. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +108 -135
  144. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +108 -135
  145. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +107 -135
  146. snowflake/ml/modeling/linear_model/perceptron.py +107 -135
  147. snowflake/ml/modeling/linear_model/poisson_regressor.py +108 -135
  148. snowflake/ml/modeling/linear_model/ransac_regressor.py +108 -135
  149. snowflake/ml/modeling/linear_model/ridge.py +108 -135
  150. snowflake/ml/modeling/linear_model/ridge_classifier.py +108 -135
  151. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +108 -135
  152. snowflake/ml/modeling/linear_model/ridge_cv.py +108 -135
  153. snowflake/ml/modeling/linear_model/sgd_classifier.py +108 -135
  154. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +106 -135
  155. snowflake/ml/modeling/linear_model/sgd_regressor.py +108 -135
  156. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +108 -135
  157. snowflake/ml/modeling/linear_model/tweedie_regressor.py +108 -135
  158. snowflake/ml/modeling/manifold/isomap.py +106 -135
  159. snowflake/ml/modeling/manifold/mds.py +106 -135
  160. snowflake/ml/modeling/manifold/spectral_embedding.py +106 -135
  161. snowflake/ml/modeling/manifold/tsne.py +106 -135
  162. snowflake/ml/modeling/metrics/classification.py +196 -55
  163. snowflake/ml/modeling/metrics/correlation.py +4 -2
  164. snowflake/ml/modeling/metrics/covariance.py +7 -4
  165. snowflake/ml/modeling/metrics/ranking.py +32 -16
  166. snowflake/ml/modeling/metrics/regression.py +60 -32
  167. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +106 -135
  168. snowflake/ml/modeling/mixture/gaussian_mixture.py +106 -135
  169. snowflake/ml/modeling/model_selection/grid_search_cv.py +91 -148
  170. snowflake/ml/modeling/model_selection/randomized_search_cv.py +93 -154
  171. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +105 -132
  172. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +108 -135
  173. snowflake/ml/modeling/multiclass/output_code_classifier.py +108 -135
  174. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +108 -135
  175. snowflake/ml/modeling/naive_bayes/categorical_nb.py +108 -135
  176. snowflake/ml/modeling/naive_bayes/complement_nb.py +108 -135
  177. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +98 -125
  178. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +107 -134
  179. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +108 -135
  180. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +108 -135
  181. snowflake/ml/modeling/neighbors/kernel_density.py +106 -135
  182. snowflake/ml/modeling/neighbors/local_outlier_factor.py +106 -135
  183. snowflake/ml/modeling/neighbors/nearest_centroid.py +108 -135
  184. snowflake/ml/modeling/neighbors/nearest_neighbors.py +106 -135
  185. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +108 -135
  186. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +108 -135
  187. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +108 -135
  188. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +106 -135
  189. snowflake/ml/modeling/neural_network/mlp_classifier.py +108 -135
  190. snowflake/ml/modeling/neural_network/mlp_regressor.py +108 -135
  191. snowflake/ml/modeling/parameters/disable_distributed_hpo.py +2 -6
  192. snowflake/ml/modeling/preprocessing/binarizer.py +25 -8
  193. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +9 -4
  194. snowflake/ml/modeling/preprocessing/label_encoder.py +31 -11
  195. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +27 -9
  196. snowflake/ml/modeling/preprocessing/min_max_scaler.py +42 -14
  197. snowflake/ml/modeling/preprocessing/normalizer.py +9 -4
  198. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +26 -10
  199. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +37 -13
  200. snowflake/ml/modeling/preprocessing/polynomial_features.py +106 -135
  201. snowflake/ml/modeling/preprocessing/robust_scaler.py +39 -13
  202. snowflake/ml/modeling/preprocessing/standard_scaler.py +36 -12
  203. snowflake/ml/modeling/semi_supervised/label_propagation.py +108 -135
  204. snowflake/ml/modeling/semi_supervised/label_spreading.py +108 -135
  205. snowflake/ml/modeling/svm/linear_svc.py +108 -135
  206. snowflake/ml/modeling/svm/linear_svr.py +108 -135
  207. snowflake/ml/modeling/svm/nu_svc.py +108 -135
  208. snowflake/ml/modeling/svm/nu_svr.py +108 -135
  209. snowflake/ml/modeling/svm/svc.py +108 -135
  210. snowflake/ml/modeling/svm/svr.py +108 -135
  211. snowflake/ml/modeling/tree/decision_tree_classifier.py +108 -135
  212. snowflake/ml/modeling/tree/decision_tree_regressor.py +108 -135
  213. snowflake/ml/modeling/tree/extra_tree_classifier.py +108 -135
  214. snowflake/ml/modeling/tree/extra_tree_regressor.py +108 -135
  215. snowflake/ml/modeling/xgboost/xgb_classifier.py +108 -136
  216. snowflake/ml/modeling/xgboost/xgb_regressor.py +108 -136
  217. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +108 -136
  218. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +108 -136
  219. snowflake/ml/registry/model_registry.py +2 -0
  220. snowflake/ml/registry/registry.py +215 -0
  221. snowflake/ml/version.py +1 -1
  222. {snowflake_ml_python-1.1.0.dist-info → snowflake_ml_python-1.1.2.dist-info}/METADATA +34 -1
  223. snowflake_ml_python-1.1.2.dist-info/RECORD +347 -0
  224. snowflake_ml_python-1.1.0.dist-info/RECORD +0 -331
  225. {snowflake_ml_python-1.1.0.dist-info → snowflake_ml_python-1.1.2.dist-info}/WHEEL +0 -0
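To confirm which release is installed after upgrading (for example via pip install --upgrade snowflake-ml-python==1.1.2), a quick standard-library check:

import importlib.metadata

# Should print "1.1.2" once the new wheel is installed.
print(importlib.metadata.version("snowflake-ml-python"))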
snowflake/ml/modeling/preprocessing/max_abs_scaler.py

@@ -27,14 +27,29 @@ class MaxAbsScaler(base.BaseTransformer):
     (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MaxAbsScaler.html).
 
     Args:
-        input_cols: The name(s) of one or more columns in a DataFrame containing a feature to be scaled.
-        output_cols: The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
+        input_cols: Optional[Union[str, List[str]]], default=None
+            The name(s) of one or more columns in a DataFrame containing a feature to be scaled.
+
+        output_cols: Optional[Union[str, List[str]]], default=None
+            The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
             columns specified must match the number of input columns.
-        drop_input_cols: Remove input columns from output if set True. False by default.
+
+        passthrough_cols: Optional[Union[str, List[str]]], default=None
+            A string or a list of strings indicating column names to be excluded from any
+            operations (such as train, transform, or inference). These specified column(s)
+            will remain untouched throughout the process. This option is helpful in scenarios
+            requiring automatic input_cols inference, but need to avoid using specific
+            columns, like index columns, during training or inference.
+
+        drop_input_cols: Optional[bool], default=False
+            Remove input columns from output if set True. False by default.
 
     Attributes:
-        scale_: dict {column_name: value} or None. Per-feature relative scaling factor.
-        max_abs_: dict {column_name: value} or None. Per-feature maximum absolute value.
+        scale_: Dict[str, float]
+            dict {column_name: value} or None. Per-feature relative scaling factor.
+
+        max_abs_: Dict[str, float]
+            dict {column_name: value} or None. Per-feature maximum absolute value.
     """
 
     def __init__(
@@ -42,6 +57,7 @@ class MaxAbsScaler(base.BaseTransformer):
         *,
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
+        passthrough_cols: Optional[Union[str, Iterable[str]]] = None,
         drop_input_cols: Optional[bool] = False,
     ) -> None:
         """
@@ -55,6 +71,11 @@ class MaxAbsScaler(base.BaseTransformer):
         Args:
             input_cols: Single or multiple input columns.
             output_cols: Single or multiple output columns.
+            passthrough_cols: A string or a list of strings indicating column names to be excluded from any
+                operations (such as train, transform, or inference). These specified column(s)
+                will remain untouched throughout the process. This option is helful in scenarios
+                requiring automatic input_cols inference, but need to avoid using specific
+                columns, like index columns, during in training or inference.
             drop_input_cols: Remove input columns from output if set True. False by default.
 
         Attributes:
@@ -74,6 +95,7 @@ class MaxAbsScaler(base.BaseTransformer):
 
         self.set_input_cols(input_cols)
         self.set_output_cols(output_cols)
+        self.set_passthrough_cols(passthrough_cols)
 
     def _reset(self) -> None:
         """
@@ -138,10 +160,6 @@ class MaxAbsScaler(base.BaseTransformer):
         project=base.PROJECT,
         subproject=base.SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=base.PROJECT,
-        subproject=base.SUBPROJECT,
-    )
     def transform(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> Union[snowpark.DataFrame, pd.DataFrame]:
         """
         Scale the data.
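The hunks above add a passthrough_cols option to the transformer and drop the add_stmt_params_to_df telemetry decorator from transform. A minimal usage sketch of the new parameter, assuming MaxAbsScaler is imported from snowflake.ml.modeling.preprocessing and using hypothetical column names and a pre-existing Snowpark or pandas DataFrame train_df:

from snowflake.ml.modeling.preprocessing import MaxAbsScaler

# "ROW_ID" is excluded from fitting and scaling but carried through untouched.
scaler = MaxAbsScaler(
    input_cols=["FEATURE_1", "FEATURE_2"],
    output_cols=["FEATURE_1_SCALED", "FEATURE_2_SCALED"],
    passthrough_cols=["ROW_ID"],  # new in this release, per the hunks above
)
scaled_df = scaler.fit(train_df).transform(train_df)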
snowflake/ml/modeling/preprocessing/min_max_scaler.py

@@ -21,20 +21,45 @@ class MinMaxScaler(base.BaseTransformer):
     (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.MinMaxScaler.html).
 
     Args:
-        feature_range: Desired range of transformed data (default is 0 to 1).
-        clip: Whether to clip transformed values of held-out data to the specified feature range (default is True).
-        input_cols: The name(s) of one or more columns in a DataFrame containing a feature to be scaled. Each specified
+        feature_range: Tuple[float, float], default=(0, 1)
+            Desired range of transformed data (default is 0 to 1).
+
+        clip: bool, default=False
+            Whether to clip transformed values of held-out data to the specified feature range (default is True).
+
+        input_cols: Optional[Union[str, List[str]]], default=None
+            The name(s) of one or more columns in a DataFrame containing a feature to be scaled. Each specified
             input column is scaled independently and stored in the corresponding output column.
-        output_cols: The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
+
+        output_cols: Optional[Union[str, List[str]]], default=None
+            The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
             columns specified must match the number of input columns.
-        drop_input_cols: Remove input columns from output if set True. False by default.
+
+        passthrough_cols: Optional[Union[str, List[str]]], default=None
+            A string or a list of strings indicating column names to be excluded from any
+            operations (such as train, transform, or inference). These specified column(s)
+            will remain untouched throughout the process. This option is helpful in scenarios
+            requiring automatic input_cols inference, but need to avoid using specific
+            columns, like index columns, during training or inference.
+
+        drop_input_cols: Optional[bool], default=False
+            Remove input columns from output if set True. False by default.
 
     Attributes:
-        min_: dict {column_name: value} or None. Per-feature adjustment for minimum.
-        scale_: dict {column_name: value} or None. Per-feature relative scaling factor.
-        data_min_: dict {column_name: value} or None. Per-feature minimum seen in the data.
-        data_max_: dict {column_name: value} or None. Per-feature maximum seen in the data.
-        data_range_: dict {column_name: value} or None. Per-feature range seen in the data as a (min, max) tuple.
+        min_: Dict[str, float]
+            dict {column_name: value} or None. Per-feature adjustment for minimum.
+
+        scale_: Dict[str, float]
+            dict {column_name: value} or None. Per-feature relative scaling factor.
+
+        data_min_: Dict[str, float]
+            dict {column_name: value} or None. Per-feature minimum seen in the data.
+
+        data_max_: Dict[str, float]
+            dict {column_name: value} or None. Per-feature maximum seen in the data.
+
+        data_range_: Dict[str, float]
+            dict {column_name: value} or None. Per-feature range seen in the data as a (min, max) tuple.
     """
 
     def __init__(
@@ -44,6 +69,7 @@ class MinMaxScaler(base.BaseTransformer):
         clip: bool = False,
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
+        passthrough_cols: Optional[Union[str, Iterable[str]]] = None,
         drop_input_cols: Optional[bool] = False,
     ) -> None:
         """
@@ -54,6 +80,11 @@ class MinMaxScaler(base.BaseTransformer):
            clip: Set to True to clip transformed values of held-out data to provided `feature range`.
            input_cols: Single or multiple input columns.
            output_cols: Single or multiple output columns.
+           passthrough_cols: A string or a list of strings indicating column names to be excluded from any
+               operations (such as train, transform, or inference). These specified column(s)
+               will remain untouched throughout the process. This option is helful in scenarios
+               requiring automatic input_cols inference, but need to avoid using specific
+               columns, like index columns, during in training or inference.
            drop_input_cols: Remove input columns from output if set True. False by default.
 
         Attributes:
@@ -78,6 +109,7 @@ class MinMaxScaler(base.BaseTransformer):
 
         self.set_input_cols(input_cols)
         self.set_output_cols(output_cols)
+        self.set_passthrough_cols(passthrough_cols)
 
     def _reset(self) -> None:
         """
@@ -158,10 +190,6 @@ class MinMaxScaler(base.BaseTransformer):
         project=base.PROJECT,
         subproject=base.SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=base.PROJECT,
-        subproject=base.SUBPROJECT,
-    )
     def transform(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> Union[snowpark.DataFrame, pd.DataFrame]:
         """
         Scale features according to feature_range.
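The same passthrough_cols change applies here; MinMaxScaler additionally documents feature_range and clip. A sketch under the same assumptions (hypothetical column names, pre-existing train_df and test_df):

from snowflake.ml.modeling.preprocessing import MinMaxScaler

scaler = MinMaxScaler(
    feature_range=(0, 1),           # target range for the scaled values
    clip=True,                      # clip held-out values into feature_range
    input_cols=["AMOUNT"],
    output_cols=["AMOUNT_SCALED"],
    passthrough_cols=["ROW_ID"],
)
scaler.fit(train_df)
scored_df = scaler.transform(test_df)  # out-of-range test values are clipped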
snowflake/ml/modeling/preprocessing/normalizer.py

@@ -34,6 +34,13 @@ class Normalizer(base.BaseTransformer):
         A string or list of strings representing column names that will store the output of transform operation.
         The length of `output_cols` must equal the length of `input_cols`.
 
+    passthrough_cols: Optional[Union[str, List[str]]]
+        A string or a list of strings indicating column names to be excluded from any
+        operations (such as train, transform, or inference). These specified column(s)
+        will remain untouched throughout the process. This option is helpful in scenarios
+        requiring automatic input_cols inference, but need to avoid using specific
+        columns, like index columns, during training or inference.
+
     drop_input_cols: bool, default=False
         Remove input columns from output if set `True`.
     """
@@ -44,6 +51,7 @@ class Normalizer(base.BaseTransformer):
         norm: str = "l2",
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
+        passthrough_cols: Optional[Union[str, Iterable[str]]] = None,
         drop_input_cols: Optional[bool] = False,
     ) -> None:
         super().__init__(drop_input_cols=drop_input_cols)
@@ -51,6 +59,7 @@ class Normalizer(base.BaseTransformer):
         self._is_fitted = False
         self.set_input_cols(input_cols)
         self.set_output_cols(output_cols)
+        self.set_passthrough_cols(passthrough_cols)
 
     def _reset(self) -> None:
         """
@@ -82,10 +91,6 @@ class Normalizer(base.BaseTransformer):
         project=base.PROJECT,
         subproject=base.SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=base.PROJECT,
-        subproject=base.SUBPROJECT,
-    )
     def transform(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> Union[snowpark.DataFrame, pd.DataFrame]:
         """
         Scale each non-zero row of the input dataset to the unit norm.
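For Normalizer the change is the same passthrough_cols plumbing. As a reminder of what norm="l2" means per the transform docstring, a plain numpy illustration (illustrative only, not the package's implementation):

import numpy as np

# Each non-zero row is divided by its Euclidean (l2) norm; all-zero rows stay zero.
X = np.array([[3.0, 4.0], [0.0, 0.0]])
norms = np.linalg.norm(X, axis=1, keepdims=True)
X_unit = np.divide(X, norms, out=np.zeros_like(X), where=norms != 0)
print(X_unit)  # [[0.6 0.8] [0.  0. ]]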
snowflake/ml/modeling/preprocessing/one_hot_encoder.py

@@ -38,7 +38,7 @@ _N_FEATURES_OUT = "_N_FEATURES_OUT"
 # transformer with the sklearn version
 _SKLEARN_INITIAL_KEYWORDS = ("sparse", "handle_unknown")  # initial keywords in sklearn
 _SKLEARN_UNUSED_KEYWORDS = "dtype"  # sklearn keywords that are unused in snowml
-_SNOWML_ONLY_KEYWORDS = ["input_cols", "output_cols"]  # snowml only keywords not present in sklearn
+_SNOWML_ONLY_KEYWORDS = ["input_cols", "output_cols", "passthrough_cols"]  # snowml only keywords not present in sklearn
 
 # Added keywords mapped to the sklearn versions in which they were added. Update mappings in new
 # sklearn versions to support parameter validation.
@@ -101,7 +101,7 @@ class OneHotEncoder(base.BaseTransformer):
     (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html).
 
     Args:
-        categories: 'auto' or dict {column_name: ndarray([category])}, default='auto'
+        categories: 'auto' or dict {column_name: np.ndarray([category])}, default='auto'
             Categories (unique values) per feature:
             - 'auto': Determine categories automatically from the training data.
            - dict: ``categories[column_name]`` holds the categories expected in
@@ -109,6 +109,7 @@ class OneHotEncoder(base.BaseTransformer):
             and numeric values within a single feature, and should be sorted in
             case of numeric values.
             The used categories can be found in the ``categories_`` attribute.
+
         drop: {'first', 'if_binary'} or an array-like of shape (n_features,), default=None
             Specifies a methodology to use to drop one of the categories per
             feature. This is useful in situations where perfectly collinear
@@ -128,15 +129,18 @@ class OneHotEncoder(base.BaseTransformer):
             When `max_categories` or `min_frequency` is configured to group
             infrequent categories, the dropping behavior is handled after the
             grouping.
+
         sparse: bool, default=False
             Will return a column with sparse representation if set True else will return
             a separate column for each category.
+
         handle_unknown: {'error', 'ignore'}, default='error'
             Specifies the way unknown categories are handled during :meth:`transform`.
             - 'error': Raise an error if an unknown category is present during transform.
             - 'ignore': When an unknown category is encountered during
             transform, the resulting one-hot encoded columns for this feature
             will be all zeros.
+
         min_frequency: int or float, default=None
             Specifies the minimum frequency below which a category will be
             considered infrequent.
@@ -144,17 +148,29 @@ class OneHotEncoder(base.BaseTransformer):
             infrequent.
             - If `float`, categories with a smaller cardinality than
             `min_frequency * n_samples` will be considered infrequent.
+
         max_categories: int, default=None
             Specifies an upper limit to the number of output features for each input
             feature when considering infrequent categories. If there are infrequent
             categories, `max_categories` includes the category representing the
             infrequent categories along with the frequent categories. If `None`,
             there is no limit to the number of output features.
-        input_cols: str or Iterable [column_name], default=None
+
+        input_cols: Optional[Union[str, List[str]]], default=None
             Single or multiple input columns.
-        output_cols: str or Iterable [column_name], default=None
+
+        output_cols: Optional[Union[str, List[str]]], default=None
             Single or multiple output columns.
-        drop_input_cols: Remove input columns from output if set True. False by default.
+
+        passthrough_cols: Optional[Union[str, List[str]]]
+            A string or a list of strings indicating column names to be excluded from any
+            operations (such as train, transform, or inference). These specified column(s)
+            will remain untouched throughout the process. This option is helpful in scenarios
+            requiring automatic input_cols inference, but need to avoid using specific
+            columns, like index columns, during training or inference.
+
+        drop_input_cols: Optional[Union[str, List[str]]]
+            Remove input columns from output if set True. False by default.
 
     Attributes:
         categories_: dict {column_name: ndarray([category])}
@@ -190,6 +206,7 @@ class OneHotEncoder(base.BaseTransformer):
         max_categories: Optional[int] = None,
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
+        passthrough_cols: Optional[Union[str, Iterable[str]]] = None,
         drop_input_cols: Optional[bool] = False,
     ) -> None:
         """See class-level docstring."""
@@ -218,6 +235,7 @@ class OneHotEncoder(base.BaseTransformer):
 
         self.set_input_cols(input_cols)
         self.set_output_cols(output_cols)
+        self.set_passthrough_cols(passthrough_cols)
 
     @property
     def infrequent_categories_(self) -> List[Optional[type_utils.LiteralNDArrayType]]:
@@ -658,10 +676,6 @@ class OneHotEncoder(base.BaseTransformer):
         project=base.PROJECT,
         subproject=base.SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=base.PROJECT,
-        subproject=base.SUBPROJECT,
-    )
     def transform(
         self, dataset: Union[snowpark.DataFrame, pd.DataFrame]
     ) -> Union[snowpark.DataFrame, pd.DataFrame, sparse.csr_matrix]:
@@ -1319,7 +1333,9 @@ class OneHotEncoder(base.BaseTransformer):
         """
         category_counts_list = []  # list of ndarray
         for idx, input_col in enumerate(self.input_cols):
-            counts = np.vectorize(lambda x: category_counts[input_col][x])(self._categories_list[idx])
+            counts = np.vectorize(lambda x, input_col=input_col: category_counts[input_col][x])(
+                self._categories_list[idx]
+            )
             category_counts_list.append(np.array(counts))
         self._infrequent_indices = [
             self._identify_infrequent(category_count, n_samples) for category_count in category_counts_list
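The last hunk is the standard fix for Python's late-binding closure pitfall (the pattern flagged by flake8-bugbear B023): a lambda defined in a loop captures the loop variable by reference, and binding it as a default argument freezes the per-iteration value. In this particular call the lambda is applied immediately via np.vectorize, so the old code already behaved correctly; the rewrite just makes the capture explicit. A self-contained illustration of the pitfall:

# All three closures share one loop variable, which ends at 2.
fns = [lambda: i for i in range(3)]
print([f() for f in fns])   # [2, 2, 2]

# Default-argument binding snapshots the value at definition time.
fns = [lambda i=i: i for i in range(3)]
print([f() for f in fns])   # [0, 1, 2]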
snowflake/ml/modeling/preprocessing/ordinal_encoder.py

@@ -24,7 +24,7 @@ _COLUMN_BATCH_SIZE = 20
 # transformer with the sklearn version
 _SKLEARN_INITIAL_KEYWORDS = "categories"  # initial keywords in sklearn
 _SKLEARN_UNUSED_KEYWORDS = "dtype"  # sklearn keywords that are unused in snowml
-_SNOWML_ONLY_KEYWORDS = ["input_cols", "output_cols"]  # snowml only keywords not present in sklearn
+_SNOWML_ONLY_KEYWORDS = ["input_cols", "output_cols", "passthrough_cols"]  # snowml only keywords not present in sklearn
 
 # Added keywords mapped to the sklearn versions in which they were added. Update mappings in new
 # sklearn versions to support parameter validation.
@@ -45,26 +45,47 @@ class OrdinalEncoder(base.BaseTransformer):
     (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OrdinalEncoder.html).
 
     Args:
-        categories: The string 'auto' (the default) causes the categories to be extracted from the input columns.
+        categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]], default="auto"
+            The string 'auto' (the default) causes the categories to be extracted from the input columns.
             To specify the categories yourself, pass a dictionary mapping the column name to an ndarray containing the
             categories.
-        handle_unknown: Specifies how unknown categories are handled during transformation. Applicable only if
+
+        handle_unknown: str, default="error"
+            Specifies how unknown categories are handled during transformation. Applicable only if
             categories is not 'auto'.
             Valid values are:
             - 'error': Raise an error if an unknown category is present during transform (default).
             - 'use_encoded_value': When an unknown category is encountered during transform, the specified
             encoded_missing_value (below) is used.
-        unknown_value: When the parameter handle_unknown is set to 'use_encoded_value', this parameter is required and
+
+        unknown_value: Optional[Union[int, float]], default=None
+            When the parameter handle_unknown is set to 'use_encoded_value', this parameter is required and
             will set the encoded value of unknown categories. It has to be distinct from the values used to encode any
             of the categories in `fit`.
-        encoded_missing_value: The value to be used to encode unknown categories.
-        input_cols: The name(s) of one or more columns in a DataFrame containing a feature to be encoded.
-        output_cols: The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
+
+        encoded_missing_value: Union[int, float], default=np.nan
+            The value to be used to encode unknown categories.
+
+        input_cols: Optional[Union[str, List[str]]], default=None
+            The name(s) of one or more columns in a DataFrame containing a feature to be encoded.
+
+        output_cols: Optional[Union[str, List[str]]], default=None
+            The name(s) of one or more columns in a DataFrame in which results will be stored. The number of
             columns specified must match the number of input columns.
-        drop_input_cols: Remove input columns from output if set True. False by default.
+
+        passthrough_cols: Optional[Union[str, List[str]]], default=None
+            A string or a list of strings indicating column names to be excluded from any
+            operations (such as train, transform, or inference). These specified column(s)
+            will remain untouched throughout the process. This option is helpful in scenarios
+            requiring automatic input_cols inference, but need to avoid using specific
+            columns, like index columns, during training or inference.
+
+        drop_input_cols: Optional[bool], default=False
+            Remove input columns from output if set True. False by default.
 
     Attributes:
-        categories_ (dict of ndarray): The categories of each feature determined during fitting. Maps input column
+        categories_ (dict of ndarray): List[type_utils.LiteralNDArrayType]
+            The categories of each feature determined during fitting. Maps input column
             names to an array of the detected categories.
             Attributes are valid only after fit() has been called.
     """
@@ -78,6 +99,7 @@ class OrdinalEncoder(base.BaseTransformer):
         encoded_missing_value: Union[int, float] = np.nan,
         input_cols: Optional[Union[str, Iterable[str]]] = None,
         output_cols: Optional[Union[str, Iterable[str]]] = None,
+        passthrough_cols: Optional[Union[str, Iterable[str]]] = None,
         drop_input_cols: Optional[bool] = False,
     ) -> None:
         """
@@ -110,6 +132,11 @@ class OrdinalEncoder(base.BaseTransformer):
            encoded_missing_value: Encoded value of missing categories.
            input_cols: Single or multiple input columns.
            output_cols: Single or multiple output columns.
+           passthrough_cols: A string or a list of strings indicating column names to be excluded from any
+               operations (such as train, transform, or inference). These specified column(s)
+               will remain untouched throughout the process. This option is helful in scenarios
+               requiring automatic input_cols inference, but need to avoid using specific
+               columns, like index columns, during in training or inference.
            drop_input_cols: Remove input columns from output if set True. False by default.
 
         Attributes:
@@ -129,6 +156,7 @@ class OrdinalEncoder(base.BaseTransformer):
 
         self.set_input_cols(input_cols)
         self.set_output_cols(output_cols)
+        self.set_passthrough_cols(passthrough_cols)
 
     def _reset(self) -> None:
         """
@@ -417,10 +445,6 @@ class OrdinalEncoder(base.BaseTransformer):
         project=base.PROJECT,
         subproject=base.SUBPROJECT,
     )
-    @telemetry.add_stmt_params_to_df(
-        project=base.PROJECT,
-        subproject=base.SUBPROJECT,
-    )
     def transform(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> Union[snowpark.DataFrame, pd.DataFrame]:
         """
         Transform dataset to ordinal codes.
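A sketch tying the OrdinalEncoder options together, based on the docstring above (hypothetical column names and pre-existing train_df/test_df; categories is passed explicitly because handle_unknown is documented as applicable only when categories is not 'auto'):

import numpy as np

from snowflake.ml.modeling.preprocessing import OrdinalEncoder

encoder = OrdinalEncoder(
    categories={"COLOR": np.array(["blue", "green", "red"])},
    handle_unknown="use_encoded_value",
    unknown_value=-1,               # must differ from every fitted category code
    input_cols=["COLOR"],
    output_cols=["COLOR_CODE"],
    passthrough_cols=["ROW_ID"],
)
encoded_df = encoder.fit(train_df).transform(test_df)  # unseen values encode as -1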