snowflake-ml-python 1.5.3__py3-none-any.whl → 1.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +4 -1
- snowflake/cortex/_classify_text.py +36 -0
- snowflake/cortex/_complete.py +281 -21
- snowflake/cortex/_extract_answer.py +0 -1
- snowflake/cortex/_sentiment.py +0 -1
- snowflake/cortex/_summarize.py +0 -1
- snowflake/cortex/_translate.py +0 -1
- snowflake/cortex/_util.py +12 -85
- snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
- snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
- snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
- snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
- snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
- snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
- snowflake/ml/_internal/lineage/lineage_utils.py +4 -4
- snowflake/ml/_internal/telemetry.py +38 -2
- snowflake/ml/_internal/utils/identifier.py +14 -0
- snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
- snowflake/ml/data/_internal/arrow_ingestor.py +228 -0
- snowflake/ml/data/_internal/ingestor_utils.py +58 -0
- snowflake/ml/data/data_connector.py +133 -0
- snowflake/ml/data/data_ingestor.py +28 -0
- snowflake/ml/data/data_source.py +23 -0
- snowflake/ml/dataset/dataset.py +39 -32
- snowflake/ml/dataset/dataset_reader.py +18 -118
- snowflake/ml/feature_store/access_manager.py +7 -1
- snowflake/ml/feature_store/entity.py +19 -2
- snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +20 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +31 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +24 -0
- snowflake/ml/feature_store/examples/citibike_trip_features/source.yaml +4 -0
- snowflake/ml/feature_store/examples/example_helper.py +240 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +12 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/dropoff_features.py +39 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/pickup_features.py +58 -0
- snowflake/ml/feature_store/examples/new_york_taxi_features/source.yaml +5 -0
- snowflake/ml/feature_store/examples/source_data/citibike_trips.yaml +36 -0
- snowflake/ml/feature_store/examples/source_data/fraud_transactions.yaml +29 -0
- snowflake/ml/feature_store/examples/source_data/nyc_yellow_trips.yaml +4 -0
- snowflake/ml/feature_store/examples/source_data/winequality_red.yaml +32 -0
- snowflake/ml/feature_store/examples/wine_quality_features/entities.py +14 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +29 -0
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +21 -0
- snowflake/ml/feature_store/examples/wine_quality_features/source.yaml +5 -0
- snowflake/ml/feature_store/feature_store.py +987 -264
- snowflake/ml/feature_store/feature_view.py +228 -13
- snowflake/ml/fileset/embedded_stage_fs.py +25 -21
- snowflake/ml/fileset/fileset.py +2 -2
- snowflake/ml/fileset/snowfs.py +4 -15
- snowflake/ml/fileset/stage_fs.py +24 -18
- snowflake/ml/lineage/__init__.py +3 -0
- snowflake/ml/lineage/lineage_node.py +139 -0
- snowflake/ml/model/_client/model/model_impl.py +47 -14
- snowflake/ml/model/_client/model/model_version_impl.py +82 -2
- snowflake/ml/model/_client/ops/model_ops.py +77 -5
- snowflake/ml/model/_client/sql/model.py +1 -0
- snowflake/ml/model/_client/sql/model_version.py +45 -2
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +4 -6
- snowflake/ml/model/_model_composer/model_composer.py +15 -17
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +31 -17
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +2 -1
- snowflake/ml/model/_model_composer/model_method/function_generator.py +20 -4
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +3 -32
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +55 -0
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +5 -34
- snowflake/ml/model/_model_composer/model_method/model_method.py +10 -7
- snowflake/ml/model/_packager/model_handlers/_base.py +13 -3
- snowflake/ml/model/_packager/model_handlers/_utils.py +59 -1
- snowflake/ml/model/_packager/model_handlers/catboost.py +44 -2
- snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +70 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
- snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
- snowflake/ml/model/_packager/model_handlers/xgboost.py +61 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_packager.py +9 -4
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +3 -5
- snowflake/ml/model/custom_model.py +22 -2
- snowflake/ml/model/model_signature.py +4 -4
- snowflake/ml/model/type_hints.py +77 -4
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +3 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_search_udf_file.py +13 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +1 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +6 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +1 -0
- snowflake/ml/modeling/cluster/affinity_propagation.py +4 -2
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +4 -2
- snowflake/ml/modeling/cluster/birch.py +4 -2
- snowflake/ml/modeling/cluster/bisecting_k_means.py +4 -2
- snowflake/ml/modeling/cluster/dbscan.py +4 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +4 -2
- snowflake/ml/modeling/cluster/k_means.py +4 -2
- snowflake/ml/modeling/cluster/mean_shift.py +4 -2
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +4 -2
- snowflake/ml/modeling/cluster/optics.py +4 -2
- snowflake/ml/modeling/cluster/spectral_biclustering.py +4 -2
- snowflake/ml/modeling/cluster/spectral_clustering.py +4 -2
- snowflake/ml/modeling/cluster/spectral_coclustering.py +4 -2
- snowflake/ml/modeling/compose/column_transformer.py +4 -2
- snowflake/ml/modeling/covariance/elliptic_envelope.py +4 -2
- snowflake/ml/modeling/covariance/empirical_covariance.py +4 -2
- snowflake/ml/modeling/covariance/graphical_lasso.py +4 -2
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +4 -2
- snowflake/ml/modeling/covariance/ledoit_wolf.py +4 -2
- snowflake/ml/modeling/covariance/min_cov_det.py +4 -2
- snowflake/ml/modeling/covariance/oas.py +4 -2
- snowflake/ml/modeling/covariance/shrunk_covariance.py +4 -2
- snowflake/ml/modeling/decomposition/dictionary_learning.py +4 -2
- snowflake/ml/modeling/decomposition/factor_analysis.py +4 -2
- snowflake/ml/modeling/decomposition/fast_ica.py +4 -2
- snowflake/ml/modeling/decomposition/incremental_pca.py +4 -2
- snowflake/ml/modeling/decomposition/kernel_pca.py +4 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +4 -2
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +4 -2
- snowflake/ml/modeling/decomposition/pca.py +4 -2
- snowflake/ml/modeling/decomposition/sparse_pca.py +4 -2
- snowflake/ml/modeling/decomposition/truncated_svd.py +4 -2
- snowflake/ml/modeling/ensemble/isolation_forest.py +4 -2
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +4 -2
- snowflake/ml/modeling/feature_selection/variance_threshold.py +4 -2
- snowflake/ml/modeling/impute/iterative_imputer.py +4 -2
- snowflake/ml/modeling/impute/knn_imputer.py +4 -2
- snowflake/ml/modeling/impute/missing_indicator.py +4 -2
- snowflake/ml/modeling/impute/simple_imputer.py +26 -0
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +4 -2
- snowflake/ml/modeling/kernel_approximation/nystroem.py +4 -2
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +4 -2
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +4 -2
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +4 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +4 -2
- snowflake/ml/modeling/manifold/isomap.py +4 -2
- snowflake/ml/modeling/manifold/mds.py +4 -2
- snowflake/ml/modeling/manifold/spectral_embedding.py +4 -2
- snowflake/ml/modeling/manifold/tsne.py +4 -2
- snowflake/ml/modeling/metrics/ranking.py +3 -0
- snowflake/ml/modeling/metrics/regression.py +3 -0
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +4 -2
- snowflake/ml/modeling/mixture/gaussian_mixture.py +4 -2
- snowflake/ml/modeling/neighbors/kernel_density.py +4 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +4 -2
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +4 -2
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +4 -2
- snowflake/ml/modeling/pipeline/pipeline.py +5 -4
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +43 -9
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +36 -8
- snowflake/ml/modeling/preprocessing/polynomial_features.py +4 -2
- snowflake/ml/registry/_manager/model_manager.py +16 -3
- snowflake/ml/registry/registry.py +100 -13
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/METADATA +81 -7
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/RECORD +165 -139
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/lineage/data_source.py +0 -10
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.6.0.dist-info}/top_level.txt +0 -0
@@ -76,8 +76,10 @@ class ShrunkCovariance(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class DictionaryLearning(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class FactorAnalysis(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class FastICA(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class IncrementalPCA(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class KernelPCA(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class MiniBatchDictionaryLearning(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class MiniBatchSparsePCA(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class PCA(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class SparsePCA(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class TruncatedSVD(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class IsolationForest(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class SequentialFeatureSelector(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class VarianceThreshold(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -77,8 +77,10 @@ class IterativeImputer(BaseTransformer):
|
|
77
77
|
initialization with the `set_input_cols` method.
|
78
78
|
|
79
79
|
label_cols: Optional[Union[str, List[str]]]
|
80
|
-
|
81
|
-
|
80
|
+
A string or list of strings representing column names that contain labels.
|
81
|
+
Label columns must be specified with this parameter during initialization
|
82
|
+
or with the `set_label_cols` method before fitting.
|
83
|
+
|
82
84
|
output_cols: Optional[Union[str, List[str]]]
|
83
85
|
A string or list of strings representing column names that will store the
|
84
86
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class KNNImputer(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class MissingIndicator(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -1,5 +1,6 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
import copy
|
3
|
+
import warnings
|
3
4
|
from typing import Any, Dict, Iterable, Optional, Type, Union
|
4
5
|
|
5
6
|
import numpy as np
|
@@ -10,6 +11,7 @@ from sklearn import impute
|
|
10
11
|
from snowflake import snowpark
|
11
12
|
from snowflake.ml._internal import telemetry
|
12
13
|
from snowflake.ml._internal.exceptions import error_codes, exceptions
|
14
|
+
from snowflake.ml._internal.utils import formatting
|
13
15
|
from snowflake.ml.modeling.framework import _utils, base
|
14
16
|
from snowflake.snowpark import functions as F, types as T
|
15
17
|
from snowflake.snowpark._internal import utils as snowpark_utils
|
@@ -171,6 +173,14 @@ class SimpleImputer(base.BaseTransformer):
|
|
171
173
|
self.set_output_cols(output_cols)
|
172
174
|
self.set_passthrough_cols(passthrough_cols)
|
173
175
|
|
176
|
+
def _is_integer_type(self, column_type: T.DataType) -> bool:
|
177
|
+
return (
|
178
|
+
isinstance(column_type, T.ByteType)
|
179
|
+
or isinstance(column_type, T.ShortType)
|
180
|
+
or isinstance(column_type, T.IntegerType)
|
181
|
+
or isinstance(column_type, T.LongType)
|
182
|
+
)
|
183
|
+
|
174
184
|
def _reset(self) -> None:
|
175
185
|
"""
|
176
186
|
Reset internal data-dependent state of the imputer, if necessary.
|
@@ -389,6 +399,22 @@ class SimpleImputer(base.BaseTransformer):
|
|
389
399
|
# Use `fillna` for replacing nans. Check if the column has a string data type, or coerce a float.
|
390
400
|
if not isinstance(input_col_datatypes[input_col], T.StringType):
|
391
401
|
statistic = float(statistic)
|
402
|
+
|
403
|
+
if self._is_integer_type(input_col_datatypes[input_col]):
|
404
|
+
if statistic.is_integer():
|
405
|
+
statistic = int(statistic)
|
406
|
+
else:
|
407
|
+
warnings.warn(
|
408
|
+
formatting.unwrap(
|
409
|
+
f"""
|
410
|
+
Integer column may not be imputed with a non-integer value {statistic}.
|
411
|
+
In order to impute a non-integer value, convert the column to FloatType before imputing.
|
412
|
+
"""
|
413
|
+
),
|
414
|
+
category=UserWarning,
|
415
|
+
stacklevel=1,
|
416
|
+
)
|
417
|
+
|
392
418
|
transformed_dataset = transformed_dataset.na.fill({output_col: statistic})
|
393
419
|
else:
|
394
420
|
transformed_dataset = transformed_dataset.na.replace(
|
@@ -76,8 +76,10 @@ class AdditiveChi2Sampler(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class Nystroem(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class PolynomialCountSketch(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class RBFSampler(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class SkewedChi2Sampler(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class SGDOneClassSVM(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class Isomap(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class MDS(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class SpectralEmbedding(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class TSNE(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -102,6 +102,7 @@ def precision_recall_curve(
|
|
102
102
|
],
|
103
103
|
statement_params=statement_params,
|
104
104
|
anonymous=True,
|
105
|
+
execute_as="caller",
|
105
106
|
)
|
106
107
|
def precision_recall_curve_anon_sproc(session: snowpark.Session) -> bytes:
|
107
108
|
for query in queries[:-1]:
|
@@ -249,6 +250,7 @@ def roc_auc_score(
|
|
249
250
|
],
|
250
251
|
statement_params=statement_params,
|
251
252
|
anonymous=True,
|
253
|
+
execute_as="caller",
|
252
254
|
)
|
253
255
|
def roc_auc_score_anon_sproc(session: snowpark.Session) -> bytes:
|
254
256
|
for query in queries[:-1]:
|
@@ -352,6 +354,7 @@ def roc_curve(
|
|
352
354
|
],
|
353
355
|
statement_params=statement_params,
|
354
356
|
anonymous=True,
|
357
|
+
execute_as="caller",
|
355
358
|
)
|
356
359
|
def roc_curve_anon_sproc(session: snowpark.Session) -> bytes:
|
357
360
|
for query in queries[:-1]:
|
@@ -87,6 +87,7 @@ def d2_absolute_error_score(
|
|
87
87
|
],
|
88
88
|
statement_params=statement_params,
|
89
89
|
anonymous=True,
|
90
|
+
execute_as="caller",
|
90
91
|
)
|
91
92
|
def d2_absolute_error_score_anon_sproc(session: snowpark.Session) -> bytes:
|
92
93
|
for query in queries[:-1]:
|
@@ -184,6 +185,7 @@ def d2_pinball_score(
|
|
184
185
|
],
|
185
186
|
statement_params=statement_params,
|
186
187
|
anonymous=True,
|
188
|
+
execute_as="caller",
|
187
189
|
)
|
188
190
|
def d2_pinball_score_anon_sproc(session: snowpark.Session) -> bytes:
|
189
191
|
for query in queries[:-1]:
|
@@ -299,6 +301,7 @@ def explained_variance_score(
|
|
299
301
|
],
|
300
302
|
statement_params=statement_params,
|
301
303
|
anonymous=True,
|
304
|
+
execute_as="caller",
|
302
305
|
)
|
303
306
|
def explained_variance_score_anon_sproc(session: snowpark.Session) -> bytes:
|
304
307
|
for query in queries[:-1]:
|
@@ -76,8 +76,10 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class GaussianMixture(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class KernelDensity(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class LocalOutlierFactor(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class NearestNeighbors(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class BernoulliRBM(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -99,10 +99,6 @@ class Pipeline(base.BaseTransformer):
|
|
99
99
|
must implement `fit` and `transform` methods.
|
100
100
|
The final step can be a transform or estimator, that is, it must implement
|
101
101
|
`fit` and `transform`/`predict` methods.
|
102
|
-
TODO: SKLearn pipeline expects last step(and only the last step) to be an estimator obj or a dummy
|
103
|
-
estimator(like None or passthrough). Currently this Pipeline class works with a list of all
|
104
|
-
transforms or a list of transforms ending with an estimator. Should we change this implementation
|
105
|
-
to only work with list of steps ending with an estimator or a dummy estimator like SKLearn?
|
106
102
|
|
107
103
|
Args:
|
108
104
|
steps: List of (name, transform) tuples (implementing `fit`/`transform`) that
|
@@ -111,6 +107,10 @@ class Pipeline(base.BaseTransformer):
|
|
111
107
|
"""
|
112
108
|
super().__init__()
|
113
109
|
self.steps = steps
|
110
|
+
# TODO(snandamuri): SKLearn pipeline expects the last step (and only the last step) to be an estimator object or a dummy
|
111
|
+
# estimator (like None or passthrough). Currently this Pipeline class works with a list of all
|
112
|
+
# transforms or a list of transforms ending with an estimator. Should we change this implementation
|
113
|
+
# to only work with a list of steps ending with an estimator or a dummy estimator, like SKLearn?
|
114
114
|
self._is_final_step_estimator = Pipeline._is_estimator(steps[-1][1])
|
115
115
|
self._is_fitted = False
|
116
116
|
self._feature_names_in: List[np.ndarray[Any, np.dtype[Any]]] = []
|
@@ -378,6 +378,7 @@ class Pipeline(base.BaseTransformer):
|
|
378
378
|
anonymous=True,
|
379
379
|
imports=imports, # type: ignore[arg-type]
|
380
380
|
statement_params=sproc_statement_params,
|
381
|
+
execute_as="caller",
|
381
382
|
)
|
382
383
|
|
383
384
|
sproc_export_file_name: str = pipeline_within_one_sproc(
|