snowflake-ml-python 1.5.3__py3-none-any.whl → 1.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +2 -1
- snowflake/cortex/_complete.py +224 -21
- snowflake/cortex/_extract_answer.py +0 -1
- snowflake/cortex/_sentiment.py +0 -1
- snowflake/cortex/_summarize.py +0 -1
- snowflake/cortex/_translate.py +0 -1
- snowflake/cortex/_util.py +12 -85
- snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
- snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
- snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
- snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
- snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
- snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
- snowflake/ml/_internal/telemetry.py +26 -0
- snowflake/ml/_internal/utils/identifier.py +14 -0
- snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
- snowflake/ml/dataset/dataset.py +39 -20
- snowflake/ml/feature_store/feature_store.py +440 -243
- snowflake/ml/feature_store/feature_view.py +61 -9
- snowflake/ml/fileset/embedded_stage_fs.py +25 -21
- snowflake/ml/fileset/fileset.py +2 -2
- snowflake/ml/fileset/snowfs.py +4 -15
- snowflake/ml/fileset/stage_fs.py +6 -8
- snowflake/ml/lineage/__init__.py +3 -0
- snowflake/ml/lineage/lineage_node.py +139 -0
- snowflake/ml/model/_client/model/model_impl.py +47 -14
- snowflake/ml/model/_client/model/model_version_impl.py +82 -2
- snowflake/ml/model/_client/ops/model_ops.py +77 -5
- snowflake/ml/model/_client/sql/model.py +1 -0
- snowflake/ml/model/_client/sql/model_version.py +45 -2
- snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
- snowflake/ml/model/_model_composer/model_composer.py +5 -4
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +7 -1
- snowflake/ml/model/_model_composer/model_method/function_generator.py +17 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +79 -0
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -2
- snowflake/ml/model/_model_composer/model_method/model_method.py +5 -5
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +1 -0
- snowflake/ml/model/_packager/model_handlers/catboost.py +2 -2
- snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
- snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
- snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
- snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
- snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
- snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_packager.py +9 -4
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
- snowflake/ml/model/custom_model.py +22 -2
- snowflake/ml/model/type_hints.py +73 -4
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +1 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +6 -0
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +1 -0
- snowflake/ml/modeling/cluster/affinity_propagation.py +4 -2
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +4 -2
- snowflake/ml/modeling/cluster/birch.py +4 -2
- snowflake/ml/modeling/cluster/bisecting_k_means.py +4 -2
- snowflake/ml/modeling/cluster/dbscan.py +4 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +4 -2
- snowflake/ml/modeling/cluster/k_means.py +4 -2
- snowflake/ml/modeling/cluster/mean_shift.py +4 -2
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +4 -2
- snowflake/ml/modeling/cluster/optics.py +4 -2
- snowflake/ml/modeling/cluster/spectral_biclustering.py +4 -2
- snowflake/ml/modeling/cluster/spectral_clustering.py +4 -2
- snowflake/ml/modeling/cluster/spectral_coclustering.py +4 -2
- snowflake/ml/modeling/compose/column_transformer.py +4 -2
- snowflake/ml/modeling/covariance/elliptic_envelope.py +4 -2
- snowflake/ml/modeling/covariance/empirical_covariance.py +4 -2
- snowflake/ml/modeling/covariance/graphical_lasso.py +4 -2
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +4 -2
- snowflake/ml/modeling/covariance/ledoit_wolf.py +4 -2
- snowflake/ml/modeling/covariance/min_cov_det.py +4 -2
- snowflake/ml/modeling/covariance/oas.py +4 -2
- snowflake/ml/modeling/covariance/shrunk_covariance.py +4 -2
- snowflake/ml/modeling/decomposition/dictionary_learning.py +4 -2
- snowflake/ml/modeling/decomposition/factor_analysis.py +4 -2
- snowflake/ml/modeling/decomposition/fast_ica.py +4 -2
- snowflake/ml/modeling/decomposition/incremental_pca.py +4 -2
- snowflake/ml/modeling/decomposition/kernel_pca.py +4 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +4 -2
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +4 -2
- snowflake/ml/modeling/decomposition/pca.py +4 -2
- snowflake/ml/modeling/decomposition/sparse_pca.py +4 -2
- snowflake/ml/modeling/decomposition/truncated_svd.py +4 -2
- snowflake/ml/modeling/ensemble/isolation_forest.py +4 -2
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +4 -2
- snowflake/ml/modeling/feature_selection/variance_threshold.py +4 -2
- snowflake/ml/modeling/impute/iterative_imputer.py +4 -2
- snowflake/ml/modeling/impute/knn_imputer.py +4 -2
- snowflake/ml/modeling/impute/missing_indicator.py +4 -2
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +4 -2
- snowflake/ml/modeling/kernel_approximation/nystroem.py +4 -2
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +4 -2
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +4 -2
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +4 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +4 -2
- snowflake/ml/modeling/manifold/isomap.py +4 -2
- snowflake/ml/modeling/manifold/mds.py +4 -2
- snowflake/ml/modeling/manifold/spectral_embedding.py +4 -2
- snowflake/ml/modeling/manifold/tsne.py +4 -2
- snowflake/ml/modeling/metrics/ranking.py +3 -0
- snowflake/ml/modeling/metrics/regression.py +3 -0
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +4 -2
- snowflake/ml/modeling/mixture/gaussian_mixture.py +4 -2
- snowflake/ml/modeling/neighbors/kernel_density.py +4 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +4 -2
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +4 -2
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +4 -2
- snowflake/ml/modeling/pipeline/pipeline.py +1 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +43 -9
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +36 -8
- snowflake/ml/modeling/preprocessing/polynomial_features.py +4 -2
- snowflake/ml/registry/_manager/model_manager.py +16 -3
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.5.4.dist-info}/METADATA +35 -7
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.5.4.dist-info}/RECORD +131 -127
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.5.4.dist-info}/WHEEL +1 -1
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.5.4.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.5.4.dist-info}/top_level.txt +0 -0
@@ -76,8 +76,10 @@ class MissingIndicator(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class AdditiveChi2Sampler(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class Nystroem(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class PolynomialCountSketch(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class RBFSampler(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class SkewedChi2Sampler(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class SGDOneClassSVM(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class Isomap(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class MDS(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class SpectralEmbedding(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class TSNE(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -102,6 +102,7 @@ def precision_recall_curve(
|
|
102
102
|
],
|
103
103
|
statement_params=statement_params,
|
104
104
|
anonymous=True,
|
105
|
+
execute_as="caller",
|
105
106
|
)
|
106
107
|
def precision_recall_curve_anon_sproc(session: snowpark.Session) -> bytes:
|
107
108
|
for query in queries[:-1]:
|
@@ -249,6 +250,7 @@ def roc_auc_score(
|
|
249
250
|
],
|
250
251
|
statement_params=statement_params,
|
251
252
|
anonymous=True,
|
253
|
+
execute_as="caller",
|
252
254
|
)
|
253
255
|
def roc_auc_score_anon_sproc(session: snowpark.Session) -> bytes:
|
254
256
|
for query in queries[:-1]:
|
@@ -352,6 +354,7 @@ def roc_curve(
|
|
352
354
|
],
|
353
355
|
statement_params=statement_params,
|
354
356
|
anonymous=True,
|
357
|
+
execute_as="caller",
|
355
358
|
)
|
356
359
|
def roc_curve_anon_sproc(session: snowpark.Session) -> bytes:
|
357
360
|
for query in queries[:-1]:
|
@@ -87,6 +87,7 @@ def d2_absolute_error_score(
|
|
87
87
|
],
|
88
88
|
statement_params=statement_params,
|
89
89
|
anonymous=True,
|
90
|
+
execute_as="caller",
|
90
91
|
)
|
91
92
|
def d2_absolute_error_score_anon_sproc(session: snowpark.Session) -> bytes:
|
92
93
|
for query in queries[:-1]:
|
@@ -184,6 +185,7 @@ def d2_pinball_score(
|
|
184
185
|
],
|
185
186
|
statement_params=statement_params,
|
186
187
|
anonymous=True,
|
188
|
+
execute_as="caller",
|
187
189
|
)
|
188
190
|
def d2_pinball_score_anon_sproc(session: snowpark.Session) -> bytes:
|
189
191
|
for query in queries[:-1]:
|
@@ -299,6 +301,7 @@ def explained_variance_score(
|
|
299
301
|
],
|
300
302
|
statement_params=statement_params,
|
301
303
|
anonymous=True,
|
304
|
+
execute_as="caller",
|
302
305
|
)
|
303
306
|
def explained_variance_score_anon_sproc(session: snowpark.Session) -> bytes:
|
304
307
|
for query in queries[:-1]:
|
@@ -76,8 +76,10 @@ class BayesianGaussianMixture(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class GaussianMixture(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class KernelDensity(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class LocalOutlierFactor(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class NearestNeighbors(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -76,8 +76,10 @@ class BernoulliRBM(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -101,16 +101,20 @@ class OneHotEncoder(base.BaseTransformer):
|
|
101
101
|
(https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html).
|
102
102
|
|
103
103
|
Args:
|
104
|
-
categories: 'auto' or dict {column_name: np.ndarray([category])}, default='auto'
|
104
|
+
categories: 'auto', list of array-like, or dict {column_name: np.ndarray([category])}, default='auto'
|
105
105
|
Categories (unique values) per feature:
|
106
106
|
- 'auto': Determine categories automatically from the training data.
|
107
|
+
- list: ``categories[i]`` holds the categories expected in the ith
|
108
|
+
column. The passed categories should not mix strings and numeric
|
109
|
+
values within a single feature, and should be sorted in case of
|
110
|
+
numeric values.
|
107
111
|
- dict: ``categories[column_name]`` holds the categories expected in
|
108
112
|
the column provided. The passed categories should not mix strings
|
109
113
|
and numeric values within a single feature, and should be sorted in
|
110
114
|
case of numeric values.
|
111
115
|
The used categories can be found in the ``categories_`` attribute.
|
112
116
|
|
113
|
-
drop: {
|
117
|
+
drop: {'first', 'if_binary'} or an array-like of shape (n_features,), default=None
|
114
118
|
Specifies a methodology to use to drop one of the categories per
|
115
119
|
feature. This is useful in situations where perfectly collinear
|
116
120
|
features cause problems, such as when feeding the resulting data
|
@@ -206,7 +210,7 @@ class OneHotEncoder(base.BaseTransformer):
|
|
206
210
|
def __init__(
|
207
211
|
self,
|
208
212
|
*,
|
209
|
-
categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]] = "auto",
|
213
|
+
categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]] = "auto",
|
210
214
|
drop: Optional[Union[str, npt.ArrayLike]] = None,
|
211
215
|
sparse: bool = False,
|
212
216
|
handle_unknown: str = "error",
|
@@ -440,8 +444,19 @@ class OneHotEncoder(base.BaseTransformer):
|
|
440
444
|
assert found_state_df is not None
|
441
445
|
if self.categories != "auto":
|
442
446
|
state_data = []
|
443
|
-
|
444
|
-
|
447
|
+
if isinstance(self.categories, list):
|
448
|
+
categories_map = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
|
449
|
+
elif isinstance(self.categories, dict):
|
450
|
+
categories_map = self.categories
|
451
|
+
else:
|
452
|
+
raise exceptions.SnowflakeMLException(
|
453
|
+
error_code=error_codes.INVALID_ARGUMENT,
|
454
|
+
original_exception=ValueError(
|
455
|
+
f"Invalid type {type(self.categories)} provided for argument `categories`"
|
456
|
+
),
|
457
|
+
)
|
458
|
+
|
459
|
+
for input_col, cats in categories_map.items():
|
445
460
|
for cat in cats.tolist():
|
446
461
|
state_data.append([input_col, cat])
|
447
462
|
# states of given categories
|
@@ -565,6 +580,8 @@ class OneHotEncoder(base.BaseTransformer):
|
|
565
580
|
else:
|
566
581
|
categories[k] = vectorized_func(v)
|
567
582
|
self.categories_ = categories
|
583
|
+
elif isinstance(self.categories, list):
|
584
|
+
self.categories_ = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
|
568
585
|
else:
|
569
586
|
self.categories_ = self.categories
|
570
587
|
|
@@ -850,8 +867,15 @@ class OneHotEncoder(base.BaseTransformer):
|
|
850
867
|
# In case of fitting with pandas dataframe and transforming with snowpark dataframe
|
851
868
|
# state_pandas cannot recognize the datatype of _CATEGORY and _FITTED_CATEGORY column
|
852
869
|
# Therefore, apply the convert_to_string_excluding_nan function to _CATEGORY and _FITTED_CATEGORY
|
853
|
-
|
854
|
-
|
870
|
+
# applymap is deprecated since pandas 2.1.0, replaced by map
|
871
|
+
if pd.__version__ < "2.1.0":
|
872
|
+
state_pandas[[_CATEGORY]] = state_pandas[[_CATEGORY]].applymap(convert_to_string_excluding_nan)
|
873
|
+
state_pandas[[_FITTED_CATEGORY]] = state_pandas[[_FITTED_CATEGORY]].applymap(
|
874
|
+
convert_to_string_excluding_nan
|
875
|
+
)
|
876
|
+
else:
|
877
|
+
state_pandas[[_CATEGORY]] = state_pandas[[_CATEGORY]].map(convert_to_string_excluding_nan)
|
878
|
+
state_pandas[[_FITTED_CATEGORY]] = state_pandas[[_FITTED_CATEGORY]].map(convert_to_string_excluding_nan)
|
855
879
|
state_df = dataset._session.create_dataframe(state_pandas)
|
856
880
|
|
857
881
|
transformed_dataset = dataset
|
@@ -1009,7 +1033,7 @@ class OneHotEncoder(base.BaseTransformer):
|
|
1009
1033
|
error_code=error_codes.INVALID_ATTRIBUTE,
|
1010
1034
|
original_exception=ValueError(f"Unsupported `categories` value: {self.categories}."),
|
1011
1035
|
)
|
1012
|
-
elif isinstance(self.categories, dict):
|
1036
|
+
elif isinstance(self.categories, (dict, list)):
|
1013
1037
|
if len(self.categories) != len(self.input_cols):
|
1014
1038
|
raise exceptions.SnowflakeMLException(
|
1015
1039
|
error_code=error_codes.INVALID_ATTRIBUTE,
|
@@ -1018,7 +1042,7 @@ class OneHotEncoder(base.BaseTransformer):
|
|
1018
1042
|
f"({len(self.input_cols)})."
|
1019
1043
|
),
|
1020
1044
|
)
|
1021
|
-
elif set(self.categories.keys()) != set(self.input_cols):
|
1045
|
+
elif isinstance(self.categories, dict) and set(self.categories.keys()) != set(self.input_cols):
|
1022
1046
|
raise exceptions.SnowflakeMLException(
|
1023
1047
|
error_code=error_codes.INVALID_ATTRIBUTE,
|
1024
1048
|
original_exception=ValueError(
|
@@ -1537,6 +1561,16 @@ class OneHotEncoder(base.BaseTransformer):
|
|
1537
1561
|
default_sklearn_args = _utils.get_default_args(default_sklearn_obj.__class__.__init__)
|
1538
1562
|
given_args = self.get_params()
|
1539
1563
|
|
1564
|
+
if "categories" in given_args and isinstance(given_args["categories"], dict):
|
1565
|
+
# sklearn requires a list of array-like to satisfy the `categories` arg
|
1566
|
+
try:
|
1567
|
+
given_args["categories"] = [given_args["categories"][input_col] for input_col in self.input_cols]
|
1568
|
+
except KeyError as e:
|
1569
|
+
raise exceptions.SnowflakeMLException(
|
1570
|
+
error_code=error_codes.INVALID_ARGUMENT,
|
1571
|
+
original_exception=e,
|
1572
|
+
)
|
1573
|
+
|
1540
1574
|
# replace 'sparse' with 'sparse_output' when scikit-learn>=1.2
|
1541
1575
|
sklearn_version = sklearn.__version__
|
1542
1576
|
if version.parse(sklearn_version) >= version.parse(_SKLEARN_DEPRECATED_KEYWORD_TO_VERSION_DICT["sparse"]):
|
@@ -45,9 +45,11 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
45
45
|
(https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OrdinalEncoder.html).
|
46
46
|
|
47
47
|
Args:
|
48
|
-
categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]],
|
48
|
+
categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]],
|
49
|
+
default="auto"
|
49
50
|
The string 'auto' (the default) causes the categories to be extracted from the input columns.
|
50
|
-
To specify the categories yourself, pass
|
51
|
+
To specify the categories yourself, pass either (1) a list of ndarrays containing the categories or
|
52
|
+
(2) a dictionary mapping the column name to an ndarray containing the
|
51
53
|
categories.
|
52
54
|
|
53
55
|
handle_unknown: str, default="error"
|
@@ -96,7 +98,7 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
96
98
|
def __init__(
|
97
99
|
self,
|
98
100
|
*,
|
99
|
-
categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]] = "auto",
|
101
|
+
categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]] = "auto",
|
100
102
|
handle_unknown: str = "error",
|
101
103
|
unknown_value: Optional[Union[int, float]] = None,
|
102
104
|
encoded_missing_value: Union[int, float] = np.nan,
|
@@ -114,9 +116,13 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
114
116
|
a single column of integers (0 to n_categories - 1) per feature.
|
115
117
|
|
116
118
|
Args:
|
117
|
-
categories: 'auto' or dict {column_name: ndarray([category])}, default='auto'
|
119
|
+
categories: 'auto', list of array-like, or dict {column_name: ndarray([category])}, default='auto'
|
118
120
|
Categories (unique values) per feature:
|
119
121
|
- 'auto': Determine categories automatically from the training data.
|
122
|
+
- list: ``categories[i]`` holds the categories expected in the ith
|
123
|
+
column. The passed categories should not mix strings and numeric
|
124
|
+
values within a single feature, and should be sorted in case of
|
125
|
+
numeric values.
|
120
126
|
- dict: ``categories[column_name]`` holds the categories expected in
|
121
127
|
the column provided. The passed categories should not mix strings
|
122
128
|
and numeric values within a single feature, and should be sorted in
|
@@ -317,8 +323,19 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
317
323
|
assert found_state_df is not None
|
318
324
|
if self.categories != "auto":
|
319
325
|
state_data = []
|
320
|
-
|
321
|
-
|
326
|
+
if isinstance(self.categories, list):
|
327
|
+
categories_map = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
|
328
|
+
elif isinstance(self.categories, dict):
|
329
|
+
categories_map = self.categories
|
330
|
+
else:
|
331
|
+
raise exceptions.SnowflakeMLException(
|
332
|
+
error_code=error_codes.INVALID_ARGUMENT,
|
333
|
+
original_exception=ValueError(
|
334
|
+
f"Invalid type {type(self.categories)} provided for argument `categories`"
|
335
|
+
),
|
336
|
+
)
|
337
|
+
|
338
|
+
for input_col, cats in categories_map.items():
|
322
339
|
for idx, cat in enumerate(cats.tolist()):
|
323
340
|
state_data.append([input_col, cat, idx])
|
324
341
|
# states of given categories
|
@@ -368,6 +385,8 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
368
385
|
for col_name, cats in grouped_categories.items()
|
369
386
|
}
|
370
387
|
self.categories_ = categories
|
388
|
+
elif isinstance(self.categories, list):
|
389
|
+
self.categories_ = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
|
371
390
|
else:
|
372
391
|
self.categories_ = self.categories
|
373
392
|
|
@@ -548,6 +567,15 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
548
567
|
snowml_only_keywords=_SNOWML_ONLY_KEYWORDS,
|
549
568
|
sklearn_added_keyword_to_version_dict=_SKLEARN_ADDED_KEYWORD_TO_VERSION_DICT,
|
550
569
|
)
|
570
|
+
if "categories" in sklearn_args and isinstance(sklearn_args["categories"], dict):
|
571
|
+
# sklearn requires a list of array-like to satisfy the `categories` arg
|
572
|
+
try:
|
573
|
+
sklearn_args["categories"] = [sklearn_args["categories"][input_col] for input_col in self.input_cols]
|
574
|
+
except KeyError as e:
|
575
|
+
raise exceptions.SnowflakeMLException(
|
576
|
+
error_code=error_codes.INVALID_ARGUMENT,
|
577
|
+
original_exception=e,
|
578
|
+
)
|
551
579
|
return preprocessing.OrdinalEncoder(**sklearn_args)
|
552
580
|
|
553
581
|
def _create_sklearn_object(self) -> preprocessing.OrdinalEncoder:
|
@@ -570,7 +598,7 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
570
598
|
error_code=error_codes.INVALID_ATTRIBUTE,
|
571
599
|
original_exception=ValueError(f"Unsupported `categories` value: {self.categories}."),
|
572
600
|
)
|
573
|
-
elif isinstance(self.categories, dict):
|
601
|
+
elif isinstance(self.categories, (dict, list)):
|
574
602
|
if len(self.categories) != len(self.input_cols):
|
575
603
|
raise exceptions.SnowflakeMLException(
|
576
604
|
error_code=error_codes.INVALID_ATTRIBUTE,
|
@@ -579,7 +607,7 @@ class OrdinalEncoder(base.BaseTransformer):
|
|
579
607
|
f"({len(self.input_cols)})."
|
580
608
|
),
|
581
609
|
)
|
582
|
-
elif set(self.categories.keys()) != set(self.input_cols):
|
610
|
+
elif isinstance(self.categories, dict) and set(self.categories.keys()) != set(self.input_cols):
|
583
611
|
raise exceptions.SnowflakeMLException(
|
584
612
|
error_code=error_codes.INVALID_ATTRIBUTE,
|
585
613
|
original_exception=ValueError(
|
@@ -76,8 +76,10 @@ class PolynomialFeatures(BaseTransformer):
|
|
76
76
|
initialization with the `set_input_cols` method.
|
77
77
|
|
78
78
|
label_cols: Optional[Union[str, List[str]]]
|
79
|
-
|
80
|
-
|
79
|
+
A string or list of strings representing column names that contain labels.
|
80
|
+
Label columns must be specified with this parameter during initialization
|
81
|
+
or with the `set_label_cols` method before fitting.
|
82
|
+
|
81
83
|
output_cols: Optional[Union[str, List[str]]]
|
82
84
|
A string or list of strings representing column names that will store the
|
83
85
|
output of predict and transform operations. The length of output_cols must
|
@@ -4,12 +4,14 @@ from typing import Any, Dict, List, Optional, Union
|
|
4
4
|
import pandas as pd
|
5
5
|
from absl.logging import logging
|
6
6
|
|
7
|
+
from snowflake.ml._internal import telemetry
|
7
8
|
from snowflake.ml._internal.human_readable_id import hrid_generator
|
8
9
|
from snowflake.ml._internal.utils import sql_identifier
|
9
10
|
from snowflake.ml.model import model_signature, type_hints as model_types
|
10
11
|
from snowflake.ml.model._client.model import model_impl, model_version_impl
|
11
12
|
from snowflake.ml.model._client.ops import metadata_ops, model_ops
|
12
13
|
from snowflake.ml.model._model_composer import model_composer
|
14
|
+
from snowflake.ml.model._packager.model_meta import model_meta
|
13
15
|
from snowflake.snowpark import session
|
14
16
|
|
15
17
|
logger = logging.getLogger(__name__)
|
@@ -124,7 +126,10 @@ class ModelManager:
|
|
124
126
|
version_name=version_name_id,
|
125
127
|
statement_params=statement_params,
|
126
128
|
):
|
127
|
-
raise ValueError(
|
129
|
+
raise ValueError(
|
130
|
+
f"Model {model_name} version {version_name} already existed. "
|
131
|
+
+ "To auto-generate `version_name`, skip that argument."
|
132
|
+
)
|
128
133
|
|
129
134
|
stage_path = self._model_ops.prepare_model_stage_path(
|
130
135
|
database_name=database_name_id,
|
@@ -134,8 +139,10 @@ class ModelManager:
|
|
134
139
|
|
135
140
|
logger.info("Start packaging and uploading your model. It might take some time based on the size of the model.")
|
136
141
|
|
137
|
-
mc = model_composer.ModelComposer(
|
138
|
-
|
142
|
+
mc = model_composer.ModelComposer(
|
143
|
+
self._model_ops._session, stage_path=stage_path, statement_params=statement_params
|
144
|
+
)
|
145
|
+
model_metadata: model_meta.ModelMetadata = mc.save(
|
139
146
|
name=model_name_id.resolved(),
|
140
147
|
model=model,
|
141
148
|
signatures=signatures,
|
@@ -147,6 +154,12 @@ class ModelManager:
|
|
147
154
|
ext_modules=ext_modules,
|
148
155
|
options=options,
|
149
156
|
)
|
157
|
+
statement_params = telemetry.add_statement_params_custom_tags(
|
158
|
+
statement_params, model_metadata.telemetry_metadata()
|
159
|
+
)
|
160
|
+
statement_params = telemetry.add_statement_params_custom_tags(
|
161
|
+
statement_params, {"model_version_name": version_name_id}
|
162
|
+
)
|
150
163
|
|
151
164
|
logger.info("Start creating MODEL object for you in the Snowflake.")
|
152
165
|
|
snowflake/ml/version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
VERSION="1.5.3"
|
1
|
+
VERSION="1.5.4"
|