snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +3 -3
- snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
- snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
- snowflake/ml/_internal/telemetry.py +11 -2
- snowflake/ml/_internal/utils/formatting.py +1 -1
- snowflake/ml/feature_store/feature_store.py +15 -106
- snowflake/ml/fileset/sfcfs.py +4 -3
- snowflake/ml/fileset/stage_fs.py +18 -0
- snowflake/ml/model/_api.py +9 -9
- snowflake/ml/model/_client/model/model_version_impl.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
- snowflake/ml/model/_model_composer/model_composer.py +10 -8
- snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
- snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
- snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
- snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
- snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
- snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_packager.py +8 -6
- snowflake/ml/model/custom_model.py +3 -1
- snowflake/ml/model/type_hints.py +13 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
- snowflake/ml/modeling/_internal/model_specifications.py +3 -1
- snowflake/ml/modeling/_internal/model_trainer.py +2 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
- snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
- snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
- snowflake/ml/modeling/cluster/birch.py +33 -61
- snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
- snowflake/ml/modeling/cluster/dbscan.py +33 -61
- snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
- snowflake/ml/modeling/cluster/k_means.py +33 -61
- snowflake/ml/modeling/cluster/mean_shift.py +33 -61
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
- snowflake/ml/modeling/cluster/optics.py +33 -61
- snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
- snowflake/ml/modeling/compose/column_transformer.py +33 -61
- snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
- snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
- snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
- snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
- snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
- snowflake/ml/modeling/covariance/oas.py +33 -61
- snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
- snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
- snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
- snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
- snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/pca.py +33 -61
- snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
- snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
- snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
- snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
- snowflake/ml/modeling/framework/base.py +55 -5
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
- snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
- snowflake/ml/modeling/impute/knn_imputer.py +33 -61
- snowflake/ml/modeling/impute/missing_indicator.py +33 -61
- snowflake/ml/modeling/impute/simple_imputer.py +4 -15
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
- snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/lars.py +33 -61
- snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
- snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/perceptron.py +33 -61
- snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ridge.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
- snowflake/ml/modeling/manifold/isomap.py +33 -61
- snowflake/ml/modeling/manifold/mds.py +33 -61
- snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
- snowflake/ml/modeling/manifold/tsne.py +33 -61
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
- snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
- snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
- snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
- snowflake/ml/modeling/svm/linear_svc.py +33 -61
- snowflake/ml/modeling/svm/linear_svr.py +33 -61
- snowflake/ml/modeling/svm/nu_svc.py +33 -61
- snowflake/ml/modeling/svm/nu_svr.py +33 -61
- snowflake/ml/modeling/svm/svc.py +33 -61
- snowflake/ml/modeling/svm/svr.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
- snowflake/ml/registry/_manager/model_manager.py +6 -2
- snowflake/ml/registry/model_registry.py +100 -27
- snowflake/ml/registry/registry.py +6 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
snowflake/ml/modeling/kernel_ridge/kernel_ridge.py (the same edit is repeated across all of the generated modeling estimators listed above; only the KernelRidge and LightGBM copies are shown):

```diff
@@ -305,18 +305,24 @@ class KernelRidge(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
```
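The rewritten docstring tracks a real behavioral change: `_batch_inference_validate_snowpark` now both checks preconditions (fitted estimator, live session) and returns the dependency pins matched in the Snowflake Anaconda channel, which callers stash on `self._deps` (see the `score` hunk further down). A minimal runnable sketch of that contract, using a toy class rather than the real `BaseTransformer` (version strings are illustrative):

```python
from typing import List


class ToyEstimator:
    """Toy stand-in for the generated estimators; only the new
    validate-then-return-deps contract is modeled here."""

    def __init__(self) -> None:
        self._is_fitted = True
        # In the real class these pins are resolved against the
        # Snowflake Anaconda channel before being returned.
        self._deps: List[str] = ["scikit-learn==1.3.2", "numpy==1.24.4"]

    def _batch_inference_validate_snowpark(self, dataset: object, inference_method: str) -> List[str]:
        if not self._is_fitted:
            raise RuntimeError(f"Estimator not fitted before calling {inference_method}.")
        if dataset is None:  # stands in for the session-is-None check
            raise RuntimeError("No active Snowpark session.")
        return self._deps


est = ToyEstimator()
deps = est._batch_inference_validate_snowpark(dataset=object(), inference_method="score")
print(["snowflake-snowpark-python"] + deps)
```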
```diff
@@ -390,7 +396,7 @@ class KernelRidge(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

```
```diff
@@ -450,16 +456,16 @@ class KernelRidge(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"

         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
```
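The fallback encodes a simple rule: if a clustering or decomposition estimator emits fewer output columns than it has clusters or components, each result row must hold a list of values, so the expected Snowflake type becomes `"array"`. A standalone restatement of that rule (the attribute names `n_clusters` and `n_components` come from the hunk; the function itself is illustrative):

```python
def infer_expected_dtype(estimator: object, output_cols: list, expected_dtype: str = "") -> str:
    """Restates the fallback type inference from the hunk above."""
    if expected_dtype == "":
        # Clustering: output columns != clusters -> rows carry arrays.
        if hasattr(estimator, "n_clusters") and getattr(estimator, "n_clusters") != len(output_cols):
            expected_dtype = "array"
        # Decomposition: output columns != components -> rows carry arrays.
        elif hasattr(estimator, "n_components") and getattr(estimator, "n_components") != len(output_cols):
            expected_dtype = "array"
    return expected_dtype


class KMeansLike:
    n_clusters = 8


print(infer_expected_dtype(KMeansLike(), output_cols=["CLUSTER"]))  # -> "array"
```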
```diff
@@ -477,7 +483,7 @@ class KernelRidge(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -528,7 +534,7 @@ class KernelRidge(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
```
```diff
@@ -546,44 +552,6 @@ class KernelRidge(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
```
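`_get_output_column_names` disappears from every generated estimator in this release (presumably centralized; the file list shows `estimator_utils.py` growing by 61 lines). Its expansion rules are worth keeping in mind when reading the `predict_proba`-style hunks that follow; a condensed, runnable restatement (Snowflake identifier normalization and the explicit `output_cols` branch omitted):

```python
import numpy


def get_output_column_names(prefix: str, classes=None) -> list:
    """Condensed restatement of the removed helper's expansion rules."""
    if classes is None:
        # Not a classifier: the prefix itself is the only output column.
        return [prefix]
    if isinstance(classes, numpy.ndarray):
        return [f"{prefix}{c}" for c in classes.tolist()]
    # Multioutput estimators: classes_ is a list of ndarrays.
    cols: list = []
    for i, cl in enumerate(classes):
        if len(cl) == 2:
            # Binary output: one column per ndarray; the two classes are complementary.
            cols.append(f"{prefix}{i}_{cl[0]}")
        else:
            cols.extend(f"{prefix}{i}_{c}" for c in cl.tolist())
    return cols


print(get_output_column_names("PREDICT_PROBA_", numpy.array([0, 1, 2])))
# ['PREDICT_PROBA_0', 'PREDICT_PROBA_1', 'PREDICT_PROBA_2']
```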
```diff
@@ -623,7 +591,7 @@ class KernelRidge(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -688,7 +656,7 @@ class KernelRidge(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -749,7 +717,7 @@ class KernelRidge(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -814,7 +782,7 @@ class KernelRidge(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

```
```diff
@@ -870,13 +838,17 @@ class KernelRidge(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session) # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
+                dependencies=["snowflake-snowpark-python"] + self._deps,
                 score_sproc_imports=['sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
```
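The `score` path now resolves dependencies up front: validate, capture the returned pins on `self._deps`, then prepend the Snowpark client package when building the stored-procedure kwargs. As a sketch (the dict keys mirror the hunk; the wrapper function is illustrative):

```python
from typing import Any, Dict, List


def build_score_kwargs(session: Any, validated_deps: List[str]) -> Dict[str, Any]:
    """Mirrors the 1.4.0 score-path kwargs from the hunk above."""
    return dict(
        session=session,
        dependencies=["snowflake-snowpark-python"] + validated_deps,
        score_sproc_imports=["sklearn"],
    )


print(build_score_kwargs(session=None, validated_deps=["scikit-learn==1.3.2"]))
```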
```diff
@@ -950,9 +922,9 @@ class KernelRidge(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
snowflake/ml/modeling/lightgbm/lgbm_classifier.py:

```diff
@@ -16,6 +16,7 @@ from numpy import typing as npt


 import numpy
+import sklearn
 import lightgbm
 from sklearn.utils.metaestimators import available_if

```
```diff
@@ -160,7 +161,7 @@ class LGBMClassifier(BaseTransformer):
         self.set_sample_weight_col(sample_weight_col)
         self._use_external_memory_version = False
         self._batch_size = -1
-        deps: Set[str] = set([f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}'])
+        deps: Set[str] = set([f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}', f'scikit-learn=={sklearn.__version__}'])

         self._deps = list(deps)

```
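Pinning `scikit-learn` alongside `numpy`, `lightgbm`, and `cloudpickle` matters because the LightGBM score procedure now also imports `sklearn` (see the `score_sproc_imports=['lightgbm', 'sklearn']` hunk below). A runnable sketch of the same pin-set construction, assuming `lightgbm` and `scikit-learn` are installed locally:

```python
import cloudpickle as cp
import numpy as np
import sklearn

import lightgbm

# Mirrors the constructor hunk above: pin the local versions so the
# warehouse-side environment matches the client that trained the model.
deps = {
    f"numpy=={np.__version__}",
    f"lightgbm=={lightgbm.__version__}",
    f"cloudpickle=={cp.__version__}",
    f"scikit-learn=={sklearn.__version__}",  # new in 1.4.0
}
print(sorted(deps))
```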
```diff
@@ -293,18 +294,24 @@ class LGBMClassifier(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -378,7 +385,7 @@ class LGBMClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -438,16 +445,16 @@ class LGBMClassifier(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"

         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -465,7 +472,7 @@ class LGBMClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -516,7 +523,7 @@ class LGBMClassifier(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -534,44 +541,6 @@ class LGBMClassifier(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -613,7 +582,7 @@ class LGBMClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -680,7 +649,7 @@ class LGBMClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -741,7 +710,7 @@ class LGBMClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -806,7 +775,7 @@ class LGBMClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -862,14 +831,18 @@ class LGBMClassifier(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session) # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
-                score_sproc_imports=['lightgbm'],
+                dependencies=["snowflake-snowpark-python"] + self._deps,
+                score_sproc_imports=['lightgbm', 'sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
             # pandas_handler.score() does not require any extra kwargs.
@@ -942,9 +915,9 @@ class LGBMClassifier(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
snowflake/ml/modeling/lightgbm/lgbm_regressor.py:

```diff
@@ -16,6 +16,7 @@ from numpy import typing as npt


 import numpy
+import sklearn
 import lightgbm
 from sklearn.utils.metaestimators import available_if

@@ -160,7 +161,7 @@ class LGBMRegressor(BaseTransformer):
         self.set_sample_weight_col(sample_weight_col)
         self._use_external_memory_version = False
         self._batch_size = -1
-        deps: Set[str] = set([f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}'])
+        deps: Set[str] = set([f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}', f'scikit-learn=={sklearn.__version__}'])

         self._deps = list(deps)

@@ -293,18 +294,24 @@ class LGBMRegressor(BaseTransformer):
         self._get_model_signatures(dataset)
         return self

-    def _get_pass_through_columns(self, dataset: DataFrame) -> List[str]:
-        if self._drop_input_cols:
-            return []
-        else:
-            return list(set(dataset.columns) - set(self.output_cols))
-
     def _batch_inference_validate_snowpark(
         self,
         dataset: DataFrame,
         inference_method: str,
     ) -> List[str]:
-        """Util method to run validate that batch inference can be run on a snowpark dataframe
+        """Util method to run validate that batch inference can be run on a snowpark dataframe and
+        return the available package that exists in the snowflake anaconda channel
+
+        Args:
+            dataset: snowpark dataframe
+            inference_method: the inference method such as predict, score...
+
+        Raises:
+            SnowflakeMLException: If the estimator is not fitted, raise error
+            SnowflakeMLException: If the session is None, raise error
+
+        Returns:
+            A list of available package that exists in the snowflake anaconda channel
         """
         if not self._is_fitted:
             raise exceptions.SnowflakeMLException(
@@ -378,7 +385,7 @@ class LGBMRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_type_inferred,
             )

@@ -438,16 +445,16 @@ class LGBMRegressor(BaseTransformer):
             # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes) (and everything in between)
             # based on init param values. We will convert that to pandas dataframe of shape (n_samples, 1) with
             # each row containing a list of values.
-            expected_dtype = "
+            expected_dtype = "array"

         # If we were unable to assign a type to this transform in the factory, infer the type here.
         if expected_dtype == "":
-            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "
+            # If this is a clustering transformer, if the number of output columns does not equal the number of clusters the response will be an "array"
             if hasattr(self._sklearn_object, "n_clusters") and getattr(self._sklearn_object, "n_clusters") != len(self.output_cols):
-                expected_dtype = "
-            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "
+                expected_dtype = "array"
+            # If this is a decomposition transformer, if the number of output columns does not equal the number of components the response will be an "array"
             elif hasattr(self._sklearn_object, "n_components") and getattr(self._sklearn_object, "n_components") != len(self.output_cols):
-                expected_dtype = "
+                expected_dtype = "array"
             else:
                 output_types = [signature.as_snowpark_type() for signature in _infer_signature(dataset[self.input_cols], "output", use_snowflake_identifiers=True)]
                 # We can only infer the output types from the input types if the following two statemetns are true:
@@ -465,7 +472,7 @@ class LGBMRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type = expected_dtype,
             )

@@ -516,7 +523,7 @@ class LGBMRegressor(BaseTransformer):
             subproject=_SUBPROJECT,
         )
         output_result, fitted_estimator = model_trainer.train_fit_predict(
-
+            drop_input_cols=self._drop_input_cols,
             expected_output_cols_list=self.output_cols if self.output_cols else self._get_output_column_names(output_cols_prefix),
         )
         self._sklearn_object = fitted_estimator
@@ -534,44 +541,6 @@ class LGBMRegressor(BaseTransformer):
         assert self._sklearn_object is not None
         return self._sklearn_object.embedding_

-
-    def _get_output_column_names(self, output_cols_prefix: str, output_cols: Optional[List[str]] = None) -> List[str]:
-        """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
-        """
-        output_cols_prefix = identifier.resolve_identifier(output_cols_prefix)
-        if output_cols:
-            output_cols = [
-                identifier.concat_names([output_cols_prefix, identifier.resolve_identifier(c)])
-                for c in output_cols
-            ]
-        elif getattr(self._sklearn_object, "classes_", None) is None:
-            output_cols = [output_cols_prefix]
-        elif self._sklearn_object is not None:
-            classes = self._sklearn_object.classes_
-            if isinstance(classes, numpy.ndarray):
-                output_cols = [f'{output_cols_prefix}{str(c)}' for c in classes.tolist()]
-            elif isinstance(classes, list) and len(classes) > 0 and isinstance(classes[0], numpy.ndarray):
-                # If the estimator is a multioutput estimator, classes_ will be a list of ndarrays.
-                output_cols = []
-                for i, cl in enumerate(classes):
-                    # For binary classification, there is only one output column for each class
-                    # ndarray as the two classes are complementary.
-                    if len(cl) == 2:
-                        output_cols.append(f'{output_cols_prefix}{i}_{cl[0]}')
-                    else:
-                        output_cols.extend([
-                            f'{output_cols_prefix}{i}_{c}' for c in cl.tolist()
-                        ])
-            else:
-                output_cols = []
-
-        # Make sure column names are valid snowflake identifiers.
-        assert output_cols is not None # Make MyPy happy
-        rv = [identifier.rename_to_valid_snowflake_identifier(c) for c in output_cols]
-
-        return rv
-
     @available_if(original_estimator_has_callable("predict_proba")) # type: ignore[misc]
     @telemetry.send_api_usage_telemetry(
         project=_PROJECT,
@@ -611,7 +580,7 @@ class LGBMRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -676,7 +645,7 @@ class LGBMRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )
         elif isinstance(dataset, pd.DataFrame):
@@ -737,7 +706,7 @@ class LGBMRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -802,7 +771,7 @@ class LGBMRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session=dataset._session,
                 dependencies=self._deps,
-
+                drop_input_cols = self._drop_input_cols,
                 expected_output_cols_type="float",
             )

@@ -858,14 +827,18 @@ class LGBMRegressor(BaseTransformer):
         transform_kwargs: ScoreKwargsTypedDict = dict()

         if isinstance(dataset, DataFrame):
+            self._deps = self._batch_inference_validate_snowpark(
+                dataset=dataset,
+                inference_method="score",
+            )
             selected_cols = self._get_active_columns()
             if len(selected_cols) > 0:
                 dataset = dataset.select(selected_cols)
             assert isinstance(dataset._session, Session) # keep mypy happy
             transform_kwargs = dict(
                 session=dataset._session,
-                dependencies=["snowflake-snowpark-python"] + self.
-                score_sproc_imports=['lightgbm'],
+                dependencies=["snowflake-snowpark-python"] + self._deps,
+                score_sproc_imports=['lightgbm', 'sklearn'],
             )
         elif isinstance(dataset, pd.DataFrame):
             # pandas_handler.score() does not require any extra kwargs.
@@ -938,9 +911,9 @@ class LGBMRegressor(BaseTransformer):
             transform_kwargs = dict(
                 session = dataset._session,
                 dependencies = self._deps,
-
-                expected_output_cols_type
-                n_neighbors =
+                drop_input_cols = self._drop_input_cols,
+                expected_output_cols_type="array",
+                n_neighbors = n_neighbors,
                 return_distance = return_distance
             )
         elif isinstance(dataset, pd.DataFrame):
```
|