snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +8 -35
- snowflake/ml/_internal/utils/identifier.py +74 -7
- snowflake/ml/model/_core_requirements.py +1 -1
- snowflake/ml/model/_deploy_client/warehouse/deploy.py +5 -26
- snowflake/ml/model/_deploy_client/warehouse/infer_template.py +2 -2
- snowflake/ml/model/_handlers/_base.py +3 -1
- snowflake/ml/model/_handlers/sklearn.py +1 -0
- snowflake/ml/model/_handlers/xgboost.py +1 -1
- snowflake/ml/model/_model.py +24 -19
- snowflake/ml/model/_model_meta.py +24 -15
- snowflake/ml/model/type_hints.py +5 -11
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +28 -17
- snowflake/ml/modeling/cluster/affinity_propagation.py +28 -17
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +28 -17
- snowflake/ml/modeling/cluster/birch.py +28 -17
- snowflake/ml/modeling/cluster/bisecting_k_means.py +28 -17
- snowflake/ml/modeling/cluster/dbscan.py +28 -17
- snowflake/ml/modeling/cluster/feature_agglomeration.py +28 -17
- snowflake/ml/modeling/cluster/k_means.py +28 -17
- snowflake/ml/modeling/cluster/mean_shift.py +28 -17
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +28 -17
- snowflake/ml/modeling/cluster/optics.py +28 -17
- snowflake/ml/modeling/cluster/spectral_biclustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_clustering.py +28 -17
- snowflake/ml/modeling/cluster/spectral_coclustering.py +28 -17
- snowflake/ml/modeling/compose/column_transformer.py +28 -17
- snowflake/ml/modeling/compose/transformed_target_regressor.py +28 -17
- snowflake/ml/modeling/covariance/elliptic_envelope.py +28 -17
- snowflake/ml/modeling/covariance/empirical_covariance.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso.py +28 -17
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +28 -17
- snowflake/ml/modeling/covariance/ledoit_wolf.py +28 -17
- snowflake/ml/modeling/covariance/min_cov_det.py +28 -17
- snowflake/ml/modeling/covariance/oas.py +28 -17
- snowflake/ml/modeling/covariance/shrunk_covariance.py +28 -17
- snowflake/ml/modeling/decomposition/dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/factor_analysis.py +28 -17
- snowflake/ml/modeling/decomposition/fast_ica.py +28 -17
- snowflake/ml/modeling/decomposition/incremental_pca.py +28 -17
- snowflake/ml/modeling/decomposition/kernel_pca.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +28 -17
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/pca.py +28 -17
- snowflake/ml/modeling/decomposition/sparse_pca.py +28 -17
- snowflake/ml/modeling/decomposition/truncated_svd.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/bagging_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/isolation_forest.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/stacking_regressor.py +28 -17
- snowflake/ml/modeling/ensemble/voting_classifier.py +28 -17
- snowflake/ml/modeling/ensemble/voting_regressor.py +28 -17
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fdr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fpr.py +28 -17
- snowflake/ml/modeling/feature_selection/select_fwe.py +28 -17
- snowflake/ml/modeling/feature_selection/select_k_best.py +28 -17
- snowflake/ml/modeling/feature_selection/select_percentile.py +28 -17
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +28 -17
- snowflake/ml/modeling/feature_selection/variance_threshold.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +28 -17
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +28 -17
- snowflake/ml/modeling/impute/iterative_imputer.py +28 -17
- snowflake/ml/modeling/impute/knn_imputer.py +28 -17
- snowflake/ml/modeling/impute/missing_indicator.py +28 -17
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/nystroem.py +28 -17
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +28 -17
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +28 -17
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +28 -17
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +28 -17
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ard_regression.py +28 -17
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/gamma_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/huber_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/lars.py +28 -17
- snowflake/ml/modeling/linear_model/lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +28 -17
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +28 -17
- snowflake/ml/modeling/linear_model/linear_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -17
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +28 -17
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +28 -17
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/perceptron.py +28 -17
- snowflake/ml/modeling/linear_model/poisson_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ransac_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/ridge.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +28 -17
- snowflake/ml/modeling/linear_model/ridge_cv.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_classifier.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +28 -17
- snowflake/ml/modeling/linear_model/sgd_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +28 -17
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +28 -17
- snowflake/ml/modeling/manifold/isomap.py +28 -17
- snowflake/ml/modeling/manifold/mds.py +28 -17
- snowflake/ml/modeling/manifold/spectral_embedding.py +28 -17
- snowflake/ml/modeling/manifold/tsne.py +28 -17
- snowflake/ml/modeling/metrics/classification.py +6 -1
- snowflake/ml/modeling/metrics/regression.py +517 -9
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +28 -17
- snowflake/ml/modeling/mixture/gaussian_mixture.py +28 -17
- snowflake/ml/modeling/model_selection/grid_search_cv.py +28 -17
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +28 -17
- snowflake/ml/modeling/multiclass/output_code_classifier.py +28 -17
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/complement_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +28 -17
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neighbors/kernel_density.py +28 -17
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_centroid.py +28 -17
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +28 -17
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +28 -17
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +28 -17
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_classifier.py +28 -17
- snowflake/ml/modeling/neural_network/mlp_regressor.py +28 -17
- snowflake/ml/modeling/pipeline/pipeline.py +24 -0
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
- snowflake/ml/modeling/preprocessing/polynomial_features.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_propagation.py +28 -17
- snowflake/ml/modeling/semi_supervised/label_spreading.py +28 -17
- snowflake/ml/modeling/svm/linear_svc.py +28 -17
- snowflake/ml/modeling/svm/linear_svr.py +28 -17
- snowflake/ml/modeling/svm/nu_svc.py +28 -17
- snowflake/ml/modeling/svm/nu_svr.py +28 -17
- snowflake/ml/modeling/svm/svc.py +28 -17
- snowflake/ml/modeling/svm/svr.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/decision_tree_regressor.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -17
- snowflake/ml/modeling/tree/extra_tree_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgb_regressor.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +28 -17
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +28 -17
- snowflake/ml/registry/model_registry.py +49 -65
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/METADATA +24 -1
- snowflake_ml_python-1.0.2.dist-info/RECORD +246 -0
- snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
- {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.2.dist-info}/WHEEL +0 -0
@@ -798,26 +798,37 @@ class MLPRegressor(BaseTransformer):
|
|
798
798
|
# input cols need to match unquoted / quoted
|
799
799
|
input_cols = self.input_cols
|
800
800
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
801
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
801
802
|
|
802
803
|
estimator = self._sklearn_object
|
803
804
|
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
805
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
806
|
+
missing_features = []
|
807
|
+
features_in_dataset = set(dataset.columns)
|
808
|
+
columns_to_select = []
|
809
|
+
for i, f in enumerate(features_required_by_estimator):
|
810
|
+
if (
|
811
|
+
i >= len(input_cols)
|
812
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
813
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
814
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
815
|
+
):
|
816
|
+
missing_features.append(f)
|
817
|
+
elif input_cols[i] in features_in_dataset:
|
818
|
+
columns_to_select.append(input_cols[i])
|
819
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
820
|
+
columns_to_select.append(unquoted_input_cols[i])
|
821
|
+
else:
|
822
|
+
columns_to_select.append(quoted_input_cols[i])
|
823
|
+
|
824
|
+
if len(missing_features) > 0:
|
825
|
+
raise ValueError(
|
826
|
+
"The feature names should match with those that were passed during fit.\n"
|
827
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
828
|
+
f"Features in the input dataframe : {input_cols}\n"
|
829
|
+
)
|
830
|
+
input_df = dataset[columns_to_select]
|
831
|
+
input_df.columns = features_required_by_estimator
|
821
832
|
|
822
833
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
823
834
|
input_df
|
@@ -14,6 +14,7 @@ from sklearn.utils import metaestimators
|
|
14
14
|
|
15
15
|
from snowflake import snowpark
|
16
16
|
from snowflake.ml._internal import telemetry
|
17
|
+
from snowflake.ml.model.model_signature import ModelSignature, _infer_signature
|
17
18
|
from snowflake.ml.modeling.framework import _utils, base
|
18
19
|
|
19
20
|
_PROJECT = "ModelDevelopment"
|
@@ -103,6 +104,8 @@ class Pipeline(base.BaseTransformer):
|
|
103
104
|
self._transformers_to_input_indices: Dict[str, List[int]] = {}
|
104
105
|
self._is_convertable_to_sklearn = True
|
105
106
|
|
107
|
+
self._model_signature_dict: Optional[Dict[str, ModelSignature]] = None
|
108
|
+
|
106
109
|
deps: Set[str] = {f"pandas=={pd.__version__}", f"scikit-learn=={skversion}"}
|
107
110
|
for _, obj in steps:
|
108
111
|
if isinstance(obj, base.BaseTransformer):
|
@@ -241,6 +244,7 @@ class Pipeline(base.BaseTransformer):
|
|
241
244
|
step_name=estimator[0], all_cols=all_cols, input_cols=estimator[1].get_input_cols()
|
242
245
|
)
|
243
246
|
|
247
|
+
self._get_model_signatures(dataset=dataset)
|
244
248
|
self._is_fitted = True
|
245
249
|
return self
|
246
250
|
|
@@ -309,6 +313,7 @@ class Pipeline(base.BaseTransformer):
|
|
309
313
|
res = estimator[1].fit(transformed_dataset).transform(transformed_dataset)
|
310
314
|
return res
|
311
315
|
|
316
|
+
self._get_model_signatures(dataset=dataset)
|
312
317
|
self._is_fitted = True
|
313
318
|
return transformed_dataset
|
314
319
|
|
@@ -346,6 +351,7 @@ class Pipeline(base.BaseTransformer):
|
|
346
351
|
else:
|
347
352
|
transformed_dataset = estimator[1].fit(transformed_dataset).predict(transformed_dataset)
|
348
353
|
|
354
|
+
self._get_model_signatures(dataset=dataset)
|
349
355
|
self._is_fitted = True
|
350
356
|
return transformed_dataset
|
351
357
|
|
@@ -559,3 +565,21 @@ class Pipeline(base.BaseTransformer):
|
|
559
565
|
|
560
566
|
def _get_dependencies(self) -> List[str]:
|
561
567
|
return self._deps
|
568
|
+
|
569
|
+
def _get_model_signatures(self, dataset: Union[snowpark.DataFrame, pd.DataFrame]) -> None:
|
570
|
+
self._model_signature_dict = dict()
|
571
|
+
|
572
|
+
input_columns = self._get_sanitized_list_of_columns(dataset.columns)
|
573
|
+
inputs_signature = _infer_signature(dataset[input_columns], "input")
|
574
|
+
|
575
|
+
estimator_step = self._get_estimator()
|
576
|
+
if estimator_step:
|
577
|
+
estimator_signatures = estimator_step[1].model_signatures
|
578
|
+
for method, signature in estimator_signatures.items():
|
579
|
+
self._model_signature_dict[method] = ModelSignature(inputs=inputs_signature, outputs=signature.outputs)
|
580
|
+
|
581
|
+
@property
|
582
|
+
def model_signatures(self) -> Dict[str, ModelSignature]:
|
583
|
+
if self._model_signature_dict is None:
|
584
|
+
raise RuntimeError("Estimator not fitted before accessing property model_signatures! ")
|
585
|
+
return self._model_signature_dict
|
@@ -800,7 +800,7 @@ class OneHotEncoder(base.BaseTransformer):
|
|
800
800
|
state_df = dataset._session.create_dataframe(state_pandas)
|
801
801
|
|
802
802
|
transformed_dataset = dataset
|
803
|
-
|
803
|
+
original_dataset_columns = transformed_dataset.columns[:]
|
804
804
|
all_output_cols = []
|
805
805
|
for input_col in self.input_cols:
|
806
806
|
output_cols = [
|
@@ -818,7 +818,7 @@ class OneHotEncoder(base.BaseTransformer):
|
|
818
818
|
|
819
819
|
transformed_dataset = self._handle_unknown_in_transform(transformed_dataset)
|
820
820
|
# Reorder columns. Passthrough columns are added at the right to the output of the transformers.
|
821
|
-
transformed_dataset = transformed_dataset[all_output_cols +
|
821
|
+
transformed_dataset = transformed_dataset[all_output_cols + original_dataset_columns]
|
822
822
|
return transformed_dataset
|
823
823
|
|
824
824
|
def _transform_snowpark_sparse_udf(self, dataset: snowpark.DataFrame) -> snowpark.DataFrame:
|
@@ -895,15 +895,14 @@ class OneHotEncoder(base.BaseTransformer):
|
|
895
895
|
Output dataset.
|
896
896
|
"""
|
897
897
|
encoder_sklearn = self.to_sklearn()
|
898
|
-
|
899
898
|
transformed_dataset = encoder_sklearn.transform(dataset[self.input_cols])
|
900
899
|
|
901
|
-
if
|
902
|
-
|
903
|
-
dataset[self.get_output_cols()] = transformed_dataset
|
904
|
-
return dataset
|
900
|
+
if self.sparse:
|
901
|
+
return transformed_dataset
|
905
902
|
|
906
|
-
|
903
|
+
dataset = dataset.copy()
|
904
|
+
dataset[self.get_output_cols()] = transformed_dataset
|
905
|
+
return dataset
|
907
906
|
|
908
907
|
def _create_unfitted_sklearn_object(self) -> preprocessing.OneHotEncoder:
|
909
908
|
sklearn_args = self.get_sklearn_args(
|
@@ -1331,17 +1330,17 @@ class OneHotEncoder(base.BaseTransformer):
|
|
1331
1330
|
Output columns.
|
1332
1331
|
"""
|
1333
1332
|
if self.sparse:
|
1334
|
-
|
1335
|
-
|
1336
|
-
|
1337
|
-
|
1338
|
-
|
1339
|
-
|
1340
|
-
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1344
|
-
|
1333
|
+
return self.output_cols
|
1334
|
+
|
1335
|
+
output_cols = (
|
1336
|
+
[
|
1337
|
+
identifier.get_inferred_name(col)
|
1338
|
+
for input_col in self.input_cols
|
1339
|
+
for col in self._dense_output_cols_mappings[input_col]
|
1340
|
+
]
|
1341
|
+
if self._dense_output_cols_mappings
|
1342
|
+
else []
|
1343
|
+
)
|
1345
1344
|
return output_cols
|
1346
1345
|
|
1347
1346
|
def _get_dense_output_cols_mappings(self) -> None:
|
@@ -639,26 +639,37 @@ class PolynomialFeatures(BaseTransformer):
|
|
639
639
|
# input cols need to match unquoted / quoted
|
640
640
|
input_cols = self.input_cols
|
641
641
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
642
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
642
643
|
|
643
644
|
estimator = self._sklearn_object
|
644
645
|
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
646
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
647
|
+
missing_features = []
|
648
|
+
features_in_dataset = set(dataset.columns)
|
649
|
+
columns_to_select = []
|
650
|
+
for i, f in enumerate(features_required_by_estimator):
|
651
|
+
if (
|
652
|
+
i >= len(input_cols)
|
653
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
654
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
655
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
656
|
+
):
|
657
|
+
missing_features.append(f)
|
658
|
+
elif input_cols[i] in features_in_dataset:
|
659
|
+
columns_to_select.append(input_cols[i])
|
660
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
661
|
+
columns_to_select.append(unquoted_input_cols[i])
|
662
|
+
else:
|
663
|
+
columns_to_select.append(quoted_input_cols[i])
|
664
|
+
|
665
|
+
if len(missing_features) > 0:
|
666
|
+
raise ValueError(
|
667
|
+
"The feature names should match with those that were passed during fit.\n"
|
668
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
669
|
+
f"Features in the input dataframe : {input_cols}\n"
|
670
|
+
)
|
671
|
+
input_df = dataset[columns_to_select]
|
672
|
+
input_df.columns = features_required_by_estimator
|
662
673
|
|
663
674
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
664
675
|
input_df
|
@@ -643,26 +643,37 @@ class LabelPropagation(BaseTransformer):
|
|
643
643
|
# input cols need to match unquoted / quoted
|
644
644
|
input_cols = self.input_cols
|
645
645
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
646
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
646
647
|
|
647
648
|
estimator = self._sklearn_object
|
648
649
|
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
650
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
651
|
+
missing_features = []
|
652
|
+
features_in_dataset = set(dataset.columns)
|
653
|
+
columns_to_select = []
|
654
|
+
for i, f in enumerate(features_required_by_estimator):
|
655
|
+
if (
|
656
|
+
i >= len(input_cols)
|
657
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
658
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
659
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
660
|
+
):
|
661
|
+
missing_features.append(f)
|
662
|
+
elif input_cols[i] in features_in_dataset:
|
663
|
+
columns_to_select.append(input_cols[i])
|
664
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
665
|
+
columns_to_select.append(unquoted_input_cols[i])
|
666
|
+
else:
|
667
|
+
columns_to_select.append(quoted_input_cols[i])
|
668
|
+
|
669
|
+
if len(missing_features) > 0:
|
670
|
+
raise ValueError(
|
671
|
+
"The feature names should match with those that were passed during fit.\n"
|
672
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
673
|
+
f"Features in the input dataframe : {input_cols}\n"
|
674
|
+
)
|
675
|
+
input_df = dataset[columns_to_select]
|
676
|
+
input_df.columns = features_required_by_estimator
|
666
677
|
|
667
678
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
668
679
|
input_df
|
@@ -652,26 +652,37 @@ class LabelSpreading(BaseTransformer):
|
|
652
652
|
# input cols need to match unquoted / quoted
|
653
653
|
input_cols = self.input_cols
|
654
654
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
655
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
655
656
|
|
656
657
|
estimator = self._sklearn_object
|
657
658
|
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
659
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
660
|
+
missing_features = []
|
661
|
+
features_in_dataset = set(dataset.columns)
|
662
|
+
columns_to_select = []
|
663
|
+
for i, f in enumerate(features_required_by_estimator):
|
664
|
+
if (
|
665
|
+
i >= len(input_cols)
|
666
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
667
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
668
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
669
|
+
):
|
670
|
+
missing_features.append(f)
|
671
|
+
elif input_cols[i] in features_in_dataset:
|
672
|
+
columns_to_select.append(input_cols[i])
|
673
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
674
|
+
columns_to_select.append(unquoted_input_cols[i])
|
675
|
+
else:
|
676
|
+
columns_to_select.append(quoted_input_cols[i])
|
677
|
+
|
678
|
+
if len(missing_features) > 0:
|
679
|
+
raise ValueError(
|
680
|
+
"The feature names should match with those that were passed during fit.\n"
|
681
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
682
|
+
f"Features in the input dataframe : {input_cols}\n"
|
683
|
+
)
|
684
|
+
input_df = dataset[columns_to_select]
|
685
|
+
input_df.columns = features_required_by_estimator
|
675
686
|
|
676
687
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
677
688
|
input_df
|
@@ -703,26 +703,37 @@ class LinearSVC(BaseTransformer):
|
|
703
703
|
# input cols need to match unquoted / quoted
|
704
704
|
input_cols = self.input_cols
|
705
705
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
706
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
706
707
|
|
707
708
|
estimator = self._sklearn_object
|
708
709
|
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
710
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
711
|
+
missing_features = []
|
712
|
+
features_in_dataset = set(dataset.columns)
|
713
|
+
columns_to_select = []
|
714
|
+
for i, f in enumerate(features_required_by_estimator):
|
715
|
+
if (
|
716
|
+
i >= len(input_cols)
|
717
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
718
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
719
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
720
|
+
):
|
721
|
+
missing_features.append(f)
|
722
|
+
elif input_cols[i] in features_in_dataset:
|
723
|
+
columns_to_select.append(input_cols[i])
|
724
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
725
|
+
columns_to_select.append(unquoted_input_cols[i])
|
726
|
+
else:
|
727
|
+
columns_to_select.append(quoted_input_cols[i])
|
728
|
+
|
729
|
+
if len(missing_features) > 0:
|
730
|
+
raise ValueError(
|
731
|
+
"The feature names should match with those that were passed during fit.\n"
|
732
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
733
|
+
f"Features in the input dataframe : {input_cols}\n"
|
734
|
+
)
|
735
|
+
input_df = dataset[columns_to_select]
|
736
|
+
input_df.columns = features_required_by_estimator
|
726
737
|
|
727
738
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
728
739
|
input_df
|
@@ -676,26 +676,37 @@ class LinearSVR(BaseTransformer):
|
|
676
676
|
# input cols need to match unquoted / quoted
|
677
677
|
input_cols = self.input_cols
|
678
678
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
679
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
679
680
|
|
680
681
|
estimator = self._sklearn_object
|
681
682
|
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
683
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
684
|
+
missing_features = []
|
685
|
+
features_in_dataset = set(dataset.columns)
|
686
|
+
columns_to_select = []
|
687
|
+
for i, f in enumerate(features_required_by_estimator):
|
688
|
+
if (
|
689
|
+
i >= len(input_cols)
|
690
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
691
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
692
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
693
|
+
):
|
694
|
+
missing_features.append(f)
|
695
|
+
elif input_cols[i] in features_in_dataset:
|
696
|
+
columns_to_select.append(input_cols[i])
|
697
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
698
|
+
columns_to_select.append(unquoted_input_cols[i])
|
699
|
+
else:
|
700
|
+
columns_to_select.append(quoted_input_cols[i])
|
701
|
+
|
702
|
+
if len(missing_features) > 0:
|
703
|
+
raise ValueError(
|
704
|
+
"The feature names should match with those that were passed during fit.\n"
|
705
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
706
|
+
f"Features in the input dataframe : {input_cols}\n"
|
707
|
+
)
|
708
|
+
input_df = dataset[columns_to_select]
|
709
|
+
input_df.columns = features_required_by_estimator
|
699
710
|
|
700
711
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
701
712
|
input_df
|
@@ -714,26 +714,37 @@ class NuSVC(BaseTransformer):
|
|
714
714
|
# input cols need to match unquoted / quoted
|
715
715
|
input_cols = self.input_cols
|
716
716
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
717
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
717
718
|
|
718
719
|
estimator = self._sklearn_object
|
719
720
|
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
721
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
722
|
+
missing_features = []
|
723
|
+
features_in_dataset = set(dataset.columns)
|
724
|
+
columns_to_select = []
|
725
|
+
for i, f in enumerate(features_required_by_estimator):
|
726
|
+
if (
|
727
|
+
i >= len(input_cols)
|
728
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
729
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
730
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
731
|
+
):
|
732
|
+
missing_features.append(f)
|
733
|
+
elif input_cols[i] in features_in_dataset:
|
734
|
+
columns_to_select.append(input_cols[i])
|
735
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
736
|
+
columns_to_select.append(unquoted_input_cols[i])
|
737
|
+
else:
|
738
|
+
columns_to_select.append(quoted_input_cols[i])
|
739
|
+
|
740
|
+
if len(missing_features) > 0:
|
741
|
+
raise ValueError(
|
742
|
+
"The feature names should match with those that were passed during fit.\n"
|
743
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
744
|
+
f"Features in the input dataframe : {input_cols}\n"
|
745
|
+
)
|
746
|
+
input_df = dataset[columns_to_select]
|
747
|
+
input_df.columns = features_required_by_estimator
|
737
748
|
|
738
749
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
739
750
|
input_df
|
@@ -675,26 +675,37 @@ class NuSVR(BaseTransformer):
|
|
675
675
|
# input cols need to match unquoted / quoted
|
676
676
|
input_cols = self.input_cols
|
677
677
|
unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
|
678
|
+
quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
|
678
679
|
|
679
680
|
estimator = self._sklearn_object
|
680
681
|
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
682
|
+
features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
|
683
|
+
missing_features = []
|
684
|
+
features_in_dataset = set(dataset.columns)
|
685
|
+
columns_to_select = []
|
686
|
+
for i, f in enumerate(features_required_by_estimator):
|
687
|
+
if (
|
688
|
+
i >= len(input_cols)
|
689
|
+
or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
|
690
|
+
or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
|
691
|
+
and quoted_input_cols[i] not in features_in_dataset)
|
692
|
+
):
|
693
|
+
missing_features.append(f)
|
694
|
+
elif input_cols[i] in features_in_dataset:
|
695
|
+
columns_to_select.append(input_cols[i])
|
696
|
+
elif unquoted_input_cols[i] in features_in_dataset:
|
697
|
+
columns_to_select.append(unquoted_input_cols[i])
|
698
|
+
else:
|
699
|
+
columns_to_select.append(quoted_input_cols[i])
|
700
|
+
|
701
|
+
if len(missing_features) > 0:
|
702
|
+
raise ValueError(
|
703
|
+
"The feature names should match with those that were passed during fit.\n"
|
704
|
+
f"Features seen during fit call but not present in the input: {missing_features}\n"
|
705
|
+
f"Features in the input dataframe : {input_cols}\n"
|
706
|
+
)
|
707
|
+
input_df = dataset[columns_to_select]
|
708
|
+
input_df.columns = features_required_by_estimator
|
698
709
|
|
699
710
|
transformed_numpy_array = getattr(estimator, inference_method)(
|
700
711
|
input_df
|