snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -800,7 +800,7 @@ class OneHotEncoder(base.BaseTransformer):
800
800
  state_df = dataset._session.create_dataframe(state_pandas)
801
801
 
802
802
  transformed_dataset = dataset
803
- origional_dataset_columns = transformed_dataset.columns[:]
803
+ original_dataset_columns = transformed_dataset.columns[:]
804
804
  all_output_cols = []
805
805
  for input_col in self.input_cols:
806
806
  output_cols = [
@@ -818,7 +818,7 @@ class OneHotEncoder(base.BaseTransformer):
818
818
 
819
819
  transformed_dataset = self._handle_unknown_in_transform(transformed_dataset)
820
820
  # Reorder columns. Passthrough columns are added at the right to the output of the transformers.
821
- transformed_dataset = transformed_dataset[all_output_cols + origional_dataset_columns]
821
+ transformed_dataset = transformed_dataset[all_output_cols + original_dataset_columns]
822
822
  return transformed_dataset
823
823
 
824
824
  def _transform_snowpark_sparse_udf(self, dataset: snowpark.DataFrame) -> snowpark.DataFrame:
@@ -895,15 +895,14 @@ class OneHotEncoder(base.BaseTransformer):
895
895
  Output dataset.
896
896
  """
897
897
  encoder_sklearn = self.to_sklearn()
898
-
899
898
  transformed_dataset = encoder_sklearn.transform(dataset[self.input_cols])
900
899
 
901
- if not self.sparse:
902
- dataset = dataset.copy()
903
- dataset[self.get_output_cols()] = transformed_dataset
904
- return dataset
900
+ if self.sparse:
901
+ return transformed_dataset
905
902
 
906
- return transformed_dataset
903
+ dataset = dataset.copy()
904
+ dataset[self.get_output_cols()] = transformed_dataset
905
+ return dataset
907
906
 
908
907
  def _create_unfitted_sklearn_object(self) -> preprocessing.OneHotEncoder:
909
908
  sklearn_args = self.get_sklearn_args(
@@ -1331,17 +1330,17 @@ class OneHotEncoder(base.BaseTransformer):
1331
1330
  Output columns.
1332
1331
  """
1333
1332
  if self.sparse:
1334
- output_cols = self.output_cols
1335
- else:
1336
- output_cols = (
1337
- [
1338
- identifier.quote_name_without_upper_casing(col)
1339
- for input_col in self.input_cols
1340
- for col in self._dense_output_cols_mappings[input_col]
1341
- ]
1342
- if self._dense_output_cols_mappings
1343
- else []
1344
- )
1333
+ return self.output_cols
1334
+
1335
+ output_cols = (
1336
+ [
1337
+ identifier.get_inferred_name(col)
1338
+ for input_col in self.input_cols
1339
+ for col in self._dense_output_cols_mappings[input_col]
1340
+ ]
1341
+ if self._dense_output_cols_mappings
1342
+ else []
1343
+ )
1345
1344
  return output_cols
1346
1345
 
1347
1346
  def _get_dense_output_cols_mappings(self) -> None:
@@ -121,6 +121,7 @@ class OrdinalEncoder(base.BaseTransformer):
121
121
  self.categories_: Dict[str, type_utils.LiteralNDArrayType] = {}
122
122
  self._categories_list: List[type_utils.LiteralNDArrayType] = []
123
123
  self._missing_indices: Dict[int, int] = {}
124
+ self._infrequent_enabled = False
124
125
  self._vocab_table_name = "snowml_preprocessing_ordinal_encoder_temp_table_" + uuid.uuid4().hex
125
126
 
126
127
  self.set_input_cols(input_cols)
@@ -547,6 +548,7 @@ class OrdinalEncoder(base.BaseTransformer):
547
548
  if self._is_fitted:
548
549
  encoder.categories_ = self._categories_list
549
550
  encoder._missing_indices = self._missing_indices
551
+ encoder._infrequent_enabled = self._infrequent_enabled
550
552
  return encoder
551
553
 
552
554
  def _validate_keywords(self) -> None:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -208,7 +210,6 @@ class PolynomialFeatures(BaseTransformer):
208
210
  sample_weight_col: Optional[str] = None,
209
211
  ) -> None:
210
212
  super().__init__()
211
- self.id = str(uuid4()).replace("-", "_").upper()
212
213
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
213
214
 
214
215
  self._deps = list(deps)
@@ -231,6 +232,15 @@ class PolynomialFeatures(BaseTransformer):
231
232
  self.set_drop_input_cols(drop_input_cols)
232
233
  self.set_sample_weight_col(sample_weight_col)
233
234
 
235
+ def _get_rand_id(self) -> str:
236
+ """
237
+ Generate random id to be used in sproc and stage names.
238
+
239
+ Returns:
240
+ Random id string usable in sproc, table, and stage names.
241
+ """
242
+ return str(uuid4()).replace("-", "_").upper()
243
+
234
244
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
235
245
  """
236
246
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -309,7 +319,7 @@ class PolynomialFeatures(BaseTransformer):
309
319
  cp.dump(self._sklearn_object, local_transform_file)
310
320
 
311
321
  # Create temp stage to run fit.
312
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
322
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
313
323
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
314
324
  SqlResultValidator(
315
325
  session=session,
@@ -322,11 +332,12 @@ class PolynomialFeatures(BaseTransformer):
322
332
  expected_value=f"Stage area {transform_stage_name} successfully created."
323
333
  ).validate()
324
334
 
325
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
335
+ # Use posixpath to construct stage paths
336
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
337
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
326
338
  local_result_file_name = get_temp_file_path()
327
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
328
339
 
329
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
340
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
330
341
  statement_params = telemetry.get_function_usage_statement_params(
331
342
  project=_PROJECT,
332
343
  subproject=_SUBPROJECT,
@@ -352,6 +363,7 @@ class PolynomialFeatures(BaseTransformer):
352
363
  replace=True,
353
364
  session=session,
354
365
  statement_params=statement_params,
366
+ anonymous=True
355
367
  )
356
368
  def fit_wrapper_sproc(
357
369
  session: Session,
@@ -360,7 +372,8 @@ class PolynomialFeatures(BaseTransformer):
360
372
  stage_result_file_name: str,
361
373
  input_cols: List[str],
362
374
  label_cols: List[str],
363
- sample_weight_col: Optional[str]
375
+ sample_weight_col: Optional[str],
376
+ statement_params: Dict[str, str]
364
377
  ) -> str:
365
378
  import cloudpickle as cp
366
379
  import numpy as np
@@ -427,15 +440,15 @@ class PolynomialFeatures(BaseTransformer):
427
440
  api_calls=[Session.call],
428
441
  custom_tags=dict([("autogen", True)]),
429
442
  )
430
- sproc_export_file_name = session.call(
431
- fit_sproc_name,
443
+ sproc_export_file_name = fit_wrapper_sproc(
444
+ session,
432
445
  query,
433
446
  stage_transform_file_name,
434
447
  stage_result_file_name,
435
448
  identifier.get_unescaped_names(self.input_cols),
436
449
  identifier.get_unescaped_names(self.label_cols),
437
450
  identifier.get_unescaped_names(self.sample_weight_col),
438
- statement_params=statement_params,
451
+ statement_params,
439
452
  )
440
453
 
441
454
  if "|" in sproc_export_file_name:
@@ -445,7 +458,7 @@ class PolynomialFeatures(BaseTransformer):
445
458
  print("\n".join(fields[1:]))
446
459
 
447
460
  session.file.get(
448
- os.path.join(stage_result_file_name, sproc_export_file_name),
461
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
449
462
  local_result_file_name,
450
463
  statement_params=statement_params
451
464
  )
@@ -491,7 +504,7 @@ class PolynomialFeatures(BaseTransformer):
491
504
 
492
505
  # Register vectorized UDF for batch inference
493
506
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
494
- safe_id=self.id, method=inference_method)
507
+ safe_id=self._get_rand_id(), method=inference_method)
495
508
 
496
509
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
497
510
  # will try to pickle all of self which fails.
@@ -583,7 +596,7 @@ class PolynomialFeatures(BaseTransformer):
583
596
  return transformed_pandas_df.to_dict("records")
584
597
 
585
598
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
586
- safe_id=self.id
599
+ safe_id=self._get_rand_id()
587
600
  )
588
601
 
589
602
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -639,26 +652,37 @@ class PolynomialFeatures(BaseTransformer):
639
652
  # input cols need to match unquoted / quoted
640
653
  input_cols = self.input_cols
641
654
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
655
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
642
656
 
643
657
  estimator = self._sklearn_object
644
658
 
645
- input_df = dataset[input_cols] # Select input columns with quoted column names.
646
- if hasattr(estimator, "feature_names_in_"):
647
- missing_features = []
648
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
649
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
650
- missing_features.append(f)
651
-
652
- if len(missing_features) > 0:
653
- raise ValueError(
654
- "The feature names should match with those that were passed during fit.\n"
655
- f"Features seen during fit call but not present in the input: {missing_features}\n"
656
- f"Features in the input dataframe : {input_cols}\n"
657
- )
658
- input_df.columns = getattr(estimator, "feature_names_in_")
659
- else:
660
- # Just rename the column names to unquoted identifiers.
661
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
659
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
660
+ missing_features = []
661
+ features_in_dataset = set(dataset.columns)
662
+ columns_to_select = []
663
+ for i, f in enumerate(features_required_by_estimator):
664
+ if (
665
+ i >= len(input_cols)
666
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
667
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
668
+ and quoted_input_cols[i] not in features_in_dataset)
669
+ ):
670
+ missing_features.append(f)
671
+ elif input_cols[i] in features_in_dataset:
672
+ columns_to_select.append(input_cols[i])
673
+ elif unquoted_input_cols[i] in features_in_dataset:
674
+ columns_to_select.append(unquoted_input_cols[i])
675
+ else:
676
+ columns_to_select.append(quoted_input_cols[i])
677
+
678
+ if len(missing_features) > 0:
679
+ raise ValueError(
680
+ "The feature names should match with those that were passed during fit.\n"
681
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
682
+ f"Features in the input dataframe : {input_cols}\n"
683
+ )
684
+ input_df = dataset[columns_to_select]
685
+ input_df.columns = features_required_by_estimator
662
686
 
663
687
  transformed_numpy_array = getattr(estimator, inference_method)(
664
688
  input_df
@@ -737,11 +761,18 @@ class PolynomialFeatures(BaseTransformer):
737
761
  Transformed dataset.
738
762
  """
739
763
  if isinstance(dataset, DataFrame):
764
+ expected_type_inferred = ""
765
+ # when it is classifier, infer the datatype from label columns
766
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
767
+ expected_type_inferred = convert_sp_to_sf_type(
768
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
769
+ )
770
+
740
771
  output_df = self._batch_inference(
741
772
  dataset=dataset,
742
773
  inference_method="predict",
743
774
  expected_output_cols_list=self.output_cols,
744
- expected_output_cols_type="",
775
+ expected_output_cols_type=expected_type_inferred,
745
776
  )
746
777
  elif isinstance(dataset, pd.DataFrame):
747
778
  output_df = self._sklearn_inference(
@@ -814,10 +845,10 @@ class PolynomialFeatures(BaseTransformer):
814
845
 
815
846
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
816
847
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
817
- Returns an empty list if current object is not a classifier or not yet fitted.
848
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
818
849
  """
819
850
  if getattr(self._sklearn_object, "classes_", None) is None:
820
- return []
851
+ return [output_cols_prefix]
821
852
 
822
853
  classes = self._sklearn_object.classes_
823
854
  if isinstance(classes, numpy.ndarray):
@@ -1042,7 +1073,7 @@ class PolynomialFeatures(BaseTransformer):
1042
1073
  cp.dump(self._sklearn_object, local_score_file)
1043
1074
 
1044
1075
  # Create temp stage to run score.
1045
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1076
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1046
1077
  session = dataset._session
1047
1078
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1048
1079
  SqlResultValidator(
@@ -1056,8 +1087,9 @@ class PolynomialFeatures(BaseTransformer):
1056
1087
  expected_value=f"Stage area {score_stage_name} successfully created."
1057
1088
  ).validate()
1058
1089
 
1059
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1060
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1090
+ # Use posixpath to construct stage paths
1091
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1092
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1061
1093
  statement_params = telemetry.get_function_usage_statement_params(
1062
1094
  project=_PROJECT,
1063
1095
  subproject=_SUBPROJECT,
@@ -1083,6 +1115,7 @@ class PolynomialFeatures(BaseTransformer):
1083
1115
  replace=True,
1084
1116
  session=session,
1085
1117
  statement_params=statement_params,
1118
+ anonymous=True
1086
1119
  )
1087
1120
  def score_wrapper_sproc(
1088
1121
  session: Session,
@@ -1090,7 +1123,8 @@ class PolynomialFeatures(BaseTransformer):
1090
1123
  stage_score_file_name: str,
1091
1124
  input_cols: List[str],
1092
1125
  label_cols: List[str],
1093
- sample_weight_col: Optional[str]
1126
+ sample_weight_col: Optional[str],
1127
+ statement_params: Dict[str, str]
1094
1128
  ) -> float:
1095
1129
  import cloudpickle as cp
1096
1130
  import numpy as np
@@ -1140,14 +1174,14 @@ class PolynomialFeatures(BaseTransformer):
1140
1174
  api_calls=[Session.call],
1141
1175
  custom_tags=dict([("autogen", True)]),
1142
1176
  )
1143
- score = session.call(
1144
- score_sproc_name,
1177
+ score = score_wrapper_sproc(
1178
+ session,
1145
1179
  query,
1146
1180
  stage_score_file_name,
1147
1181
  identifier.get_unescaped_names(self.input_cols),
1148
1182
  identifier.get_unescaped_names(self.label_cols),
1149
1183
  identifier.get_unescaped_names(self.sample_weight_col),
1150
- statement_params=statement_params,
1184
+ statement_params,
1151
1185
  )
1152
1186
 
1153
1187
  cleanup_temp_files([local_score_file_name])
@@ -1165,18 +1199,20 @@ class PolynomialFeatures(BaseTransformer):
1165
1199
  if self._sklearn_object._estimator_type == 'classifier':
1166
1200
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1167
1201
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1168
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1202
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1203
+ ([] if self._drop_input_cols else inputs) + outputs)
1169
1204
  # For regressor, the type of predict is float64
1170
1205
  elif self._sklearn_object._estimator_type == 'regressor':
1171
1206
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1172
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1173
-
1207
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1208
+ ([] if self._drop_input_cols else inputs) + outputs)
1174
1209
  for prob_func in PROB_FUNCTIONS:
1175
1210
  if hasattr(self, prob_func):
1176
1211
  output_cols_prefix: str = f"{prob_func}_"
1177
1212
  output_column_names = self._get_output_column_names(output_cols_prefix)
1178
1213
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1179
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1214
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1215
+ ([] if self._drop_input_cols else inputs) + outputs)
1180
1216
 
1181
1217
  @property
1182
1218
  def model_signatures(self) -> Dict[str, ModelSignature]: