snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -245,7 +247,6 @@ class ElasticNet(BaseTransformer):
245
247
  sample_weight_col: Optional[str] = None,
246
248
  ) -> None:
247
249
  super().__init__()
248
- self.id = str(uuid4()).replace("-", "_").upper()
249
250
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
250
251
 
251
252
  self._deps = list(deps)
@@ -275,6 +276,15 @@ class ElasticNet(BaseTransformer):
275
276
  self.set_drop_input_cols(drop_input_cols)
276
277
  self.set_sample_weight_col(sample_weight_col)
277
278
 
279
+ def _get_rand_id(self) -> str:
280
+ """
281
+ Generate random id to be used in sproc and stage names.
282
+
283
+ Returns:
284
+ Random id string usable in sproc, table, and stage names.
285
+ """
286
+ return str(uuid4()).replace("-", "_").upper()
287
+
278
288
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
279
289
  """
280
290
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -353,7 +363,7 @@ class ElasticNet(BaseTransformer):
353
363
  cp.dump(self._sklearn_object, local_transform_file)
354
364
 
355
365
  # Create temp stage to run fit.
356
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
366
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
357
367
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
358
368
  SqlResultValidator(
359
369
  session=session,
@@ -366,11 +376,12 @@ class ElasticNet(BaseTransformer):
366
376
  expected_value=f"Stage area {transform_stage_name} successfully created."
367
377
  ).validate()
368
378
 
369
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
379
+ # Use posixpath to construct stage paths
380
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
381
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
370
382
  local_result_file_name = get_temp_file_path()
371
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
372
383
 
373
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
384
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
374
385
  statement_params = telemetry.get_function_usage_statement_params(
375
386
  project=_PROJECT,
376
387
  subproject=_SUBPROJECT,
@@ -396,6 +407,7 @@ class ElasticNet(BaseTransformer):
396
407
  replace=True,
397
408
  session=session,
398
409
  statement_params=statement_params,
410
+ anonymous=True
399
411
  )
400
412
  def fit_wrapper_sproc(
401
413
  session: Session,
@@ -404,7 +416,8 @@ class ElasticNet(BaseTransformer):
404
416
  stage_result_file_name: str,
405
417
  input_cols: List[str],
406
418
  label_cols: List[str],
407
- sample_weight_col: Optional[str]
419
+ sample_weight_col: Optional[str],
420
+ statement_params: Dict[str, str]
408
421
  ) -> str:
409
422
  import cloudpickle as cp
410
423
  import numpy as np
@@ -471,15 +484,15 @@ class ElasticNet(BaseTransformer):
471
484
  api_calls=[Session.call],
472
485
  custom_tags=dict([("autogen", True)]),
473
486
  )
474
- sproc_export_file_name = session.call(
475
- fit_sproc_name,
487
+ sproc_export_file_name = fit_wrapper_sproc(
488
+ session,
476
489
  query,
477
490
  stage_transform_file_name,
478
491
  stage_result_file_name,
479
492
  identifier.get_unescaped_names(self.input_cols),
480
493
  identifier.get_unescaped_names(self.label_cols),
481
494
  identifier.get_unescaped_names(self.sample_weight_col),
482
- statement_params=statement_params,
495
+ statement_params,
483
496
  )
484
497
 
485
498
  if "|" in sproc_export_file_name:
@@ -489,7 +502,7 @@ class ElasticNet(BaseTransformer):
489
502
  print("\n".join(fields[1:]))
490
503
 
491
504
  session.file.get(
492
- os.path.join(stage_result_file_name, sproc_export_file_name),
505
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
493
506
  local_result_file_name,
494
507
  statement_params=statement_params
495
508
  )
@@ -535,7 +548,7 @@ class ElasticNet(BaseTransformer):
535
548
 
536
549
  # Register vectorized UDF for batch inference
537
550
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
538
- safe_id=self.id, method=inference_method)
551
+ safe_id=self._get_rand_id(), method=inference_method)
539
552
 
540
553
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
541
554
  # will try to pickle all of self which fails.
@@ -627,7 +640,7 @@ class ElasticNet(BaseTransformer):
627
640
  return transformed_pandas_df.to_dict("records")
628
641
 
629
642
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
630
- safe_id=self.id
643
+ safe_id=self._get_rand_id()
631
644
  )
632
645
 
633
646
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -683,26 +696,37 @@ class ElasticNet(BaseTransformer):
683
696
  # input cols need to match unquoted / quoted
684
697
  input_cols = self.input_cols
685
698
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
699
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
686
700
 
687
701
  estimator = self._sklearn_object
688
702
 
689
- input_df = dataset[input_cols] # Select input columns with quoted column names.
690
- if hasattr(estimator, "feature_names_in_"):
691
- missing_features = []
692
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
693
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
694
- missing_features.append(f)
695
-
696
- if len(missing_features) > 0:
697
- raise ValueError(
698
- "The feature names should match with those that were passed during fit.\n"
699
- f"Features seen during fit call but not present in the input: {missing_features}\n"
700
- f"Features in the input dataframe : {input_cols}\n"
701
- )
702
- input_df.columns = getattr(estimator, "feature_names_in_")
703
- else:
704
- # Just rename the column names to unquoted identifiers.
705
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
703
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
704
+ missing_features = []
705
+ features_in_dataset = set(dataset.columns)
706
+ columns_to_select = []
707
+ for i, f in enumerate(features_required_by_estimator):
708
+ if (
709
+ i >= len(input_cols)
710
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
711
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
712
+ and quoted_input_cols[i] not in features_in_dataset)
713
+ ):
714
+ missing_features.append(f)
715
+ elif input_cols[i] in features_in_dataset:
716
+ columns_to_select.append(input_cols[i])
717
+ elif unquoted_input_cols[i] in features_in_dataset:
718
+ columns_to_select.append(unquoted_input_cols[i])
719
+ else:
720
+ columns_to_select.append(quoted_input_cols[i])
721
+
722
+ if len(missing_features) > 0:
723
+ raise ValueError(
724
+ "The feature names should match with those that were passed during fit.\n"
725
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
726
+ f"Features in the input dataframe : {input_cols}\n"
727
+ )
728
+ input_df = dataset[columns_to_select]
729
+ input_df.columns = features_required_by_estimator
706
730
 
707
731
  transformed_numpy_array = getattr(estimator, inference_method)(
708
732
  input_df
@@ -783,11 +807,18 @@ class ElasticNet(BaseTransformer):
783
807
  Transformed dataset.
784
808
  """
785
809
  if isinstance(dataset, DataFrame):
810
+ expected_type_inferred = "float"
811
+ # when it is classifier, infer the datatype from label columns
812
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
813
+ expected_type_inferred = convert_sp_to_sf_type(
814
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
815
+ )
816
+
786
817
  output_df = self._batch_inference(
787
818
  dataset=dataset,
788
819
  inference_method="predict",
789
820
  expected_output_cols_list=self.output_cols,
790
- expected_output_cols_type="float",
821
+ expected_output_cols_type=expected_type_inferred,
791
822
  )
792
823
  elif isinstance(dataset, pd.DataFrame):
793
824
  output_df = self._sklearn_inference(
@@ -858,10 +889,10 @@ class ElasticNet(BaseTransformer):
858
889
 
859
890
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
860
891
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
861
- Returns an empty list if current object is not a classifier or not yet fitted.
892
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
862
893
  """
863
894
  if getattr(self._sklearn_object, "classes_", None) is None:
864
- return []
895
+ return [output_cols_prefix]
865
896
 
866
897
  classes = self._sklearn_object.classes_
867
898
  if isinstance(classes, numpy.ndarray):
@@ -1086,7 +1117,7 @@ class ElasticNet(BaseTransformer):
1086
1117
  cp.dump(self._sklearn_object, local_score_file)
1087
1118
 
1088
1119
  # Create temp stage to run score.
1089
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1120
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1090
1121
  session = dataset._session
1091
1122
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1092
1123
  SqlResultValidator(
@@ -1100,8 +1131,9 @@ class ElasticNet(BaseTransformer):
1100
1131
  expected_value=f"Stage area {score_stage_name} successfully created."
1101
1132
  ).validate()
1102
1133
 
1103
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1104
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1134
+ # Use posixpath to construct stage paths
1135
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1136
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1105
1137
  statement_params = telemetry.get_function_usage_statement_params(
1106
1138
  project=_PROJECT,
1107
1139
  subproject=_SUBPROJECT,
@@ -1127,6 +1159,7 @@ class ElasticNet(BaseTransformer):
1127
1159
  replace=True,
1128
1160
  session=session,
1129
1161
  statement_params=statement_params,
1162
+ anonymous=True
1130
1163
  )
1131
1164
  def score_wrapper_sproc(
1132
1165
  session: Session,
@@ -1134,7 +1167,8 @@ class ElasticNet(BaseTransformer):
1134
1167
  stage_score_file_name: str,
1135
1168
  input_cols: List[str],
1136
1169
  label_cols: List[str],
1137
- sample_weight_col: Optional[str]
1170
+ sample_weight_col: Optional[str],
1171
+ statement_params: Dict[str, str]
1138
1172
  ) -> float:
1139
1173
  import cloudpickle as cp
1140
1174
  import numpy as np
@@ -1184,14 +1218,14 @@ class ElasticNet(BaseTransformer):
1184
1218
  api_calls=[Session.call],
1185
1219
  custom_tags=dict([("autogen", True)]),
1186
1220
  )
1187
- score = session.call(
1188
- score_sproc_name,
1221
+ score = score_wrapper_sproc(
1222
+ session,
1189
1223
  query,
1190
1224
  stage_score_file_name,
1191
1225
  identifier.get_unescaped_names(self.input_cols),
1192
1226
  identifier.get_unescaped_names(self.label_cols),
1193
1227
  identifier.get_unescaped_names(self.sample_weight_col),
1194
- statement_params=statement_params,
1228
+ statement_params,
1195
1229
  )
1196
1230
 
1197
1231
  cleanup_temp_files([local_score_file_name])
@@ -1209,18 +1243,20 @@ class ElasticNet(BaseTransformer):
1209
1243
  if self._sklearn_object._estimator_type == 'classifier':
1210
1244
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1211
1245
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1212
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1246
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1247
+ ([] if self._drop_input_cols else inputs) + outputs)
1213
1248
  # For regressor, the type of predict is float64
1214
1249
  elif self._sklearn_object._estimator_type == 'regressor':
1215
1250
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1216
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1217
-
1251
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1252
+ ([] if self._drop_input_cols else inputs) + outputs)
1218
1253
  for prob_func in PROB_FUNCTIONS:
1219
1254
  if hasattr(self, prob_func):
1220
1255
  output_cols_prefix: str = f"{prob_func}_"
1221
1256
  output_column_names = self._get_output_column_names(output_cols_prefix)
1222
1257
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1223
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1258
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1259
+ ([] if self._drop_input_cols else inputs) + outputs)
1224
1260
 
1225
1261
  @property
1226
1262
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -277,7 +279,6 @@ class ElasticNetCV(BaseTransformer):
277
279
  sample_weight_col: Optional[str] = None,
278
280
  ) -> None:
279
281
  super().__init__()
280
- self.id = str(uuid4()).replace("-", "_").upper()
281
282
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
282
283
 
283
284
  self._deps = list(deps)
@@ -311,6 +312,15 @@ class ElasticNetCV(BaseTransformer):
311
312
  self.set_drop_input_cols(drop_input_cols)
312
313
  self.set_sample_weight_col(sample_weight_col)
313
314
 
315
+ def _get_rand_id(self) -> str:
316
+ """
317
+ Generate random id to be used in sproc and stage names.
318
+
319
+ Returns:
320
+ Random id string usable in sproc, table, and stage names.
321
+ """
322
+ return str(uuid4()).replace("-", "_").upper()
323
+
314
324
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
315
325
  """
316
326
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -389,7 +399,7 @@ class ElasticNetCV(BaseTransformer):
389
399
  cp.dump(self._sklearn_object, local_transform_file)
390
400
 
391
401
  # Create temp stage to run fit.
392
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
402
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
393
403
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
394
404
  SqlResultValidator(
395
405
  session=session,
@@ -402,11 +412,12 @@ class ElasticNetCV(BaseTransformer):
402
412
  expected_value=f"Stage area {transform_stage_name} successfully created."
403
413
  ).validate()
404
414
 
405
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
415
+ # Use posixpath to construct stage paths
416
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
417
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
406
418
  local_result_file_name = get_temp_file_path()
407
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
408
419
 
409
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
420
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
410
421
  statement_params = telemetry.get_function_usage_statement_params(
411
422
  project=_PROJECT,
412
423
  subproject=_SUBPROJECT,
@@ -432,6 +443,7 @@ class ElasticNetCV(BaseTransformer):
432
443
  replace=True,
433
444
  session=session,
434
445
  statement_params=statement_params,
446
+ anonymous=True
435
447
  )
436
448
  def fit_wrapper_sproc(
437
449
  session: Session,
@@ -440,7 +452,8 @@ class ElasticNetCV(BaseTransformer):
440
452
  stage_result_file_name: str,
441
453
  input_cols: List[str],
442
454
  label_cols: List[str],
443
- sample_weight_col: Optional[str]
455
+ sample_weight_col: Optional[str],
456
+ statement_params: Dict[str, str]
444
457
  ) -> str:
445
458
  import cloudpickle as cp
446
459
  import numpy as np
@@ -507,15 +520,15 @@ class ElasticNetCV(BaseTransformer):
507
520
  api_calls=[Session.call],
508
521
  custom_tags=dict([("autogen", True)]),
509
522
  )
510
- sproc_export_file_name = session.call(
511
- fit_sproc_name,
523
+ sproc_export_file_name = fit_wrapper_sproc(
524
+ session,
512
525
  query,
513
526
  stage_transform_file_name,
514
527
  stage_result_file_name,
515
528
  identifier.get_unescaped_names(self.input_cols),
516
529
  identifier.get_unescaped_names(self.label_cols),
517
530
  identifier.get_unescaped_names(self.sample_weight_col),
518
- statement_params=statement_params,
531
+ statement_params,
519
532
  )
520
533
 
521
534
  if "|" in sproc_export_file_name:
@@ -525,7 +538,7 @@ class ElasticNetCV(BaseTransformer):
525
538
  print("\n".join(fields[1:]))
526
539
 
527
540
  session.file.get(
528
- os.path.join(stage_result_file_name, sproc_export_file_name),
541
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
529
542
  local_result_file_name,
530
543
  statement_params=statement_params
531
544
  )
@@ -571,7 +584,7 @@ class ElasticNetCV(BaseTransformer):
571
584
 
572
585
  # Register vectorized UDF for batch inference
573
586
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
574
- safe_id=self.id, method=inference_method)
587
+ safe_id=self._get_rand_id(), method=inference_method)
575
588
 
576
589
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
577
590
  # will try to pickle all of self which fails.
@@ -663,7 +676,7 @@ class ElasticNetCV(BaseTransformer):
663
676
  return transformed_pandas_df.to_dict("records")
664
677
 
665
678
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
666
- safe_id=self.id
679
+ safe_id=self._get_rand_id()
667
680
  )
668
681
 
669
682
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -719,26 +732,37 @@ class ElasticNetCV(BaseTransformer):
719
732
  # input cols need to match unquoted / quoted
720
733
  input_cols = self.input_cols
721
734
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
735
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
722
736
 
723
737
  estimator = self._sklearn_object
724
738
 
725
- input_df = dataset[input_cols] # Select input columns with quoted column names.
726
- if hasattr(estimator, "feature_names_in_"):
727
- missing_features = []
728
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
729
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
730
- missing_features.append(f)
731
-
732
- if len(missing_features) > 0:
733
- raise ValueError(
734
- "The feature names should match with those that were passed during fit.\n"
735
- f"Features seen during fit call but not present in the input: {missing_features}\n"
736
- f"Features in the input dataframe : {input_cols}\n"
737
- )
738
- input_df.columns = getattr(estimator, "feature_names_in_")
739
- else:
740
- # Just rename the column names to unquoted identifiers.
741
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
739
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
740
+ missing_features = []
741
+ features_in_dataset = set(dataset.columns)
742
+ columns_to_select = []
743
+ for i, f in enumerate(features_required_by_estimator):
744
+ if (
745
+ i >= len(input_cols)
746
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
747
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
748
+ and quoted_input_cols[i] not in features_in_dataset)
749
+ ):
750
+ missing_features.append(f)
751
+ elif input_cols[i] in features_in_dataset:
752
+ columns_to_select.append(input_cols[i])
753
+ elif unquoted_input_cols[i] in features_in_dataset:
754
+ columns_to_select.append(unquoted_input_cols[i])
755
+ else:
756
+ columns_to_select.append(quoted_input_cols[i])
757
+
758
+ if len(missing_features) > 0:
759
+ raise ValueError(
760
+ "The feature names should match with those that were passed during fit.\n"
761
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
762
+ f"Features in the input dataframe : {input_cols}\n"
763
+ )
764
+ input_df = dataset[columns_to_select]
765
+ input_df.columns = features_required_by_estimator
742
766
 
743
767
  transformed_numpy_array = getattr(estimator, inference_method)(
744
768
  input_df
@@ -819,11 +843,18 @@ class ElasticNetCV(BaseTransformer):
819
843
  Transformed dataset.
820
844
  """
821
845
  if isinstance(dataset, DataFrame):
846
+ expected_type_inferred = "float"
847
+ # when it is classifier, infer the datatype from label columns
848
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
849
+ expected_type_inferred = convert_sp_to_sf_type(
850
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
851
+ )
852
+
822
853
  output_df = self._batch_inference(
823
854
  dataset=dataset,
824
855
  inference_method="predict",
825
856
  expected_output_cols_list=self.output_cols,
826
- expected_output_cols_type="float",
857
+ expected_output_cols_type=expected_type_inferred,
827
858
  )
828
859
  elif isinstance(dataset, pd.DataFrame):
829
860
  output_df = self._sklearn_inference(
@@ -894,10 +925,10 @@ class ElasticNetCV(BaseTransformer):
894
925
 
895
926
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
896
927
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
897
- Returns an empty list if current object is not a classifier or not yet fitted.
928
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
898
929
  """
899
930
  if getattr(self._sklearn_object, "classes_", None) is None:
900
- return []
931
+ return [output_cols_prefix]
901
932
 
902
933
  classes = self._sklearn_object.classes_
903
934
  if isinstance(classes, numpy.ndarray):
@@ -1122,7 +1153,7 @@ class ElasticNetCV(BaseTransformer):
1122
1153
  cp.dump(self._sklearn_object, local_score_file)
1123
1154
 
1124
1155
  # Create temp stage to run score.
1125
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1156
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1126
1157
  session = dataset._session
1127
1158
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1128
1159
  SqlResultValidator(
@@ -1136,8 +1167,9 @@ class ElasticNetCV(BaseTransformer):
1136
1167
  expected_value=f"Stage area {score_stage_name} successfully created."
1137
1168
  ).validate()
1138
1169
 
1139
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1140
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1170
+ # Use posixpath to construct stage paths
1171
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1172
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1141
1173
  statement_params = telemetry.get_function_usage_statement_params(
1142
1174
  project=_PROJECT,
1143
1175
  subproject=_SUBPROJECT,
@@ -1163,6 +1195,7 @@ class ElasticNetCV(BaseTransformer):
1163
1195
  replace=True,
1164
1196
  session=session,
1165
1197
  statement_params=statement_params,
1198
+ anonymous=True
1166
1199
  )
1167
1200
  def score_wrapper_sproc(
1168
1201
  session: Session,
@@ -1170,7 +1203,8 @@ class ElasticNetCV(BaseTransformer):
1170
1203
  stage_score_file_name: str,
1171
1204
  input_cols: List[str],
1172
1205
  label_cols: List[str],
1173
- sample_weight_col: Optional[str]
1206
+ sample_weight_col: Optional[str],
1207
+ statement_params: Dict[str, str]
1174
1208
  ) -> float:
1175
1209
  import cloudpickle as cp
1176
1210
  import numpy as np
@@ -1220,14 +1254,14 @@ class ElasticNetCV(BaseTransformer):
1220
1254
  api_calls=[Session.call],
1221
1255
  custom_tags=dict([("autogen", True)]),
1222
1256
  )
1223
- score = session.call(
1224
- score_sproc_name,
1257
+ score = score_wrapper_sproc(
1258
+ session,
1225
1259
  query,
1226
1260
  stage_score_file_name,
1227
1261
  identifier.get_unescaped_names(self.input_cols),
1228
1262
  identifier.get_unescaped_names(self.label_cols),
1229
1263
  identifier.get_unescaped_names(self.sample_weight_col),
1230
- statement_params=statement_params,
1264
+ statement_params,
1231
1265
  )
1232
1266
 
1233
1267
  cleanup_temp_files([local_score_file_name])
@@ -1245,18 +1279,20 @@ class ElasticNetCV(BaseTransformer):
1245
1279
  if self._sklearn_object._estimator_type == 'classifier':
1246
1280
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1247
1281
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1248
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1282
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1283
+ ([] if self._drop_input_cols else inputs) + outputs)
1249
1284
  # For regressor, the type of predict is float64
1250
1285
  elif self._sklearn_object._estimator_type == 'regressor':
1251
1286
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1252
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1253
-
1287
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1288
+ ([] if self._drop_input_cols else inputs) + outputs)
1254
1289
  for prob_func in PROB_FUNCTIONS:
1255
1290
  if hasattr(self, prob_func):
1256
1291
  output_cols_prefix: str = f"{prob_func}_"
1257
1292
  output_column_names = self._get_output_column_names(output_cols_prefix)
1258
1293
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1259
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1294
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1295
+ ([] if self._drop_input_cols else inputs) + outputs)
1260
1296
 
1261
1297
  @property
1262
1298
  def model_signatures(self) -> Dict[str, ModelSignature]: