snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
snowflake/ml/modeling/linear_model/multi_task_elastic_net.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -231,7 +233,6 @@ class MultiTaskElasticNet(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -259,6 +260,15 @@ class MultiTaskElasticNet(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
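
A quick sketch of the id scheme above (illustrative values, not from the package): unlike the removed self.id, which was minted once per instance, each _get_rand_id() call now produces a fresh suffix, so the fit stage, fit sproc, and inference UDF no longer share one id.

    from uuid import uuid4

    def _get_rand_id() -> str:
        # Hyphens become underscores so the result stays valid in unquoted SQL identifiers.
        return str(uuid4()).replace("-", "_").upper()

    print("SNOWML_TRANSFORM_{safe_id}".format(safe_id=_get_rand_id()))
    print("SNOWML_FIT_{safe_id}".format(safe_id=_get_rand_id()))
    # e.g. SNOWML_TRANSFORM_0F8E..., SNOWML_FIT_93A1... -- two different suffixes
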
@@ -337,7 +347,7 @@ class MultiTaskElasticNet(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -350,11 +360,12 @@ class MultiTaskElasticNet(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
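
The "Use posixpath" hunks exist because Snowflake stage paths always use forward slashes, while os.path.join delegates to ntpath on Windows clients and emits backslashes. A runnable sketch of the difference (portable on any OS, since ntpath and posixpath can be imported directly):

    import ntpath      # what os.path resolves to on Windows
    import posixpath   # the separator stage paths expect

    print(ntpath.join("SNOWML_TRANSFORM_X", "model.pkl"))     # SNOWML_TRANSFORM_X\model.pkl
    print(posixpath.join("SNOWML_TRANSFORM_X", "model.pkl"))  # SNOWML_TRANSFORM_X/model.pkl
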
@@ -380,6 +391,7 @@ class MultiTaskElasticNet(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -388,7 +400,8 @@ class MultiTaskElasticNet(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -455,15 +468,15 @@ class MultiTaskElasticNet(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
 
@@ -473,7 +486,7 @@ class MultiTaskElasticNet(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -519,7 +532,7 @@ class MultiTaskElasticNet(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -611,7 +624,7 @@ class MultiTaskElasticNet(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -667,26 +680,37 @@ class MultiTaskElasticNet(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
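
A self-contained sketch of the new selection rule above (column names are hypothetical): for each input column, 1.0.3 accepts the stored, unescaped, or escaped spelling and picks whichever one is actually present in the dataframe, instead of assuming the stored spelling exists.

    import pandas as pd

    input_cols = ['"a"']           # spelling stored on the transformer
    unquoted_input_cols = ["a"]    # as identifier.get_unescaped_names(...) would return
    quoted_input_cols = ['"a"']    # as identifier.get_escaped_names(...) would return
    dataset = pd.DataFrame({"a": [1.0]})  # pandas kept the unquoted spelling

    features_in_dataset = set(dataset.columns)
    columns_to_select = [
        c if c in features_in_dataset else (u if u in features_in_dataset else q)
        for c, u, q in zip(input_cols, unquoted_input_cols, quoted_input_cols)
    ]
    print(columns_to_select)  # ['a'] -- 1.0.1 indexed dataset['"a"'] here and raised KeyError
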
@@ -767,11 +791,18 @@ class MultiTaskElasticNet(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
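
One observation on the hunk above: expected_type_inferred is hardcoded to "float" in this regressor, so the == "" branch can never fire here; it is presumably exercised by classifier variants of the same autogenerated template, where the Snowflake column type is derived from the stored signature via convert_sp_to_sf_type. A tiny sketch of that helper (the exact type name printed is an assumption):

    from snowflake.snowpark.types import LongType
    from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type

    # Maps a Snowpark DataType to a Snowflake SQL type name, e.g. LongType -> "BIGINT" (assumed).
    print(convert_sp_to_sf_type(LongType()))
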
@@ -842,10 +873,10 @@ class MultiTaskElasticNet(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
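
The fallback change above, restated as a runnable sketch (stand-in estimator; the real method reads self._sklearn_object): a fitted classifier still yields one column per class, while anything without classes_ now yields the bare prefix instead of an empty list, so probability-style signatures always declare at least one output column.

    from typing import Any, List
    import numpy as np

    def get_output_column_names(sklearn_object: Any, output_cols_prefix: str) -> List[str]:
        if getattr(sklearn_object, "classes_", None) is None:
            return [output_cols_prefix]  # 1.0.1 returned [] here
        return [f"{output_cols_prefix}{c}" for c in sklearn_object.classes_]

    class FittedClassifier:  # stand-in with the attribute sklearn sets at fit time
        classes_ = np.array([0, 1])

    print(get_output_column_names(FittedClassifier(), "predict_proba_"))  # ['predict_proba_0', 'predict_proba_1']
    print(get_output_column_names(object(), "predict_proba_"))            # ['predict_proba_']
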
@@ -1070,7 +1101,7 @@ class MultiTaskElasticNet(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1084,8 +1115,9 @@ class MultiTaskElasticNet(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1111,6 +1143,7 @@ class MultiTaskElasticNet(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1118,7 +1151,8 @@ class MultiTaskElasticNet(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1168,14 +1202,14 @@ class MultiTaskElasticNet(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1193,18 +1227,20 @@ class MultiTaskElasticNet(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                   ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
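
The signature hunks above, restated as a short sketch (feature names are hypothetical): declared outputs for predict and the probability functions now prepend the input features unless drop_input_cols is set, matching the dataframe that batch inference actually returns (pass-through inputs plus predictions). The identical set of changes is applied to multi_task_elastic_net_cv.py below.

    from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

    inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="FEATURE_0")]
    outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_0")]
    drop_input_cols = False

    sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
    print([f.name for f in sig.outputs])  # ['FEATURE_0', 'OUTPUT_0']
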
snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py

@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -268,7 +270,6 @@ class MultiTaskElasticNetCV(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -300,6 +301,15 @@ class MultiTaskElasticNetCV(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -378,7 +388,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -391,11 +401,12 @@ class MultiTaskElasticNetCV(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -421,6 +432,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -429,7 +441,8 @@ class MultiTaskElasticNetCV(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -496,15 +509,15 @@ class MultiTaskElasticNetCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
 
@@ -514,7 +527,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -560,7 +573,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -652,7 +665,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -708,26 +721,37 @@ class MultiTaskElasticNetCV(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -808,11 +832,18 @@ class MultiTaskElasticNetCV(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
@@ -883,10 +914,10 @@ class MultiTaskElasticNetCV(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1111,7 +1142,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1125,8 +1156,9 @@ class MultiTaskElasticNetCV(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1152,6 +1184,7 @@ class MultiTaskElasticNetCV(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1159,7 +1192,8 @@ class MultiTaskElasticNetCV(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1209,14 +1243,14 @@ class MultiTaskElasticNetCV(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
        )
 
         cleanup_temp_files([local_score_file_name])
@@ -1234,18 +1268,20 @@ class MultiTaskElasticNetCV(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                        ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: