snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -258,7 +260,6 @@ class LassoLars(BaseTransformer):
258
260
  sample_weight_col: Optional[str] = None,
259
261
  ) -> None:
260
262
  super().__init__()
261
- self.id = str(uuid4()).replace("-", "_").upper()
262
263
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
263
264
 
264
265
  self._deps = list(deps)
@@ -289,6 +290,15 @@ class LassoLars(BaseTransformer):
289
290
  self.set_drop_input_cols(drop_input_cols)
290
291
  self.set_sample_weight_col(sample_weight_col)
291
292
 
293
+ def _get_rand_id(self) -> str:
294
+ """
295
+ Generate random id to be used in sproc and stage names.
296
+
297
+ Returns:
298
+ Random id string usable in sproc, table, and stage names.
299
+ """
300
+ return str(uuid4()).replace("-", "_").upper()
301
+
292
302
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
293
303
  """
294
304
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -367,7 +377,7 @@ class LassoLars(BaseTransformer):
367
377
  cp.dump(self._sklearn_object, local_transform_file)
368
378
 
369
379
  # Create temp stage to run fit.
370
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
380
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
371
381
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
372
382
  SqlResultValidator(
373
383
  session=session,
@@ -380,11 +390,12 @@ class LassoLars(BaseTransformer):
380
390
  expected_value=f"Stage area {transform_stage_name} successfully created."
381
391
  ).validate()
382
392
 
383
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
393
+ # Use posixpath to construct stage paths
394
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
395
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
384
396
  local_result_file_name = get_temp_file_path()
385
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
386
397
 
387
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
398
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
388
399
  statement_params = telemetry.get_function_usage_statement_params(
389
400
  project=_PROJECT,
390
401
  subproject=_SUBPROJECT,
@@ -410,6 +421,7 @@ class LassoLars(BaseTransformer):
410
421
  replace=True,
411
422
  session=session,
412
423
  statement_params=statement_params,
424
+ anonymous=True
413
425
  )
414
426
  def fit_wrapper_sproc(
415
427
  session: Session,
@@ -418,7 +430,8 @@ class LassoLars(BaseTransformer):
418
430
  stage_result_file_name: str,
419
431
  input_cols: List[str],
420
432
  label_cols: List[str],
421
- sample_weight_col: Optional[str]
433
+ sample_weight_col: Optional[str],
434
+ statement_params: Dict[str, str]
422
435
  ) -> str:
423
436
  import cloudpickle as cp
424
437
  import numpy as np
@@ -485,15 +498,15 @@ class LassoLars(BaseTransformer):
485
498
  api_calls=[Session.call],
486
499
  custom_tags=dict([("autogen", True)]),
487
500
  )
488
- sproc_export_file_name = session.call(
489
- fit_sproc_name,
501
+ sproc_export_file_name = fit_wrapper_sproc(
502
+ session,
490
503
  query,
491
504
  stage_transform_file_name,
492
505
  stage_result_file_name,
493
506
  identifier.get_unescaped_names(self.input_cols),
494
507
  identifier.get_unescaped_names(self.label_cols),
495
508
  identifier.get_unescaped_names(self.sample_weight_col),
496
- statement_params=statement_params,
509
+ statement_params,
497
510
  )
498
511
 
499
512
  if "|" in sproc_export_file_name:
@@ -503,7 +516,7 @@ class LassoLars(BaseTransformer):
503
516
  print("\n".join(fields[1:]))
504
517
 
505
518
  session.file.get(
506
- os.path.join(stage_result_file_name, sproc_export_file_name),
519
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
507
520
  local_result_file_name,
508
521
  statement_params=statement_params
509
522
  )
@@ -549,7 +562,7 @@ class LassoLars(BaseTransformer):
549
562
 
550
563
  # Register vectorized UDF for batch inference
551
564
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
552
- safe_id=self.id, method=inference_method)
565
+ safe_id=self._get_rand_id(), method=inference_method)
553
566
 
554
567
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
555
568
  # will try to pickle all of self which fails.
@@ -641,7 +654,7 @@ class LassoLars(BaseTransformer):
641
654
  return transformed_pandas_df.to_dict("records")
642
655
 
643
656
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
644
- safe_id=self.id
657
+ safe_id=self._get_rand_id()
645
658
  )
646
659
 
647
660
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -697,26 +710,37 @@ class LassoLars(BaseTransformer):
697
710
  # input cols need to match unquoted / quoted
698
711
  input_cols = self.input_cols
699
712
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
713
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
700
714
 
701
715
  estimator = self._sklearn_object
702
716
 
703
- input_df = dataset[input_cols] # Select input columns with quoted column names.
704
- if hasattr(estimator, "feature_names_in_"):
705
- missing_features = []
706
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
707
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
708
- missing_features.append(f)
709
-
710
- if len(missing_features) > 0:
711
- raise ValueError(
712
- "The feature names should match with those that were passed during fit.\n"
713
- f"Features seen during fit call but not present in the input: {missing_features}\n"
714
- f"Features in the input dataframe : {input_cols}\n"
715
- )
716
- input_df.columns = getattr(estimator, "feature_names_in_")
717
- else:
718
- # Just rename the column names to unquoted identifiers.
719
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
717
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
718
+ missing_features = []
719
+ features_in_dataset = set(dataset.columns)
720
+ columns_to_select = []
721
+ for i, f in enumerate(features_required_by_estimator):
722
+ if (
723
+ i >= len(input_cols)
724
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
725
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
726
+ and quoted_input_cols[i] not in features_in_dataset)
727
+ ):
728
+ missing_features.append(f)
729
+ elif input_cols[i] in features_in_dataset:
730
+ columns_to_select.append(input_cols[i])
731
+ elif unquoted_input_cols[i] in features_in_dataset:
732
+ columns_to_select.append(unquoted_input_cols[i])
733
+ else:
734
+ columns_to_select.append(quoted_input_cols[i])
735
+
736
+ if len(missing_features) > 0:
737
+ raise ValueError(
738
+ "The feature names should match with those that were passed during fit.\n"
739
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
740
+ f"Features in the input dataframe : {input_cols}\n"
741
+ )
742
+ input_df = dataset[columns_to_select]
743
+ input_df.columns = features_required_by_estimator
720
744
 
721
745
  transformed_numpy_array = getattr(estimator, inference_method)(
722
746
  input_df
@@ -797,11 +821,18 @@ class LassoLars(BaseTransformer):
797
821
  Transformed dataset.
798
822
  """
799
823
  if isinstance(dataset, DataFrame):
824
+ expected_type_inferred = "float"
825
+ # when it is classifier, infer the datatype from label columns
826
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
827
+ expected_type_inferred = convert_sp_to_sf_type(
828
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
829
+ )
830
+
800
831
  output_df = self._batch_inference(
801
832
  dataset=dataset,
802
833
  inference_method="predict",
803
834
  expected_output_cols_list=self.output_cols,
804
- expected_output_cols_type="float",
835
+ expected_output_cols_type=expected_type_inferred,
805
836
  )
806
837
  elif isinstance(dataset, pd.DataFrame):
807
838
  output_df = self._sklearn_inference(
@@ -872,10 +903,10 @@ class LassoLars(BaseTransformer):
872
903
 
873
904
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
874
905
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
875
- Returns an empty list if current object is not a classifier or not yet fitted.
906
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
876
907
  """
877
908
  if getattr(self._sklearn_object, "classes_", None) is None:
878
- return []
909
+ return [output_cols_prefix]
879
910
 
880
911
  classes = self._sklearn_object.classes_
881
912
  if isinstance(classes, numpy.ndarray):
@@ -1100,7 +1131,7 @@ class LassoLars(BaseTransformer):
1100
1131
  cp.dump(self._sklearn_object, local_score_file)
1101
1132
 
1102
1133
  # Create temp stage to run score.
1103
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1134
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1104
1135
  session = dataset._session
1105
1136
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1106
1137
  SqlResultValidator(
@@ -1114,8 +1145,9 @@ class LassoLars(BaseTransformer):
1114
1145
  expected_value=f"Stage area {score_stage_name} successfully created."
1115
1146
  ).validate()
1116
1147
 
1117
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1118
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1148
+ # Use posixpath to construct stage paths
1149
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1150
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1119
1151
  statement_params = telemetry.get_function_usage_statement_params(
1120
1152
  project=_PROJECT,
1121
1153
  subproject=_SUBPROJECT,
@@ -1141,6 +1173,7 @@ class LassoLars(BaseTransformer):
1141
1173
  replace=True,
1142
1174
  session=session,
1143
1175
  statement_params=statement_params,
1176
+ anonymous=True
1144
1177
  )
1145
1178
  def score_wrapper_sproc(
1146
1179
  session: Session,
@@ -1148,7 +1181,8 @@ class LassoLars(BaseTransformer):
1148
1181
  stage_score_file_name: str,
1149
1182
  input_cols: List[str],
1150
1183
  label_cols: List[str],
1151
- sample_weight_col: Optional[str]
1184
+ sample_weight_col: Optional[str],
1185
+ statement_params: Dict[str, str]
1152
1186
  ) -> float:
1153
1187
  import cloudpickle as cp
1154
1188
  import numpy as np
@@ -1198,14 +1232,14 @@ class LassoLars(BaseTransformer):
1198
1232
  api_calls=[Session.call],
1199
1233
  custom_tags=dict([("autogen", True)]),
1200
1234
  )
1201
- score = session.call(
1202
- score_sproc_name,
1235
+ score = score_wrapper_sproc(
1236
+ session,
1203
1237
  query,
1204
1238
  stage_score_file_name,
1205
1239
  identifier.get_unescaped_names(self.input_cols),
1206
1240
  identifier.get_unescaped_names(self.label_cols),
1207
1241
  identifier.get_unescaped_names(self.sample_weight_col),
1208
- statement_params=statement_params,
1242
+ statement_params,
1209
1243
  )
1210
1244
 
1211
1245
  cleanup_temp_files([local_score_file_name])
@@ -1223,18 +1257,20 @@ class LassoLars(BaseTransformer):
1223
1257
  if self._sklearn_object._estimator_type == 'classifier':
1224
1258
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1225
1259
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1226
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1260
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1261
+ ([] if self._drop_input_cols else inputs) + outputs)
1227
1262
  # For regressor, the type of predict is float64
1228
1263
  elif self._sklearn_object._estimator_type == 'regressor':
1229
1264
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1230
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1231
-
1265
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1266
+ ([] if self._drop_input_cols else inputs) + outputs)
1232
1267
  for prob_func in PROB_FUNCTIONS:
1233
1268
  if hasattr(self, prob_func):
1234
1269
  output_cols_prefix: str = f"{prob_func}_"
1235
1270
  output_column_names = self._get_output_column_names(output_cols_prefix)
1236
1271
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1237
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1272
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1273
+ ([] if self._drop_input_cols else inputs) + outputs)
1238
1274
 
1239
1275
  @property
1240
1276
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -260,7 +262,6 @@ class LassoLarsCV(BaseTransformer):
260
262
  sample_weight_col: Optional[str] = None,
261
263
  ) -> None:
262
264
  super().__init__()
263
- self.id = str(uuid4()).replace("-", "_").upper()
264
265
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
265
266
 
266
267
  self._deps = list(deps)
@@ -290,6 +291,15 @@ class LassoLarsCV(BaseTransformer):
290
291
  self.set_drop_input_cols(drop_input_cols)
291
292
  self.set_sample_weight_col(sample_weight_col)
292
293
 
294
+ def _get_rand_id(self) -> str:
295
+ """
296
+ Generate random id to be used in sproc and stage names.
297
+
298
+ Returns:
299
+ Random id string usable in sproc, table, and stage names.
300
+ """
301
+ return str(uuid4()).replace("-", "_").upper()
302
+
293
303
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
294
304
  """
295
305
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -368,7 +378,7 @@ class LassoLarsCV(BaseTransformer):
368
378
  cp.dump(self._sklearn_object, local_transform_file)
369
379
 
370
380
  # Create temp stage to run fit.
371
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
381
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
372
382
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
373
383
  SqlResultValidator(
374
384
  session=session,
@@ -381,11 +391,12 @@ class LassoLarsCV(BaseTransformer):
381
391
  expected_value=f"Stage area {transform_stage_name} successfully created."
382
392
  ).validate()
383
393
 
384
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
394
+ # Use posixpath to construct stage paths
395
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
396
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
385
397
  local_result_file_name = get_temp_file_path()
386
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
387
398
 
388
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
399
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
389
400
  statement_params = telemetry.get_function_usage_statement_params(
390
401
  project=_PROJECT,
391
402
  subproject=_SUBPROJECT,
@@ -411,6 +422,7 @@ class LassoLarsCV(BaseTransformer):
411
422
  replace=True,
412
423
  session=session,
413
424
  statement_params=statement_params,
425
+ anonymous=True
414
426
  )
415
427
  def fit_wrapper_sproc(
416
428
  session: Session,
@@ -419,7 +431,8 @@ class LassoLarsCV(BaseTransformer):
419
431
  stage_result_file_name: str,
420
432
  input_cols: List[str],
421
433
  label_cols: List[str],
422
- sample_weight_col: Optional[str]
434
+ sample_weight_col: Optional[str],
435
+ statement_params: Dict[str, str]
423
436
  ) -> str:
424
437
  import cloudpickle as cp
425
438
  import numpy as np
@@ -486,15 +499,15 @@ class LassoLarsCV(BaseTransformer):
486
499
  api_calls=[Session.call],
487
500
  custom_tags=dict([("autogen", True)]),
488
501
  )
489
- sproc_export_file_name = session.call(
490
- fit_sproc_name,
502
+ sproc_export_file_name = fit_wrapper_sproc(
503
+ session,
491
504
  query,
492
505
  stage_transform_file_name,
493
506
  stage_result_file_name,
494
507
  identifier.get_unescaped_names(self.input_cols),
495
508
  identifier.get_unescaped_names(self.label_cols),
496
509
  identifier.get_unescaped_names(self.sample_weight_col),
497
- statement_params=statement_params,
510
+ statement_params,
498
511
  )
499
512
 
500
513
  if "|" in sproc_export_file_name:
@@ -504,7 +517,7 @@ class LassoLarsCV(BaseTransformer):
504
517
  print("\n".join(fields[1:]))
505
518
 
506
519
  session.file.get(
507
- os.path.join(stage_result_file_name, sproc_export_file_name),
520
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
508
521
  local_result_file_name,
509
522
  statement_params=statement_params
510
523
  )
@@ -550,7 +563,7 @@ class LassoLarsCV(BaseTransformer):
550
563
 
551
564
  # Register vectorized UDF for batch inference
552
565
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
553
- safe_id=self.id, method=inference_method)
566
+ safe_id=self._get_rand_id(), method=inference_method)
554
567
 
555
568
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
556
569
  # will try to pickle all of self which fails.
@@ -642,7 +655,7 @@ class LassoLarsCV(BaseTransformer):
642
655
  return transformed_pandas_df.to_dict("records")
643
656
 
644
657
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
645
- safe_id=self.id
658
+ safe_id=self._get_rand_id()
646
659
  )
647
660
 
648
661
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -698,26 +711,37 @@ class LassoLarsCV(BaseTransformer):
698
711
  # input cols need to match unquoted / quoted
699
712
  input_cols = self.input_cols
700
713
  unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
714
+ quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
701
715
 
702
716
  estimator = self._sklearn_object
703
717
 
704
- input_df = dataset[input_cols] # Select input columns with quoted column names.
705
- if hasattr(estimator, "feature_names_in_"):
706
- missing_features = []
707
- for i, f in enumerate(getattr(estimator, "feature_names_in_")):
708
- if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
709
- missing_features.append(f)
710
-
711
- if len(missing_features) > 0:
712
- raise ValueError(
713
- "The feature names should match with those that were passed during fit.\n"
714
- f"Features seen during fit call but not present in the input: {missing_features}\n"
715
- f"Features in the input dataframe : {input_cols}\n"
716
- )
717
- input_df.columns = getattr(estimator, "feature_names_in_")
718
- else:
719
- # Just rename the column names to unquoted identifiers.
720
- input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
718
+ features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
719
+ missing_features = []
720
+ features_in_dataset = set(dataset.columns)
721
+ columns_to_select = []
722
+ for i, f in enumerate(features_required_by_estimator):
723
+ if (
724
+ i >= len(input_cols)
725
+ or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
726
+ or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
727
+ and quoted_input_cols[i] not in features_in_dataset)
728
+ ):
729
+ missing_features.append(f)
730
+ elif input_cols[i] in features_in_dataset:
731
+ columns_to_select.append(input_cols[i])
732
+ elif unquoted_input_cols[i] in features_in_dataset:
733
+ columns_to_select.append(unquoted_input_cols[i])
734
+ else:
735
+ columns_to_select.append(quoted_input_cols[i])
736
+
737
+ if len(missing_features) > 0:
738
+ raise ValueError(
739
+ "The feature names should match with those that were passed during fit.\n"
740
+ f"Features seen during fit call but not present in the input: {missing_features}\n"
741
+ f"Features in the input dataframe : {input_cols}\n"
742
+ )
743
+ input_df = dataset[columns_to_select]
744
+ input_df.columns = features_required_by_estimator
721
745
 
722
746
  transformed_numpy_array = getattr(estimator, inference_method)(
723
747
  input_df
@@ -798,11 +822,18 @@ class LassoLarsCV(BaseTransformer):
798
822
  Transformed dataset.
799
823
  """
800
824
  if isinstance(dataset, DataFrame):
825
+ expected_type_inferred = "float"
826
+ # when it is classifier, infer the datatype from label columns
827
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
828
+ expected_type_inferred = convert_sp_to_sf_type(
829
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
830
+ )
831
+
801
832
  output_df = self._batch_inference(
802
833
  dataset=dataset,
803
834
  inference_method="predict",
804
835
  expected_output_cols_list=self.output_cols,
805
- expected_output_cols_type="float",
836
+ expected_output_cols_type=expected_type_inferred,
806
837
  )
807
838
  elif isinstance(dataset, pd.DataFrame):
808
839
  output_df = self._sklearn_inference(
@@ -873,10 +904,10 @@ class LassoLarsCV(BaseTransformer):
873
904
 
874
905
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
875
906
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
876
- Returns an empty list if current object is not a classifier or not yet fitted.
907
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
877
908
  """
878
909
  if getattr(self._sklearn_object, "classes_", None) is None:
879
- return []
910
+ return [output_cols_prefix]
880
911
 
881
912
  classes = self._sklearn_object.classes_
882
913
  if isinstance(classes, numpy.ndarray):
@@ -1101,7 +1132,7 @@ class LassoLarsCV(BaseTransformer):
1101
1132
  cp.dump(self._sklearn_object, local_score_file)
1102
1133
 
1103
1134
  # Create temp stage to run score.
1104
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1135
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1105
1136
  session = dataset._session
1106
1137
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1107
1138
  SqlResultValidator(
@@ -1115,8 +1146,9 @@ class LassoLarsCV(BaseTransformer):
1115
1146
  expected_value=f"Stage area {score_stage_name} successfully created."
1116
1147
  ).validate()
1117
1148
 
1118
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1119
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1149
+ # Use posixpath to construct stage paths
1150
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1151
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1120
1152
  statement_params = telemetry.get_function_usage_statement_params(
1121
1153
  project=_PROJECT,
1122
1154
  subproject=_SUBPROJECT,
@@ -1142,6 +1174,7 @@ class LassoLarsCV(BaseTransformer):
1142
1174
  replace=True,
1143
1175
  session=session,
1144
1176
  statement_params=statement_params,
1177
+ anonymous=True
1145
1178
  )
1146
1179
  def score_wrapper_sproc(
1147
1180
  session: Session,
@@ -1149,7 +1182,8 @@ class LassoLarsCV(BaseTransformer):
1149
1182
  stage_score_file_name: str,
1150
1183
  input_cols: List[str],
1151
1184
  label_cols: List[str],
1152
- sample_weight_col: Optional[str]
1185
+ sample_weight_col: Optional[str],
1186
+ statement_params: Dict[str, str]
1153
1187
  ) -> float:
1154
1188
  import cloudpickle as cp
1155
1189
  import numpy as np
@@ -1199,14 +1233,14 @@ class LassoLarsCV(BaseTransformer):
1199
1233
  api_calls=[Session.call],
1200
1234
  custom_tags=dict([("autogen", True)]),
1201
1235
  )
1202
- score = session.call(
1203
- score_sproc_name,
1236
+ score = score_wrapper_sproc(
1237
+ session,
1204
1238
  query,
1205
1239
  stage_score_file_name,
1206
1240
  identifier.get_unescaped_names(self.input_cols),
1207
1241
  identifier.get_unescaped_names(self.label_cols),
1208
1242
  identifier.get_unescaped_names(self.sample_weight_col),
1209
- statement_params=statement_params,
1243
+ statement_params,
1210
1244
  )
1211
1245
 
1212
1246
  cleanup_temp_files([local_score_file_name])
@@ -1224,18 +1258,20 @@ class LassoLarsCV(BaseTransformer):
1224
1258
  if self._sklearn_object._estimator_type == 'classifier':
1225
1259
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1226
1260
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1227
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1261
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1262
+ ([] if self._drop_input_cols else inputs) + outputs)
1228
1263
  # For regressor, the type of predict is float64
1229
1264
  elif self._sklearn_object._estimator_type == 'regressor':
1230
1265
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1231
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1232
-
1266
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1267
+ ([] if self._drop_input_cols else inputs) + outputs)
1233
1268
  for prob_func in PROB_FUNCTIONS:
1234
1269
  if hasattr(self, prob_func):
1235
1270
  output_cols_prefix: str = f"{prob_func}_"
1236
1271
  output_column_names = self._get_output_column_names(output_cols_prefix)
1237
1272
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1238
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1273
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1274
+ ([] if self._drop_input_cols else inputs) + outputs)
1239
1275
 
1240
1276
  @property
1241
1277
  def model_signatures(self) -> Dict[str, ModelSignature]: