snowflake-ml-python 1.0.1__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
Files changed (196)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +35 -40
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/identifier.py +74 -7
  5. snowflake/ml/_internal/utils/uri.py +7 -2
  6. snowflake/ml/model/_core_requirements.py +1 -1
  7. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  8. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  9. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  10. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  11. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  12. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  13. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  14. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  15. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  16. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  17. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  18. snowflake/ml/model/_deploy_client/warehouse/deploy.py +25 -28
  19. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +7 -4
  20. snowflake/ml/model/_deployer.py +14 -27
  21. snowflake/ml/model/_env.py +4 -4
  22. snowflake/ml/model/_handlers/_base.py +3 -1
  23. snowflake/ml/model/_handlers/custom.py +14 -2
  24. snowflake/ml/model/_handlers/pytorch.py +186 -0
  25. snowflake/ml/model/_handlers/sklearn.py +14 -8
  26. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  27. snowflake/ml/model/_handlers/torchscript.py +180 -0
  28. snowflake/ml/model/_handlers/xgboost.py +19 -9
  29. snowflake/ml/model/_model.py +27 -21
  30. snowflake/ml/model/_model_meta.py +33 -19
  31. snowflake/ml/model/model_signature.py +446 -66
  32. snowflake/ml/model/type_hints.py +28 -15
  33. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +79 -43
  34. snowflake/ml/modeling/cluster/affinity_propagation.py +79 -43
  35. snowflake/ml/modeling/cluster/agglomerative_clustering.py +79 -43
  36. snowflake/ml/modeling/cluster/birch.py +79 -43
  37. snowflake/ml/modeling/cluster/bisecting_k_means.py +79 -43
  38. snowflake/ml/modeling/cluster/dbscan.py +79 -43
  39. snowflake/ml/modeling/cluster/feature_agglomeration.py +79 -43
  40. snowflake/ml/modeling/cluster/k_means.py +79 -43
  41. snowflake/ml/modeling/cluster/mean_shift.py +79 -43
  42. snowflake/ml/modeling/cluster/mini_batch_k_means.py +79 -43
  43. snowflake/ml/modeling/cluster/optics.py +79 -43
  44. snowflake/ml/modeling/cluster/spectral_biclustering.py +79 -43
  45. snowflake/ml/modeling/cluster/spectral_clustering.py +79 -43
  46. snowflake/ml/modeling/cluster/spectral_coclustering.py +79 -43
  47. snowflake/ml/modeling/compose/column_transformer.py +79 -43
  48. snowflake/ml/modeling/compose/transformed_target_regressor.py +79 -43
  49. snowflake/ml/modeling/covariance/elliptic_envelope.py +79 -43
  50. snowflake/ml/modeling/covariance/empirical_covariance.py +79 -43
  51. snowflake/ml/modeling/covariance/graphical_lasso.py +79 -43
  52. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +79 -43
  53. snowflake/ml/modeling/covariance/ledoit_wolf.py +79 -43
  54. snowflake/ml/modeling/covariance/min_cov_det.py +79 -43
  55. snowflake/ml/modeling/covariance/oas.py +79 -43
  56. snowflake/ml/modeling/covariance/shrunk_covariance.py +79 -43
  57. snowflake/ml/modeling/decomposition/dictionary_learning.py +79 -43
  58. snowflake/ml/modeling/decomposition/factor_analysis.py +79 -43
  59. snowflake/ml/modeling/decomposition/fast_ica.py +79 -43
  60. snowflake/ml/modeling/decomposition/incremental_pca.py +79 -43
  61. snowflake/ml/modeling/decomposition/kernel_pca.py +79 -43
  62. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +79 -43
  63. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +79 -43
  64. snowflake/ml/modeling/decomposition/pca.py +79 -43
  65. snowflake/ml/modeling/decomposition/sparse_pca.py +79 -43
  66. snowflake/ml/modeling/decomposition/truncated_svd.py +79 -43
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +79 -43
  68. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +79 -43
  69. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +79 -43
  70. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +79 -43
  71. snowflake/ml/modeling/ensemble/bagging_classifier.py +79 -43
  72. snowflake/ml/modeling/ensemble/bagging_regressor.py +79 -43
  73. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +79 -43
  74. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +79 -43
  75. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +79 -43
  76. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +79 -43
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +79 -43
  78. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +79 -43
  79. snowflake/ml/modeling/ensemble/isolation_forest.py +79 -43
  80. snowflake/ml/modeling/ensemble/random_forest_classifier.py +79 -43
  81. snowflake/ml/modeling/ensemble/random_forest_regressor.py +79 -43
  82. snowflake/ml/modeling/ensemble/stacking_regressor.py +79 -43
  83. snowflake/ml/modeling/ensemble/voting_classifier.py +79 -43
  84. snowflake/ml/modeling/ensemble/voting_regressor.py +79 -43
  85. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +79 -43
  86. snowflake/ml/modeling/feature_selection/select_fdr.py +79 -43
  87. snowflake/ml/modeling/feature_selection/select_fpr.py +79 -43
  88. snowflake/ml/modeling/feature_selection/select_fwe.py +79 -43
  89. snowflake/ml/modeling/feature_selection/select_k_best.py +79 -43
  90. snowflake/ml/modeling/feature_selection/select_percentile.py +79 -43
  91. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +79 -43
  92. snowflake/ml/modeling/feature_selection/variance_threshold.py +79 -43
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +79 -43
  94. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +79 -43
  95. snowflake/ml/modeling/impute/iterative_imputer.py +79 -43
  96. snowflake/ml/modeling/impute/knn_imputer.py +79 -43
  97. snowflake/ml/modeling/impute/missing_indicator.py +79 -43
  98. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +79 -43
  99. snowflake/ml/modeling/kernel_approximation/nystroem.py +79 -43
  100. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +79 -43
  101. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +79 -43
  102. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +79 -43
  103. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +79 -43
  104. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +79 -43
  105. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +79 -43
  106. snowflake/ml/modeling/linear_model/ard_regression.py +79 -43
  107. snowflake/ml/modeling/linear_model/bayesian_ridge.py +79 -43
  108. snowflake/ml/modeling/linear_model/elastic_net.py +79 -43
  109. snowflake/ml/modeling/linear_model/elastic_net_cv.py +79 -43
  110. snowflake/ml/modeling/linear_model/gamma_regressor.py +79 -43
  111. snowflake/ml/modeling/linear_model/huber_regressor.py +79 -43
  112. snowflake/ml/modeling/linear_model/lars.py +79 -43
  113. snowflake/ml/modeling/linear_model/lars_cv.py +79 -43
  114. snowflake/ml/modeling/linear_model/lasso.py +79 -43
  115. snowflake/ml/modeling/linear_model/lasso_cv.py +79 -43
  116. snowflake/ml/modeling/linear_model/lasso_lars.py +79 -43
  117. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +79 -43
  118. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +79 -43
  119. snowflake/ml/modeling/linear_model/linear_regression.py +79 -43
  120. snowflake/ml/modeling/linear_model/logistic_regression.py +79 -43
  121. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +79 -43
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +79 -43
  123. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +79 -43
  124. snowflake/ml/modeling/linear_model/multi_task_lasso.py +79 -43
  125. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +79 -43
  126. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +79 -43
  127. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +79 -43
  128. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +79 -43
  129. snowflake/ml/modeling/linear_model/perceptron.py +79 -43
  130. snowflake/ml/modeling/linear_model/poisson_regressor.py +79 -43
  131. snowflake/ml/modeling/linear_model/ransac_regressor.py +79 -43
  132. snowflake/ml/modeling/linear_model/ridge.py +79 -43
  133. snowflake/ml/modeling/linear_model/ridge_classifier.py +79 -43
  134. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +79 -43
  135. snowflake/ml/modeling/linear_model/ridge_cv.py +79 -43
  136. snowflake/ml/modeling/linear_model/sgd_classifier.py +79 -43
  137. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +79 -43
  138. snowflake/ml/modeling/linear_model/sgd_regressor.py +79 -43
  139. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +79 -43
  140. snowflake/ml/modeling/linear_model/tweedie_regressor.py +79 -43
  141. snowflake/ml/modeling/manifold/isomap.py +79 -43
  142. snowflake/ml/modeling/manifold/mds.py +79 -43
  143. snowflake/ml/modeling/manifold/spectral_embedding.py +79 -43
  144. snowflake/ml/modeling/manifold/tsne.py +79 -43
  145. snowflake/ml/modeling/metrics/classification.py +6 -1
  146. snowflake/ml/modeling/metrics/regression.py +517 -9
  147. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +79 -43
  148. snowflake/ml/modeling/mixture/gaussian_mixture.py +79 -43
  149. snowflake/ml/modeling/model_selection/grid_search_cv.py +79 -43
  150. snowflake/ml/modeling/model_selection/randomized_search_cv.py +79 -43
  151. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +79 -43
  152. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +79 -43
  153. snowflake/ml/modeling/multiclass/output_code_classifier.py +79 -43
  154. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +79 -43
  155. snowflake/ml/modeling/naive_bayes/categorical_nb.py +79 -43
  156. snowflake/ml/modeling/naive_bayes/complement_nb.py +79 -43
  157. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +79 -43
  158. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +79 -43
  159. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +79 -43
  160. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +79 -43
  161. snowflake/ml/modeling/neighbors/kernel_density.py +79 -43
  162. snowflake/ml/modeling/neighbors/local_outlier_factor.py +79 -43
  163. snowflake/ml/modeling/neighbors/nearest_centroid.py +79 -43
  164. snowflake/ml/modeling/neighbors/nearest_neighbors.py +79 -43
  165. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +79 -43
  166. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +79 -43
  167. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +79 -43
  168. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +79 -43
  169. snowflake/ml/modeling/neural_network/mlp_classifier.py +79 -43
  170. snowflake/ml/modeling/neural_network/mlp_regressor.py +79 -43
  171. snowflake/ml/modeling/pipeline/pipeline.py +24 -0
  172. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +18 -19
  173. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  174. snowflake/ml/modeling/preprocessing/polynomial_features.py +79 -43
  175. snowflake/ml/modeling/semi_supervised/label_propagation.py +79 -43
  176. snowflake/ml/modeling/semi_supervised/label_spreading.py +79 -43
  177. snowflake/ml/modeling/svm/linear_svc.py +79 -43
  178. snowflake/ml/modeling/svm/linear_svr.py +79 -43
  179. snowflake/ml/modeling/svm/nu_svc.py +79 -43
  180. snowflake/ml/modeling/svm/nu_svr.py +79 -43
  181. snowflake/ml/modeling/svm/svc.py +79 -43
  182. snowflake/ml/modeling/svm/svr.py +79 -43
  183. snowflake/ml/modeling/tree/decision_tree_classifier.py +79 -43
  184. snowflake/ml/modeling/tree/decision_tree_regressor.py +79 -43
  185. snowflake/ml/modeling/tree/extra_tree_classifier.py +79 -43
  186. snowflake/ml/modeling/tree/extra_tree_regressor.py +79 -43
  187. snowflake/ml/modeling/xgboost/xgb_classifier.py +79 -43
  188. snowflake/ml/modeling/xgboost/xgb_regressor.py +79 -43
  189. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +79 -43
  190. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +79 -43
  191. snowflake/ml/registry/model_registry.py +123 -121
  192. snowflake/ml/version.py +1 -1
  193. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +50 -8
  194. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  195. snowflake_ml_python-1.0.1.dist-info/RECORD +0 -246
  196. {snowflake_ml_python-1.0.1.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
--- a/snowflake/ml/modeling/svm/linear_svc.py
+++ b/snowflake/ml/modeling/svm/linear_svc.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -264,7 +266,6 @@ class LinearSVC(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -295,6 +296,15 @@ class LinearSVC(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
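
Note: in 1.0.1 each estimator minted a single `self.id` at construction time, so every fit, score, and batch-inference call on the same instance reused the same stage, table, and sproc names; 1.0.3 draws a fresh id per use via `_get_rand_id()`. A minimal standalone sketch of what the helper produces:

```python
# Minimal sketch of _get_rand_id()'s behavior, extracted for illustration.
from uuid import uuid4

def _get_rand_id() -> str:
    # Yields e.g. "0E984725_C51C_4BF4_9960_E1C80E27ABA0": hex digits and
    # underscores only, safe as a suffix in unquoted Snowflake identifiers
    # such as "SNOWML_TRANSFORM_{safe_id}".
    return str(uuid4()).replace("-", "_").upper()

print(_get_rand_id())
```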
@@ -373,7 +383,7 @@ class LinearSVC(BaseTransformer):
         cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -386,11 +396,12 @@ class LinearSVC(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
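
Snowflake stage paths are always slash-separated, but `os.path.join` follows the host OS's path rules, so on Windows it would emit backslashes and produce invalid stage references. `posixpath.join` behaves identically on every platform, which is why all stage-path construction in this release switches to it. A quick illustration with a hypothetical stage name (`ntpath` exposes the Windows rules on any OS):

```python
import ntpath
import posixpath

stage = "SNOWML_TRANSFORM_ABC123"
print(posixpath.join(stage, "model.pkl"))  # SNOWML_TRANSFORM_ABC123/model.pkl (always)
print(ntpath.join(stage, "model.pkl"))     # SNOWML_TRANSFORM_ABC123\model.pkl (os.path.join on Windows)
```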
@@ -416,6 +427,7 @@ class LinearSVC(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -424,7 +436,8 @@ class LinearSVC(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -491,15 +504,15 @@ class LinearSVC(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -509,7 +522,7 @@ class LinearSVC(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -555,7 +568,7 @@ class LinearSVC(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -647,7 +660,7 @@ class LinearSVC(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
         )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -703,26 +716,37 @@ class LinearSVC(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
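
The pandas (local) inference path previously selected `dataset[input_cols]` as-is and matched `feature_names_in_` against at most two spellings; it now resolves each feature against three spellings (as stored, unescaped, escaped) and only reports a feature missing when none of them exists in the frame. A simplified, self-contained sketch of the fallback, using hypothetical column names (the real code uses the `identifier` helpers):

```python
import pandas as pd

df = pd.DataFrame({"sepal_len": [5.1, 4.9]})   # the frame carries the unquoted spelling
input_cols = ['"sepal_len"']                   # the transformer stored the quoted spelling
unquoted = [c.strip('"') for c in input_cols]  # stand-in for identifier.get_unescaped_names

# 1.0.1 would select dataset[input_cols] and fail on the missing quoted name;
# 1.0.3 picks whichever spelling actually exists in the frame:
chosen = [c if c in df.columns else u for c, u in zip(input_cols, unquoted)]
assert df[chosen].columns.tolist() == ["sepal_len"]
```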
@@ -803,11 +827,18 @@ class LinearSVC(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = ""
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
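
For classifiers, `predict` on a Snowpark DataFrame no longer leaves the output column type blank: it converts the signature's first output type into a Snowflake SQL type name. `convert_sp_to_sf_type` is a Snowpark-internal helper (note the `_internal` import at the top of this diff), so the outputs below are indicative rather than contractual:

```python
from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
from snowflake.snowpark.types import LongType, StringType

print(convert_sp_to_sf_type(LongType()))    # e.g. "BIGINT"
print(convert_sp_to_sf_type(StringType()))  # e.g. "STRING"
```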
@@ -878,10 +909,10 @@ class LinearSVC(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1108,7 +1139,7 @@ class LinearSVC(BaseTransformer):
         cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1122,8 +1153,9 @@ class LinearSVC(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1149,6 +1181,7 @@ class LinearSVC(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1156,7 +1189,8 @@ class LinearSVC(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1206,14 +1240,14 @@ class LinearSVC(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1231,18 +1265,20 @@ class LinearSVC(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]:
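
The signature change above mirrors batch inference's passthrough behavior: unless `drop_input_cols` is set, predictions come back alongside the input columns, so the declared outputs now include the inputs. A small sketch with hypothetical feature names:

```python
from snowflake.ml.model.model_signature import DataType, FeatureSpec, ModelSignature

inputs = [FeatureSpec(dtype=DataType.DOUBLE, name="SEPAL_LEN")]
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name="OUTPUT_PRED")]
drop_input_cols = False  # stand-in for the transformer's _drop_input_cols flag

# The signature now declares SEPAL_LEN among its outputs as well, matching
# the columns that batch inference actually returns with the prediction.
sig = ModelSignature(inputs, ([] if drop_input_cols else inputs) + outputs)
```

The diff for `snowflake/ml/modeling/svm/linear_svr.py` below applies the same set of changes to the regressor.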
--- a/snowflake/ml/modeling/svm/linear_svr.py
+++ b/snowflake/ml/modeling/svm/linear_svr.py
@@ -7,6 +7,7 @@
 #
 import inspect
 import os
+import posixpath
 from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
 from uuid import uuid4
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
 from snowflake.snowpark import DataFrame, Session
 from snowflake.snowpark.functions import pandas_udf, sproc
 from snowflake.snowpark.types import PandasSeries
+from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
 
 from snowflake.ml.model.model_signature import (
     DataType,
@@ -239,7 +241,6 @@ class LinearSVR(BaseTransformer):
         sample_weight_col: Optional[str] = None,
     ) -> None:
         super().__init__()
-        self.id = str(uuid4()).replace("-", "_").upper()
         deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
 
         self._deps = list(deps)
@@ -268,6 +269,15 @@ class LinearSVR(BaseTransformer):
         self.set_drop_input_cols(drop_input_cols)
         self.set_sample_weight_col(sample_weight_col)
 
+    def _get_rand_id(self) -> str:
+        """
+        Generate random id to be used in sproc and stage names.
+
+        Returns:
+            Random id string usable in sproc, table, and stage names.
+        """
+        return str(uuid4()).replace("-", "_").upper()
+
     def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
         """
         Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -346,7 +356,7 @@ class LinearSVR(BaseTransformer):
         cp.dump(self._sklearn_object, local_transform_file)
 
         # Create temp stage to run fit.
-        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
+        transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
         SqlResultValidator(
             session=session,
@@ -359,11 +369,12 @@ class LinearSVR(BaseTransformer):
             expected_value=f"Stage area {transform_stage_name} successfully created."
         ).validate()
 
-        stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        # Use posixpath to construct stage paths
+        stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
+        stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
         local_result_file_name = get_temp_file_path()
-        stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
 
-        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
+        fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -389,6 +400,7 @@ class LinearSVR(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def fit_wrapper_sproc(
             session: Session,
@@ -397,7 +409,8 @@ class LinearSVR(BaseTransformer):
             stage_result_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> str:
             import cloudpickle as cp
             import numpy as np
@@ -464,15 +477,15 @@ class LinearSVR(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        sproc_export_file_name = session.call(
-            fit_sproc_name,
+        sproc_export_file_name = fit_wrapper_sproc(
+            session,
             query,
             stage_transform_file_name,
             stage_result_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         if "|" in sproc_export_file_name:
@@ -482,7 +495,7 @@ class LinearSVR(BaseTransformer):
             print("\n".join(fields[1:]))
 
         session.file.get(
-            os.path.join(stage_result_file_name, sproc_export_file_name),
+            posixpath.join(stage_result_file_name, sproc_export_file_name),
             local_result_file_name,
             statement_params=statement_params
         )
@@ -528,7 +541,7 @@ class LinearSVR(BaseTransformer):
 
         # Register vectorized UDF for batch inference
         batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
-            safe_id=self.id, method=inference_method)
+            safe_id=self._get_rand_id(), method=inference_method)
 
         # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
         # will try to pickle all of self which fails.
@@ -620,7 +633,7 @@ class LinearSVR(BaseTransformer):
             return transformed_pandas_df.to_dict("records")
 
         batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
-            safe_id=self.id
+            safe_id=self._get_rand_id()
        )
 
         pass_through_columns = self._get_pass_through_columns(dataset)
@@ -676,26 +689,37 @@ class LinearSVR(BaseTransformer):
         # input cols need to match unquoted / quoted
         input_cols = self.input_cols
         unquoted_input_cols = identifier.get_unescaped_names(self.input_cols)
+        quoted_input_cols = identifier.get_escaped_names(unquoted_input_cols)
 
         estimator = self._sklearn_object
 
-        input_df = dataset[input_cols] # Select input columns with quoted column names.
-        if hasattr(estimator, "feature_names_in_"):
-            missing_features = []
-            for i, f in enumerate(getattr(estimator, "feature_names_in_")):
-                if i >= len(input_cols) or (input_cols[i] != f and unquoted_input_cols[i] != f):
-                    missing_features.append(f)
-
-            if len(missing_features) > 0:
-                raise ValueError(
-                    "The feature names should match with those that were passed during fit.\n"
-                    f"Features seen during fit call but not present in the input: {missing_features}\n"
-                    f"Features in the input dataframe : {input_cols}\n"
-                )
-            input_df.columns = getattr(estimator, "feature_names_in_")
-        else:
-            # Just rename the column names to unquoted identifiers.
-            input_df.columns = unquoted_input_cols # Replace the quoted columns identifier with unquoted column ids.
+        features_required_by_estimator = getattr(estimator, "feature_names_in_") if hasattr(estimator, "feature_names_in_") else unquoted_input_cols
+        missing_features = []
+        features_in_dataset = set(dataset.columns)
+        columns_to_select = []
+        for i, f in enumerate(features_required_by_estimator):
+            if (
+                i >= len(input_cols)
+                or (input_cols[i] != f and unquoted_input_cols[i] != f and quoted_input_cols[i] != f)
+                or (input_cols[i] not in features_in_dataset and unquoted_input_cols[i] not in features_in_dataset
+                    and quoted_input_cols[i] not in features_in_dataset)
+            ):
+                missing_features.append(f)
+            elif input_cols[i] in features_in_dataset:
+                columns_to_select.append(input_cols[i])
+            elif unquoted_input_cols[i] in features_in_dataset:
+                columns_to_select.append(unquoted_input_cols[i])
+            else:
+                columns_to_select.append(quoted_input_cols[i])
+
+        if len(missing_features) > 0:
+            raise ValueError(
+                "The feature names should match with those that were passed during fit.\n"
+                f"Features seen during fit call but not present in the input: {missing_features}\n"
+                f"Features in the input dataframe : {input_cols}\n"
+            )
+        input_df = dataset[columns_to_select]
+        input_df.columns = features_required_by_estimator
 
         transformed_numpy_array = getattr(estimator, inference_method)(
             input_df
@@ -776,11 +800,18 @@ class LinearSVR(BaseTransformer):
             Transformed dataset.
         """
         if isinstance(dataset, DataFrame):
+            expected_type_inferred = "float"
+            # when it is classifier, infer the datatype from label columns
+            if expected_type_inferred == "" and 'predict' in self.model_signatures:
+                expected_type_inferred = convert_sp_to_sf_type(
+                    self.model_signatures['predict'].outputs[0].as_snowpark_type()
+                )
+
             output_df = self._batch_inference(
                 dataset=dataset,
                 inference_method="predict",
                 expected_output_cols_list=self.output_cols,
-                expected_output_cols_type="float",
+                expected_output_cols_type=expected_type_inferred,
             )
         elif isinstance(dataset, pd.DataFrame):
             output_df = self._sklearn_inference(
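
One difference from LinearSVC: the regressor seeds `expected_type_inferred` with `"float"`, so the signature-derived fallback above never fires here; it only matters for estimators that start with an empty type. Distilled into a hypothetical helper:

```python
def resolve_predict_type(seed: str, signature_type: str) -> str:
    # An empty seed falls back to the signature-derived type.
    return seed if seed else signature_type

assert resolve_predict_type("float", "BIGINT") == "float"  # LinearSVR (regressor)
assert resolve_predict_type("", "BIGINT") == "BIGINT"      # LinearSVC (classifier)
```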
@@ -851,10 +882,10 @@ class LinearSVR(BaseTransformer):
 
     def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
         """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
-        Returns an empty list if current object is not a classifier or not yet fitted.
+        Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
         """
         if getattr(self._sklearn_object, "classes_", None) is None:
-            return []
+            return [output_cols_prefix]
 
         classes = self._sklearn_object.classes_
         if isinstance(classes, numpy.ndarray):
@@ -1079,7 +1110,7 @@ class LinearSVR(BaseTransformer):
         cp.dump(self._sklearn_object, local_score_file)
 
         # Create temp stage to run score.
-        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         session = dataset._session
         stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
         SqlResultValidator(
@@ -1093,8 +1124,9 @@ class LinearSVR(BaseTransformer):
             expected_value=f"Stage area {score_stage_name} successfully created."
         ).validate()
 
-        stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
-        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
+        # Use posixpath to construct stage paths
+        stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
+        score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
         statement_params = telemetry.get_function_usage_statement_params(
             project=_PROJECT,
             subproject=_SUBPROJECT,
@@ -1120,6 +1152,7 @@ class LinearSVR(BaseTransformer):
             replace=True,
             session=session,
             statement_params=statement_params,
+            anonymous=True
         )
         def score_wrapper_sproc(
             session: Session,
@@ -1127,7 +1160,8 @@ class LinearSVR(BaseTransformer):
             stage_score_file_name: str,
             input_cols: List[str],
             label_cols: List[str],
-            sample_weight_col: Optional[str]
+            sample_weight_col: Optional[str],
+            statement_params: Dict[str, str]
         ) -> float:
             import cloudpickle as cp
             import numpy as np
@@ -1177,14 +1211,14 @@ class LinearSVR(BaseTransformer):
             api_calls=[Session.call],
             custom_tags=dict([("autogen", True)]),
         )
-        score = session.call(
-            score_sproc_name,
+        score = score_wrapper_sproc(
+            session,
             query,
             stage_score_file_name,
             identifier.get_unescaped_names(self.input_cols),
             identifier.get_unescaped_names(self.label_cols),
             identifier.get_unescaped_names(self.sample_weight_col),
-            statement_params=statement_params,
+            statement_params,
         )
 
         cleanup_temp_files([local_score_file_name])
@@ -1202,18 +1236,20 @@ class LinearSVR(BaseTransformer):
         if self._sklearn_object._estimator_type == 'classifier':
             outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
             outputs = _rename_features(outputs, self.output_cols) # rename the output columns
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         # For regressor, the type of predict is float64
         elif self._sklearn_object._estimator_type == 'regressor':
             outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
-            self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
-
+            self._model_signature_dict["predict"] = ModelSignature(inputs,
+                                                                    ([] if self._drop_input_cols else inputs) + outputs)
         for prob_func in PROB_FUNCTIONS:
             if hasattr(self, prob_func):
                 output_cols_prefix: str = f"{prob_func}_"
                 output_column_names = self._get_output_column_names(output_cols_prefix)
                 outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
-                self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
+                self._model_signature_dict[prob_func] = ModelSignature(inputs,
+                                                                       ([] if self._drop_input_cols else inputs) + outputs)
 
     @property
     def model_signatures(self) -> Dict[str, ModelSignature]: