snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -238,7 +240,6 @@ class LinearDiscriminantAnalysis(BaseTransformer):
238
240
  sample_weight_col: Optional[str] = None,
239
241
  ) -> None:
240
242
  super().__init__()
241
- self.id = str(uuid4()).replace("-", "_").upper()
242
243
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
243
244
 
244
245
  self._deps = list(deps)
@@ -264,6 +265,15 @@ class LinearDiscriminantAnalysis(BaseTransformer):
264
265
  self.set_drop_input_cols(drop_input_cols)
265
266
  self.set_sample_weight_col(sample_weight_col)
266
267
 
268
+ def _get_rand_id(self) -> str:
269
+ """
270
+ Generate random id to be used in sproc and stage names.
271
+
272
+ Returns:
273
+ Random id string usable in sproc, table, and stage names.
274
+ """
275
+ return str(uuid4()).replace("-", "_").upper()
276
+
267
277
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
268
278
  """
269
279
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -342,7 +352,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
342
352
  cp.dump(self._sklearn_object, local_transform_file)
343
353
 
344
354
  # Create temp stage to run fit.
345
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
355
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
346
356
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
347
357
  SqlResultValidator(
348
358
  session=session,
@@ -355,11 +365,12 @@ class LinearDiscriminantAnalysis(BaseTransformer):
355
365
  expected_value=f"Stage area {transform_stage_name} successfully created."
356
366
  ).validate()
357
367
 
358
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
368
+ # Use posixpath to construct stage paths
369
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
370
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
359
371
  local_result_file_name = get_temp_file_path()
360
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
361
372
 
362
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
373
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
363
374
  statement_params = telemetry.get_function_usage_statement_params(
364
375
  project=_PROJECT,
365
376
  subproject=_SUBPROJECT,
@@ -385,6 +396,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
385
396
  replace=True,
386
397
  session=session,
387
398
  statement_params=statement_params,
399
+ anonymous=True
388
400
  )
389
401
  def fit_wrapper_sproc(
390
402
  session: Session,
@@ -393,7 +405,8 @@ class LinearDiscriminantAnalysis(BaseTransformer):
393
405
  stage_result_file_name: str,
394
406
  input_cols: List[str],
395
407
  label_cols: List[str],
396
- sample_weight_col: Optional[str]
408
+ sample_weight_col: Optional[str],
409
+ statement_params: Dict[str, str]
397
410
  ) -> str:
398
411
  import cloudpickle as cp
399
412
  import numpy as np
@@ -460,15 +473,15 @@ class LinearDiscriminantAnalysis(BaseTransformer):
460
473
  api_calls=[Session.call],
461
474
  custom_tags=dict([("autogen", True)]),
462
475
  )
463
- sproc_export_file_name = session.call(
464
- fit_sproc_name,
476
+ sproc_export_file_name = fit_wrapper_sproc(
477
+ session,
465
478
  query,
466
479
  stage_transform_file_name,
467
480
  stage_result_file_name,
468
481
  identifier.get_unescaped_names(self.input_cols),
469
482
  identifier.get_unescaped_names(self.label_cols),
470
483
  identifier.get_unescaped_names(self.sample_weight_col),
471
- statement_params=statement_params,
484
+ statement_params,
472
485
  )
473
486
 
474
487
  if "|" in sproc_export_file_name:
@@ -478,7 +491,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
478
491
  print("\n".join(fields[1:]))
479
492
 
480
493
  session.file.get(
481
- os.path.join(stage_result_file_name, sproc_export_file_name),
494
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
482
495
  local_result_file_name,
483
496
  statement_params=statement_params
484
497
  )
@@ -524,7 +537,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
524
537
 
525
538
  # Register vectorized UDF for batch inference
526
539
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
527
- safe_id=self.id, method=inference_method)
540
+ safe_id=self._get_rand_id(), method=inference_method)
528
541
 
529
542
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
530
543
  # will try to pickle all of self which fails.
@@ -616,7 +629,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
616
629
  return transformed_pandas_df.to_dict("records")
617
630
 
618
631
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
619
- safe_id=self.id
632
+ safe_id=self._get_rand_id()
620
633
  )
621
634
 
622
635
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -783,11 +796,18 @@ class LinearDiscriminantAnalysis(BaseTransformer):
783
796
  Transformed dataset.
784
797
  """
785
798
  if isinstance(dataset, DataFrame):
799
+ expected_type_inferred = ""
800
+ # when it is classifier, infer the datatype from label columns
801
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
802
+ expected_type_inferred = convert_sp_to_sf_type(
803
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
804
+ )
805
+
786
806
  output_df = self._batch_inference(
787
807
  dataset=dataset,
788
808
  inference_method="predict",
789
809
  expected_output_cols_list=self.output_cols,
790
- expected_output_cols_type="",
810
+ expected_output_cols_type=expected_type_inferred,
791
811
  )
792
812
  elif isinstance(dataset, pd.DataFrame):
793
813
  output_df = self._sklearn_inference(
@@ -860,10 +880,10 @@ class LinearDiscriminantAnalysis(BaseTransformer):
860
880
 
861
881
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
862
882
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
863
- Returns an empty list if current object is not a classifier or not yet fitted.
883
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
864
884
  """
865
885
  if getattr(self._sklearn_object, "classes_", None) is None:
866
- return []
886
+ return [output_cols_prefix]
867
887
 
868
888
  classes = self._sklearn_object.classes_
869
889
  if isinstance(classes, numpy.ndarray):
@@ -1094,7 +1114,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
1094
1114
  cp.dump(self._sklearn_object, local_score_file)
1095
1115
 
1096
1116
  # Create temp stage to run score.
1097
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1117
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1098
1118
  session = dataset._session
1099
1119
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1100
1120
  SqlResultValidator(
@@ -1108,8 +1128,9 @@ class LinearDiscriminantAnalysis(BaseTransformer):
1108
1128
  expected_value=f"Stage area {score_stage_name} successfully created."
1109
1129
  ).validate()
1110
1130
 
1111
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1112
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1131
+ # Use posixpath to construct stage paths
1132
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1133
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1113
1134
  statement_params = telemetry.get_function_usage_statement_params(
1114
1135
  project=_PROJECT,
1115
1136
  subproject=_SUBPROJECT,
@@ -1135,6 +1156,7 @@ class LinearDiscriminantAnalysis(BaseTransformer):
1135
1156
  replace=True,
1136
1157
  session=session,
1137
1158
  statement_params=statement_params,
1159
+ anonymous=True
1138
1160
  )
1139
1161
  def score_wrapper_sproc(
1140
1162
  session: Session,
@@ -1142,7 +1164,8 @@ class LinearDiscriminantAnalysis(BaseTransformer):
1142
1164
  stage_score_file_name: str,
1143
1165
  input_cols: List[str],
1144
1166
  label_cols: List[str],
1145
- sample_weight_col: Optional[str]
1167
+ sample_weight_col: Optional[str],
1168
+ statement_params: Dict[str, str]
1146
1169
  ) -> float:
1147
1170
  import cloudpickle as cp
1148
1171
  import numpy as np
@@ -1192,14 +1215,14 @@ class LinearDiscriminantAnalysis(BaseTransformer):
1192
1215
  api_calls=[Session.call],
1193
1216
  custom_tags=dict([("autogen", True)]),
1194
1217
  )
1195
- score = session.call(
1196
- score_sproc_name,
1218
+ score = score_wrapper_sproc(
1219
+ session,
1197
1220
  query,
1198
1221
  stage_score_file_name,
1199
1222
  identifier.get_unescaped_names(self.input_cols),
1200
1223
  identifier.get_unescaped_names(self.label_cols),
1201
1224
  identifier.get_unescaped_names(self.sample_weight_col),
1202
- statement_params=statement_params,
1225
+ statement_params,
1203
1226
  )
1204
1227
 
1205
1228
  cleanup_temp_files([local_score_file_name])
@@ -1217,18 +1240,20 @@ class LinearDiscriminantAnalysis(BaseTransformer):
1217
1240
  if self._sklearn_object._estimator_type == 'classifier':
1218
1241
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1219
1242
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1220
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1243
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1244
+ ([] if self._drop_input_cols else inputs) + outputs)
1221
1245
  # For regressor, the type of predict is float64
1222
1246
  elif self._sklearn_object._estimator_type == 'regressor':
1223
1247
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1224
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1225
-
1248
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1249
+ ([] if self._drop_input_cols else inputs) + outputs)
1226
1250
  for prob_func in PROB_FUNCTIONS:
1227
1251
  if hasattr(self, prob_func):
1228
1252
  output_cols_prefix: str = f"{prob_func}_"
1229
1253
  output_column_names = self._get_output_column_names(output_cols_prefix)
1230
1254
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1231
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1255
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1256
+ ([] if self._drop_input_cols else inputs) + outputs)
1232
1257
 
1233
1258
  @property
1234
1259
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -203,7 +205,6 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
203
205
  sample_weight_col: Optional[str] = None,
204
206
  ) -> None:
205
207
  super().__init__()
206
- self.id = str(uuid4()).replace("-", "_").upper()
207
208
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
208
209
 
209
210
  self._deps = list(deps)
@@ -226,6 +227,15 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
226
227
  self.set_drop_input_cols(drop_input_cols)
227
228
  self.set_sample_weight_col(sample_weight_col)
228
229
 
230
+ def _get_rand_id(self) -> str:
231
+ """
232
+ Generate random id to be used in sproc and stage names.
233
+
234
+ Returns:
235
+ Random id string usable in sproc, table, and stage names.
236
+ """
237
+ return str(uuid4()).replace("-", "_").upper()
238
+
229
239
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
230
240
  """
231
241
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -304,7 +314,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
304
314
  cp.dump(self._sklearn_object, local_transform_file)
305
315
 
306
316
  # Create temp stage to run fit.
307
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
317
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
308
318
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
309
319
  SqlResultValidator(
310
320
  session=session,
@@ -317,11 +327,12 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
317
327
  expected_value=f"Stage area {transform_stage_name} successfully created."
318
328
  ).validate()
319
329
 
320
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
330
+ # Use posixpath to construct stage paths
331
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
332
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
321
333
  local_result_file_name = get_temp_file_path()
322
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
323
334
 
324
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
335
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
325
336
  statement_params = telemetry.get_function_usage_statement_params(
326
337
  project=_PROJECT,
327
338
  subproject=_SUBPROJECT,
@@ -347,6 +358,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
347
358
  replace=True,
348
359
  session=session,
349
360
  statement_params=statement_params,
361
+ anonymous=True
350
362
  )
351
363
  def fit_wrapper_sproc(
352
364
  session: Session,
@@ -355,7 +367,8 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
355
367
  stage_result_file_name: str,
356
368
  input_cols: List[str],
357
369
  label_cols: List[str],
358
- sample_weight_col: Optional[str]
370
+ sample_weight_col: Optional[str],
371
+ statement_params: Dict[str, str]
359
372
  ) -> str:
360
373
  import cloudpickle as cp
361
374
  import numpy as np
@@ -422,15 +435,15 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
422
435
  api_calls=[Session.call],
423
436
  custom_tags=dict([("autogen", True)]),
424
437
  )
425
- sproc_export_file_name = session.call(
426
- fit_sproc_name,
438
+ sproc_export_file_name = fit_wrapper_sproc(
439
+ session,
427
440
  query,
428
441
  stage_transform_file_name,
429
442
  stage_result_file_name,
430
443
  identifier.get_unescaped_names(self.input_cols),
431
444
  identifier.get_unescaped_names(self.label_cols),
432
445
  identifier.get_unescaped_names(self.sample_weight_col),
433
- statement_params=statement_params,
446
+ statement_params,
434
447
  )
435
448
 
436
449
  if "|" in sproc_export_file_name:
@@ -440,7 +453,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
440
453
  print("\n".join(fields[1:]))
441
454
 
442
455
  session.file.get(
443
- os.path.join(stage_result_file_name, sproc_export_file_name),
456
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
444
457
  local_result_file_name,
445
458
  statement_params=statement_params
446
459
  )
@@ -486,7 +499,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
486
499
 
487
500
  # Register vectorized UDF for batch inference
488
501
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
489
- safe_id=self.id, method=inference_method)
502
+ safe_id=self._get_rand_id(), method=inference_method)
490
503
 
491
504
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
492
505
  # will try to pickle all of self which fails.
@@ -578,7 +591,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
578
591
  return transformed_pandas_df.to_dict("records")
579
592
 
580
593
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
581
- safe_id=self.id
594
+ safe_id=self._get_rand_id()
582
595
  )
583
596
 
584
597
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -745,11 +758,18 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
745
758
  Transformed dataset.
746
759
  """
747
760
  if isinstance(dataset, DataFrame):
761
+ expected_type_inferred = ""
762
+ # when it is classifier, infer the datatype from label columns
763
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
764
+ expected_type_inferred = convert_sp_to_sf_type(
765
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
766
+ )
767
+
748
768
  output_df = self._batch_inference(
749
769
  dataset=dataset,
750
770
  inference_method="predict",
751
771
  expected_output_cols_list=self.output_cols,
752
- expected_output_cols_type="",
772
+ expected_output_cols_type=expected_type_inferred,
753
773
  )
754
774
  elif isinstance(dataset, pd.DataFrame):
755
775
  output_df = self._sklearn_inference(
@@ -820,10 +840,10 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
820
840
 
821
841
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
822
842
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
823
- Returns an empty list if current object is not a classifier or not yet fitted.
843
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
824
844
  """
825
845
  if getattr(self._sklearn_object, "classes_", None) is None:
826
- return []
846
+ return [output_cols_prefix]
827
847
 
828
848
  classes = self._sklearn_object.classes_
829
849
  if isinstance(classes, numpy.ndarray):
@@ -1054,7 +1074,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
1054
1074
  cp.dump(self._sklearn_object, local_score_file)
1055
1075
 
1056
1076
  # Create temp stage to run score.
1057
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1077
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1058
1078
  session = dataset._session
1059
1079
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1060
1080
  SqlResultValidator(
@@ -1068,8 +1088,9 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
1068
1088
  expected_value=f"Stage area {score_stage_name} successfully created."
1069
1089
  ).validate()
1070
1090
 
1071
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1072
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1091
+ # Use posixpath to construct stage paths
1092
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1093
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1073
1094
  statement_params = telemetry.get_function_usage_statement_params(
1074
1095
  project=_PROJECT,
1075
1096
  subproject=_SUBPROJECT,
@@ -1095,6 +1116,7 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
1095
1116
  replace=True,
1096
1117
  session=session,
1097
1118
  statement_params=statement_params,
1119
+ anonymous=True
1098
1120
  )
1099
1121
  def score_wrapper_sproc(
1100
1122
  session: Session,
@@ -1102,7 +1124,8 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
1102
1124
  stage_score_file_name: str,
1103
1125
  input_cols: List[str],
1104
1126
  label_cols: List[str],
1105
- sample_weight_col: Optional[str]
1127
+ sample_weight_col: Optional[str],
1128
+ statement_params: Dict[str, str]
1106
1129
  ) -> float:
1107
1130
  import cloudpickle as cp
1108
1131
  import numpy as np
@@ -1152,14 +1175,14 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
1152
1175
  api_calls=[Session.call],
1153
1176
  custom_tags=dict([("autogen", True)]),
1154
1177
  )
1155
- score = session.call(
1156
- score_sproc_name,
1178
+ score = score_wrapper_sproc(
1179
+ session,
1157
1180
  query,
1158
1181
  stage_score_file_name,
1159
1182
  identifier.get_unescaped_names(self.input_cols),
1160
1183
  identifier.get_unescaped_names(self.label_cols),
1161
1184
  identifier.get_unescaped_names(self.sample_weight_col),
1162
- statement_params=statement_params,
1185
+ statement_params,
1163
1186
  )
1164
1187
 
1165
1188
  cleanup_temp_files([local_score_file_name])
@@ -1177,18 +1200,20 @@ class QuadraticDiscriminantAnalysis(BaseTransformer):
1177
1200
  if self._sklearn_object._estimator_type == 'classifier':
1178
1201
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1179
1202
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1180
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1203
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1204
+ ([] if self._drop_input_cols else inputs) + outputs)
1181
1205
  # For regressor, the type of predict is float64
1182
1206
  elif self._sklearn_object._estimator_type == 'regressor':
1183
1207
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1184
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1185
-
1208
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1209
+ ([] if self._drop_input_cols else inputs) + outputs)
1186
1210
  for prob_func in PROB_FUNCTIONS:
1187
1211
  if hasattr(self, prob_func):
1188
1212
  output_cols_prefix: str = f"{prob_func}_"
1189
1213
  output_column_names = self._get_output_column_names(output_cols_prefix)
1190
1214
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1191
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1215
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1216
+ ([] if self._drop_input_cols else inputs) + outputs)
1192
1217
 
1193
1218
  @property
1194
1219
  def model_signatures(self) -> Dict[str, ModelSignature]: