snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189)
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -282,7 +284,6 @@ class DictionaryLearning(BaseTransformer):
282
284
  sample_weight_col: Optional[str] = None,
283
285
  ) -> None:
284
286
  super().__init__()
285
- self.id = str(uuid4()).replace("-", "_").upper()
286
287
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
287
288
 
288
289
  self._deps = list(deps)
@@ -318,6 +319,15 @@ class DictionaryLearning(BaseTransformer):
318
319
  self.set_drop_input_cols(drop_input_cols)
319
320
  self.set_sample_weight_col(sample_weight_col)
320
321
 
322
+ def _get_rand_id(self) -> str:
323
+ """
324
+ Generate random id to be used in sproc and stage names.
325
+
326
+ Returns:
327
+ Random id string usable in sproc, table, and stage names.
328
+ """
329
+ return str(uuid4()).replace("-", "_").upper()
330
+
321
331
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
322
332
  """
323
333
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -396,7 +406,7 @@ class DictionaryLearning(BaseTransformer):
396
406
  cp.dump(self._sklearn_object, local_transform_file)
397
407
 
398
408
  # Create temp stage to run fit.
399
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
409
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
400
410
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
401
411
  SqlResultValidator(
402
412
  session=session,
@@ -409,11 +419,12 @@ class DictionaryLearning(BaseTransformer):
409
419
  expected_value=f"Stage area {transform_stage_name} successfully created."
410
420
  ).validate()
411
421
 
412
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
422
+ # Use posixpath to construct stage paths
423
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
424
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
413
425
  local_result_file_name = get_temp_file_path()
414
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
415
426
 
416
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
427
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
417
428
  statement_params = telemetry.get_function_usage_statement_params(
418
429
  project=_PROJECT,
419
430
  subproject=_SUBPROJECT,
@@ -439,6 +450,7 @@ class DictionaryLearning(BaseTransformer):
439
450
  replace=True,
440
451
  session=session,
441
452
  statement_params=statement_params,
453
+ anonymous=True
442
454
  )
443
455
  def fit_wrapper_sproc(
444
456
  session: Session,
@@ -447,7 +459,8 @@ class DictionaryLearning(BaseTransformer):
447
459
  stage_result_file_name: str,
448
460
  input_cols: List[str],
449
461
  label_cols: List[str],
450
- sample_weight_col: Optional[str]
462
+ sample_weight_col: Optional[str],
463
+ statement_params: Dict[str, str]
451
464
  ) -> str:
452
465
  import cloudpickle as cp
453
466
  import numpy as np
@@ -514,15 +527,15 @@ class DictionaryLearning(BaseTransformer):
514
527
  api_calls=[Session.call],
515
528
  custom_tags=dict([("autogen", True)]),
516
529
  )
517
- sproc_export_file_name = session.call(
518
- fit_sproc_name,
530
+ sproc_export_file_name = fit_wrapper_sproc(
531
+ session,
519
532
  query,
520
533
  stage_transform_file_name,
521
534
  stage_result_file_name,
522
535
  identifier.get_unescaped_names(self.input_cols),
523
536
  identifier.get_unescaped_names(self.label_cols),
524
537
  identifier.get_unescaped_names(self.sample_weight_col),
525
- statement_params=statement_params,
538
+ statement_params,
526
539
  )
527
540
 
528
541
  if "|" in sproc_export_file_name:
@@ -532,7 +545,7 @@ class DictionaryLearning(BaseTransformer):
532
545
  print("\n".join(fields[1:]))
533
546
 
534
547
  session.file.get(
535
- os.path.join(stage_result_file_name, sproc_export_file_name),
548
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
536
549
  local_result_file_name,
537
550
  statement_params=statement_params
538
551
  )
@@ -578,7 +591,7 @@ class DictionaryLearning(BaseTransformer):
578
591
 
579
592
  # Register vectorized UDF for batch inference
580
593
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
581
- safe_id=self.id, method=inference_method)
594
+ safe_id=self._get_rand_id(), method=inference_method)
582
595
 
583
596
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
584
597
  # will try to pickle all of self which fails.
@@ -670,7 +683,7 @@ class DictionaryLearning(BaseTransformer):
670
683
  return transformed_pandas_df.to_dict("records")
671
684
 
672
685
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
673
- safe_id=self.id
686
+ safe_id=self._get_rand_id()
674
687
  )
675
688
 
676
689
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -835,11 +848,18 @@ class DictionaryLearning(BaseTransformer):
835
848
  Transformed dataset.
836
849
  """
837
850
  if isinstance(dataset, DataFrame):
851
+ expected_type_inferred = ""
852
+ # when it is classifier, infer the datatype from label columns
853
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
854
+ expected_type_inferred = convert_sp_to_sf_type(
855
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
856
+ )
857
+
838
858
  output_df = self._batch_inference(
839
859
  dataset=dataset,
840
860
  inference_method="predict",
841
861
  expected_output_cols_list=self.output_cols,
842
- expected_output_cols_type="",
862
+ expected_output_cols_type=expected_type_inferred,
843
863
  )
844
864
  elif isinstance(dataset, pd.DataFrame):
845
865
  output_df = self._sklearn_inference(
@@ -912,10 +932,10 @@ class DictionaryLearning(BaseTransformer):
912
932
 
913
933
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
914
934
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
915
- Returns an empty list if current object is not a classifier or not yet fitted.
935
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
916
936
  """
917
937
  if getattr(self._sklearn_object, "classes_", None) is None:
918
- return []
938
+ return [output_cols_prefix]
919
939
 
920
940
  classes = self._sklearn_object.classes_
921
941
  if isinstance(classes, numpy.ndarray):
@@ -1140,7 +1160,7 @@ class DictionaryLearning(BaseTransformer):
1140
1160
  cp.dump(self._sklearn_object, local_score_file)
1141
1161
 
1142
1162
  # Create temp stage to run score.
1143
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1163
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1144
1164
  session = dataset._session
1145
1165
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1146
1166
  SqlResultValidator(
@@ -1154,8 +1174,9 @@ class DictionaryLearning(BaseTransformer):
1154
1174
  expected_value=f"Stage area {score_stage_name} successfully created."
1155
1175
  ).validate()
1156
1176
 
1157
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1158
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1177
+ # Use posixpath to construct stage paths
1178
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1179
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1159
1180
  statement_params = telemetry.get_function_usage_statement_params(
1160
1181
  project=_PROJECT,
1161
1182
  subproject=_SUBPROJECT,
@@ -1181,6 +1202,7 @@ class DictionaryLearning(BaseTransformer):
1181
1202
  replace=True,
1182
1203
  session=session,
1183
1204
  statement_params=statement_params,
1205
+ anonymous=True
1184
1206
  )
1185
1207
  def score_wrapper_sproc(
1186
1208
  session: Session,
@@ -1188,7 +1210,8 @@ class DictionaryLearning(BaseTransformer):
1188
1210
  stage_score_file_name: str,
1189
1211
  input_cols: List[str],
1190
1212
  label_cols: List[str],
1191
- sample_weight_col: Optional[str]
1213
+ sample_weight_col: Optional[str],
1214
+ statement_params: Dict[str, str]
1192
1215
  ) -> float:
1193
1216
  import cloudpickle as cp
1194
1217
  import numpy as np
@@ -1238,14 +1261,14 @@ class DictionaryLearning(BaseTransformer):
1238
1261
  api_calls=[Session.call],
1239
1262
  custom_tags=dict([("autogen", True)]),
1240
1263
  )
1241
- score = session.call(
1242
- score_sproc_name,
1264
+ score = score_wrapper_sproc(
1265
+ session,
1243
1266
  query,
1244
1267
  stage_score_file_name,
1245
1268
  identifier.get_unescaped_names(self.input_cols),
1246
1269
  identifier.get_unescaped_names(self.label_cols),
1247
1270
  identifier.get_unescaped_names(self.sample_weight_col),
1248
- statement_params=statement_params,
1271
+ statement_params,
1249
1272
  )
1250
1273
 
1251
1274
  cleanup_temp_files([local_score_file_name])
@@ -1263,18 +1286,20 @@ class DictionaryLearning(BaseTransformer):
1263
1286
  if self._sklearn_object._estimator_type == 'classifier':
1264
1287
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1265
1288
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1266
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1289
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1290
+ ([] if self._drop_input_cols else inputs) + outputs)
1267
1291
  # For regressor, the type of predict is float64
1268
1292
  elif self._sklearn_object._estimator_type == 'regressor':
1269
1293
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1270
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1271
-
1294
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1295
+ ([] if self._drop_input_cols else inputs) + outputs)
1272
1296
  for prob_func in PROB_FUNCTIONS:
1273
1297
  if hasattr(self, prob_func):
1274
1298
  output_cols_prefix: str = f"{prob_func}_"
1275
1299
  output_column_names = self._get_output_column_names(output_cols_prefix)
1276
1300
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1277
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1301
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1302
+ ([] if self._drop_input_cols else inputs) + outputs)
1278
1303
 
1279
1304
  @property
1280
1305
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -232,7 +234,6 @@ class FactorAnalysis(BaseTransformer):
232
234
  sample_weight_col: Optional[str] = None,
233
235
  ) -> None:
234
236
  super().__init__()
235
- self.id = str(uuid4()).replace("-", "_").upper()
236
237
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
237
238
 
238
239
  self._deps = list(deps)
@@ -260,6 +261,15 @@ class FactorAnalysis(BaseTransformer):
260
261
  self.set_drop_input_cols(drop_input_cols)
261
262
  self.set_sample_weight_col(sample_weight_col)
262
263
 
264
+ def _get_rand_id(self) -> str:
265
+ """
266
+ Generate random id to be used in sproc and stage names.
267
+
268
+ Returns:
269
+ Random id string usable in sproc, table, and stage names.
270
+ """
271
+ return str(uuid4()).replace("-", "_").upper()
272
+
263
273
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
264
274
  """
265
275
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -338,7 +348,7 @@ class FactorAnalysis(BaseTransformer):
338
348
  cp.dump(self._sklearn_object, local_transform_file)
339
349
 
340
350
  # Create temp stage to run fit.
341
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
351
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
342
352
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
343
353
  SqlResultValidator(
344
354
  session=session,
@@ -351,11 +361,12 @@ class FactorAnalysis(BaseTransformer):
351
361
  expected_value=f"Stage area {transform_stage_name} successfully created."
352
362
  ).validate()
353
363
 
354
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
364
+ # Use posixpath to construct stage paths
365
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
366
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
355
367
  local_result_file_name = get_temp_file_path()
356
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
357
368
 
358
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
369
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
359
370
  statement_params = telemetry.get_function_usage_statement_params(
360
371
  project=_PROJECT,
361
372
  subproject=_SUBPROJECT,
@@ -381,6 +392,7 @@ class FactorAnalysis(BaseTransformer):
381
392
  replace=True,
382
393
  session=session,
383
394
  statement_params=statement_params,
395
+ anonymous=True
384
396
  )
385
397
  def fit_wrapper_sproc(
386
398
  session: Session,
@@ -389,7 +401,8 @@ class FactorAnalysis(BaseTransformer):
389
401
  stage_result_file_name: str,
390
402
  input_cols: List[str],
391
403
  label_cols: List[str],
392
- sample_weight_col: Optional[str]
404
+ sample_weight_col: Optional[str],
405
+ statement_params: Dict[str, str]
393
406
  ) -> str:
394
407
  import cloudpickle as cp
395
408
  import numpy as np
@@ -456,15 +469,15 @@ class FactorAnalysis(BaseTransformer):
456
469
  api_calls=[Session.call],
457
470
  custom_tags=dict([("autogen", True)]),
458
471
  )
459
- sproc_export_file_name = session.call(
460
- fit_sproc_name,
472
+ sproc_export_file_name = fit_wrapper_sproc(
473
+ session,
461
474
  query,
462
475
  stage_transform_file_name,
463
476
  stage_result_file_name,
464
477
  identifier.get_unescaped_names(self.input_cols),
465
478
  identifier.get_unescaped_names(self.label_cols),
466
479
  identifier.get_unescaped_names(self.sample_weight_col),
467
- statement_params=statement_params,
480
+ statement_params,
468
481
  )
469
482
 
470
483
  if "|" in sproc_export_file_name:
@@ -474,7 +487,7 @@ class FactorAnalysis(BaseTransformer):
474
487
  print("\n".join(fields[1:]))
475
488
 
476
489
  session.file.get(
477
- os.path.join(stage_result_file_name, sproc_export_file_name),
490
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
478
491
  local_result_file_name,
479
492
  statement_params=statement_params
480
493
  )
@@ -520,7 +533,7 @@ class FactorAnalysis(BaseTransformer):
520
533
 
521
534
  # Register vectorized UDF for batch inference
522
535
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
523
- safe_id=self.id, method=inference_method)
536
+ safe_id=self._get_rand_id(), method=inference_method)
524
537
 
525
538
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
526
539
  # will try to pickle all of self which fails.
@@ -612,7 +625,7 @@ class FactorAnalysis(BaseTransformer):
612
625
  return transformed_pandas_df.to_dict("records")
613
626
 
614
627
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
615
- safe_id=self.id
628
+ safe_id=self._get_rand_id()
616
629
  )
617
630
 
618
631
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -777,11 +790,18 @@ class FactorAnalysis(BaseTransformer):
777
790
  Transformed dataset.
778
791
  """
779
792
  if isinstance(dataset, DataFrame):
793
+ expected_type_inferred = ""
794
+ # when it is classifier, infer the datatype from label columns
795
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
796
+ expected_type_inferred = convert_sp_to_sf_type(
797
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
798
+ )
799
+
780
800
  output_df = self._batch_inference(
781
801
  dataset=dataset,
782
802
  inference_method="predict",
783
803
  expected_output_cols_list=self.output_cols,
784
- expected_output_cols_type="",
804
+ expected_output_cols_type=expected_type_inferred,
785
805
  )
786
806
  elif isinstance(dataset, pd.DataFrame):
787
807
  output_df = self._sklearn_inference(
@@ -854,10 +874,10 @@ class FactorAnalysis(BaseTransformer):
854
874
 
855
875
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
856
876
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
857
- Returns an empty list if current object is not a classifier or not yet fitted.
877
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
858
878
  """
859
879
  if getattr(self._sklearn_object, "classes_", None) is None:
860
- return []
880
+ return [output_cols_prefix]
861
881
 
862
882
  classes = self._sklearn_object.classes_
863
883
  if isinstance(classes, numpy.ndarray):
@@ -1082,7 +1102,7 @@ class FactorAnalysis(BaseTransformer):
1082
1102
  cp.dump(self._sklearn_object, local_score_file)
1083
1103
 
1084
1104
  # Create temp stage to run score.
1085
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1105
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1086
1106
  session = dataset._session
1087
1107
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1088
1108
  SqlResultValidator(
@@ -1096,8 +1116,9 @@ class FactorAnalysis(BaseTransformer):
1096
1116
  expected_value=f"Stage area {score_stage_name} successfully created."
1097
1117
  ).validate()
1098
1118
 
1099
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1100
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1119
+ # Use posixpath to construct stage paths
1120
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1121
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1101
1122
  statement_params = telemetry.get_function_usage_statement_params(
1102
1123
  project=_PROJECT,
1103
1124
  subproject=_SUBPROJECT,
@@ -1123,6 +1144,7 @@ class FactorAnalysis(BaseTransformer):
1123
1144
  replace=True,
1124
1145
  session=session,
1125
1146
  statement_params=statement_params,
1147
+ anonymous=True
1126
1148
  )
1127
1149
  def score_wrapper_sproc(
1128
1150
  session: Session,
@@ -1130,7 +1152,8 @@ class FactorAnalysis(BaseTransformer):
1130
1152
  stage_score_file_name: str,
1131
1153
  input_cols: List[str],
1132
1154
  label_cols: List[str],
1133
- sample_weight_col: Optional[str]
1155
+ sample_weight_col: Optional[str],
1156
+ statement_params: Dict[str, str]
1134
1157
  ) -> float:
1135
1158
  import cloudpickle as cp
1136
1159
  import numpy as np
@@ -1180,14 +1203,14 @@ class FactorAnalysis(BaseTransformer):
1180
1203
  api_calls=[Session.call],
1181
1204
  custom_tags=dict([("autogen", True)]),
1182
1205
  )
1183
- score = session.call(
1184
- score_sproc_name,
1206
+ score = score_wrapper_sproc(
1207
+ session,
1185
1208
  query,
1186
1209
  stage_score_file_name,
1187
1210
  identifier.get_unescaped_names(self.input_cols),
1188
1211
  identifier.get_unescaped_names(self.label_cols),
1189
1212
  identifier.get_unescaped_names(self.sample_weight_col),
1190
- statement_params=statement_params,
1213
+ statement_params,
1191
1214
  )
1192
1215
 
1193
1216
  cleanup_temp_files([local_score_file_name])
@@ -1205,18 +1228,20 @@ class FactorAnalysis(BaseTransformer):
1205
1228
  if self._sklearn_object._estimator_type == 'classifier':
1206
1229
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1207
1230
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1208
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1231
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1232
+ ([] if self._drop_input_cols else inputs) + outputs)
1209
1233
  # For regressor, the type of predict is float64
1210
1234
  elif self._sklearn_object._estimator_type == 'regressor':
1211
1235
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1212
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1213
-
1236
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1237
+ ([] if self._drop_input_cols else inputs) + outputs)
1214
1238
  for prob_func in PROB_FUNCTIONS:
1215
1239
  if hasattr(self, prob_func):
1216
1240
  output_cols_prefix: str = f"{prob_func}_"
1217
1241
  output_column_names = self._get_output_column_names(output_cols_prefix)
1218
1242
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1219
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1243
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1244
+ ([] if self._drop_input_cols else inputs) + outputs)
1220
1245
 
1221
1246
  @property
1222
1247
  def model_signatures(self) -> Dict[str, ModelSignature]: