snowflake-ml-python 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. snowflake/ml/_internal/env_utils.py +2 -1
  2. snowflake/ml/_internal/file_utils.py +29 -7
  3. snowflake/ml/_internal/telemetry.py +5 -8
  4. snowflake/ml/_internal/utils/uri.py +7 -2
  5. snowflake/ml/model/_deploy_client/image_builds/base_image_builder.py +15 -0
  6. snowflake/ml/model/_deploy_client/image_builds/client_image_builder.py +259 -0
  7. snowflake/ml/model/_deploy_client/image_builds/docker_context.py +89 -0
  8. snowflake/ml/model/_deploy_client/image_builds/gunicorn_run.sh +24 -0
  9. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +118 -0
  10. snowflake/ml/model/_deploy_client/image_builds/templates/dockerfile_template +40 -0
  11. snowflake/ml/model/_deploy_client/snowservice/deploy.py +199 -0
  12. snowflake/ml/model/_deploy_client/snowservice/deploy_options.py +88 -0
  13. snowflake/ml/model/_deploy_client/snowservice/templates/service_spec_template +24 -0
  14. snowflake/ml/model/_deploy_client/utils/constants.py +47 -0
  15. snowflake/ml/model/_deploy_client/utils/snowservice_client.py +178 -0
  16. snowflake/ml/model/_deploy_client/warehouse/deploy.py +24 -6
  17. snowflake/ml/model/_deploy_client/warehouse/infer_template.py +5 -2
  18. snowflake/ml/model/_deployer.py +14 -27
  19. snowflake/ml/model/_env.py +4 -4
  20. snowflake/ml/model/_handlers/custom.py +14 -2
  21. snowflake/ml/model/_handlers/pytorch.py +186 -0
  22. snowflake/ml/model/_handlers/sklearn.py +14 -9
  23. snowflake/ml/model/_handlers/snowmlmodel.py +14 -9
  24. snowflake/ml/model/_handlers/torchscript.py +180 -0
  25. snowflake/ml/model/_handlers/xgboost.py +19 -9
  26. snowflake/ml/model/_model.py +3 -2
  27. snowflake/ml/model/_model_meta.py +12 -7
  28. snowflake/ml/model/model_signature.py +446 -66
  29. snowflake/ml/model/type_hints.py +23 -4
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +51 -26
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +51 -26
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +51 -26
  33. snowflake/ml/modeling/cluster/birch.py +51 -26
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +51 -26
  35. snowflake/ml/modeling/cluster/dbscan.py +51 -26
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +51 -26
  37. snowflake/ml/modeling/cluster/k_means.py +51 -26
  38. snowflake/ml/modeling/cluster/mean_shift.py +51 -26
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +51 -26
  40. snowflake/ml/modeling/cluster/optics.py +51 -26
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +51 -26
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +51 -26
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +51 -26
  44. snowflake/ml/modeling/compose/column_transformer.py +51 -26
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +51 -26
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +51 -26
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +51 -26
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +51 -26
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +51 -26
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +51 -26
  51. snowflake/ml/modeling/covariance/min_cov_det.py +51 -26
  52. snowflake/ml/modeling/covariance/oas.py +51 -26
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +51 -26
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +51 -26
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +51 -26
  56. snowflake/ml/modeling/decomposition/fast_ica.py +51 -26
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +51 -26
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +51 -26
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +51 -26
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +51 -26
  61. snowflake/ml/modeling/decomposition/pca.py +51 -26
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +51 -26
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +51 -26
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +51 -26
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +51 -26
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +51 -26
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +51 -26
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +51 -26
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +51 -26
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +51 -26
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +51 -26
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +51 -26
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +51 -26
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +51 -26
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +51 -26
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +51 -26
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +51 -26
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +51 -26
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +51 -26
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +51 -26
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +51 -26
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +51 -26
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +51 -26
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +51 -26
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +51 -26
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +51 -26
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +51 -26
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +51 -26
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +51 -26
  90. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +51 -26
  91. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +51 -26
  92. snowflake/ml/modeling/impute/iterative_imputer.py +51 -26
  93. snowflake/ml/modeling/impute/knn_imputer.py +51 -26
  94. snowflake/ml/modeling/impute/missing_indicator.py +51 -26
  95. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +51 -26
  96. snowflake/ml/modeling/kernel_approximation/nystroem.py +51 -26
  97. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +51 -26
  98. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +51 -26
  99. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +51 -26
  100. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +51 -26
  101. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +51 -26
  102. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +51 -26
  103. snowflake/ml/modeling/linear_model/ard_regression.py +51 -26
  104. snowflake/ml/modeling/linear_model/bayesian_ridge.py +51 -26
  105. snowflake/ml/modeling/linear_model/elastic_net.py +51 -26
  106. snowflake/ml/modeling/linear_model/elastic_net_cv.py +51 -26
  107. snowflake/ml/modeling/linear_model/gamma_regressor.py +51 -26
  108. snowflake/ml/modeling/linear_model/huber_regressor.py +51 -26
  109. snowflake/ml/modeling/linear_model/lars.py +51 -26
  110. snowflake/ml/modeling/linear_model/lars_cv.py +51 -26
  111. snowflake/ml/modeling/linear_model/lasso.py +51 -26
  112. snowflake/ml/modeling/linear_model/lasso_cv.py +51 -26
  113. snowflake/ml/modeling/linear_model/lasso_lars.py +51 -26
  114. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +51 -26
  115. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +51 -26
  116. snowflake/ml/modeling/linear_model/linear_regression.py +51 -26
  117. snowflake/ml/modeling/linear_model/logistic_regression.py +51 -26
  118. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +51 -26
  119. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +51 -26
  120. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +51 -26
  121. snowflake/ml/modeling/linear_model/multi_task_lasso.py +51 -26
  122. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +51 -26
  123. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +51 -26
  124. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +51 -26
  125. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +51 -26
  126. snowflake/ml/modeling/linear_model/perceptron.py +51 -26
  127. snowflake/ml/modeling/linear_model/poisson_regressor.py +51 -26
  128. snowflake/ml/modeling/linear_model/ransac_regressor.py +51 -26
  129. snowflake/ml/modeling/linear_model/ridge.py +51 -26
  130. snowflake/ml/modeling/linear_model/ridge_classifier.py +51 -26
  131. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +51 -26
  132. snowflake/ml/modeling/linear_model/ridge_cv.py +51 -26
  133. snowflake/ml/modeling/linear_model/sgd_classifier.py +51 -26
  134. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +51 -26
  135. snowflake/ml/modeling/linear_model/sgd_regressor.py +51 -26
  136. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +51 -26
  137. snowflake/ml/modeling/linear_model/tweedie_regressor.py +51 -26
  138. snowflake/ml/modeling/manifold/isomap.py +51 -26
  139. snowflake/ml/modeling/manifold/mds.py +51 -26
  140. snowflake/ml/modeling/manifold/spectral_embedding.py +51 -26
  141. snowflake/ml/modeling/manifold/tsne.py +51 -26
  142. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +51 -26
  143. snowflake/ml/modeling/mixture/gaussian_mixture.py +51 -26
  144. snowflake/ml/modeling/model_selection/grid_search_cv.py +51 -26
  145. snowflake/ml/modeling/model_selection/randomized_search_cv.py +51 -26
  146. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +51 -26
  147. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +51 -26
  148. snowflake/ml/modeling/multiclass/output_code_classifier.py +51 -26
  149. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +51 -26
  150. snowflake/ml/modeling/naive_bayes/categorical_nb.py +51 -26
  151. snowflake/ml/modeling/naive_bayes/complement_nb.py +51 -26
  152. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +51 -26
  153. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +51 -26
  154. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +51 -26
  155. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +51 -26
  156. snowflake/ml/modeling/neighbors/kernel_density.py +51 -26
  157. snowflake/ml/modeling/neighbors/local_outlier_factor.py +51 -26
  158. snowflake/ml/modeling/neighbors/nearest_centroid.py +51 -26
  159. snowflake/ml/modeling/neighbors/nearest_neighbors.py +51 -26
  160. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +51 -26
  161. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +51 -26
  162. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +51 -26
  163. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +51 -26
  164. snowflake/ml/modeling/neural_network/mlp_classifier.py +51 -26
  165. snowflake/ml/modeling/neural_network/mlp_regressor.py +51 -26
  166. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +2 -0
  167. snowflake/ml/modeling/preprocessing/polynomial_features.py +51 -26
  168. snowflake/ml/modeling/semi_supervised/label_propagation.py +51 -26
  169. snowflake/ml/modeling/semi_supervised/label_spreading.py +51 -26
  170. snowflake/ml/modeling/svm/linear_svc.py +51 -26
  171. snowflake/ml/modeling/svm/linear_svr.py +51 -26
  172. snowflake/ml/modeling/svm/nu_svc.py +51 -26
  173. snowflake/ml/modeling/svm/nu_svr.py +51 -26
  174. snowflake/ml/modeling/svm/svc.py +51 -26
  175. snowflake/ml/modeling/svm/svr.py +51 -26
  176. snowflake/ml/modeling/tree/decision_tree_classifier.py +51 -26
  177. snowflake/ml/modeling/tree/decision_tree_regressor.py +51 -26
  178. snowflake/ml/modeling/tree/extra_tree_classifier.py +51 -26
  179. snowflake/ml/modeling/tree/extra_tree_regressor.py +51 -26
  180. snowflake/ml/modeling/xgboost/xgb_classifier.py +51 -26
  181. snowflake/ml/modeling/xgboost/xgb_regressor.py +51 -26
  182. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +51 -26
  183. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +51 -26
  184. snowflake/ml/registry/model_registry.py +74 -56
  185. snowflake/ml/version.py +1 -1
  186. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/METADATA +27 -8
  187. snowflake_ml_python-1.0.3.dist-info/RECORD +259 -0
  188. snowflake_ml_python-1.0.2.dist-info/RECORD +0 -246
  189. {snowflake_ml_python-1.0.2.dist-info → snowflake_ml_python-1.0.3.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -219,7 +221,6 @@ class GraphicalLasso(BaseTransformer):
219
221
  sample_weight_col: Optional[str] = None,
220
222
  ) -> None:
221
223
  super().__init__()
222
- self.id = str(uuid4()).replace("-", "_").upper()
223
224
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
224
225
 
225
226
  self._deps = list(deps)
@@ -245,6 +246,15 @@ class GraphicalLasso(BaseTransformer):
245
246
  self.set_drop_input_cols(drop_input_cols)
246
247
  self.set_sample_weight_col(sample_weight_col)
247
248
 
249
+ def _get_rand_id(self) -> str:
250
+ """
251
+ Generate random id to be used in sproc and stage names.
252
+
253
+ Returns:
254
+ Random id string usable in sproc, table, and stage names.
255
+ """
256
+ return str(uuid4()).replace("-", "_").upper()
257
+
248
258
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
249
259
  """
250
260
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -323,7 +333,7 @@ class GraphicalLasso(BaseTransformer):
323
333
  cp.dump(self._sklearn_object, local_transform_file)
324
334
 
325
335
  # Create temp stage to run fit.
326
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
336
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
327
337
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
328
338
  SqlResultValidator(
329
339
  session=session,
@@ -336,11 +346,12 @@ class GraphicalLasso(BaseTransformer):
336
346
  expected_value=f"Stage area {transform_stage_name} successfully created."
337
347
  ).validate()
338
348
 
339
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
349
+ # Use posixpath to construct stage paths
350
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
351
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
340
352
  local_result_file_name = get_temp_file_path()
341
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
342
353
 
343
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
354
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
344
355
  statement_params = telemetry.get_function_usage_statement_params(
345
356
  project=_PROJECT,
346
357
  subproject=_SUBPROJECT,
@@ -366,6 +377,7 @@ class GraphicalLasso(BaseTransformer):
366
377
  replace=True,
367
378
  session=session,
368
379
  statement_params=statement_params,
380
+ anonymous=True
369
381
  )
370
382
  def fit_wrapper_sproc(
371
383
  session: Session,
@@ -374,7 +386,8 @@ class GraphicalLasso(BaseTransformer):
374
386
  stage_result_file_name: str,
375
387
  input_cols: List[str],
376
388
  label_cols: List[str],
377
- sample_weight_col: Optional[str]
389
+ sample_weight_col: Optional[str],
390
+ statement_params: Dict[str, str]
378
391
  ) -> str:
379
392
  import cloudpickle as cp
380
393
  import numpy as np
@@ -441,15 +454,15 @@ class GraphicalLasso(BaseTransformer):
441
454
  api_calls=[Session.call],
442
455
  custom_tags=dict([("autogen", True)]),
443
456
  )
444
- sproc_export_file_name = session.call(
445
- fit_sproc_name,
457
+ sproc_export_file_name = fit_wrapper_sproc(
458
+ session,
446
459
  query,
447
460
  stage_transform_file_name,
448
461
  stage_result_file_name,
449
462
  identifier.get_unescaped_names(self.input_cols),
450
463
  identifier.get_unescaped_names(self.label_cols),
451
464
  identifier.get_unescaped_names(self.sample_weight_col),
452
- statement_params=statement_params,
465
+ statement_params,
453
466
  )
454
467
 
455
468
  if "|" in sproc_export_file_name:
@@ -459,7 +472,7 @@ class GraphicalLasso(BaseTransformer):
459
472
  print("\n".join(fields[1:]))
460
473
 
461
474
  session.file.get(
462
- os.path.join(stage_result_file_name, sproc_export_file_name),
475
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
463
476
  local_result_file_name,
464
477
  statement_params=statement_params
465
478
  )
@@ -505,7 +518,7 @@ class GraphicalLasso(BaseTransformer):
505
518
 
506
519
  # Register vectorized UDF for batch inference
507
520
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
508
- safe_id=self.id, method=inference_method)
521
+ safe_id=self._get_rand_id(), method=inference_method)
509
522
 
510
523
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
511
524
  # will try to pickle all of self which fails.
@@ -597,7 +610,7 @@ class GraphicalLasso(BaseTransformer):
597
610
  return transformed_pandas_df.to_dict("records")
598
611
 
599
612
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
600
- safe_id=self.id
613
+ safe_id=self._get_rand_id()
601
614
  )
602
615
 
603
616
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -762,11 +775,18 @@ class GraphicalLasso(BaseTransformer):
762
775
  Transformed dataset.
763
776
  """
764
777
  if isinstance(dataset, DataFrame):
778
+ expected_type_inferred = ""
779
+ # when it is classifier, infer the datatype from label columns
780
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
781
+ expected_type_inferred = convert_sp_to_sf_type(
782
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
783
+ )
784
+
765
785
  output_df = self._batch_inference(
766
786
  dataset=dataset,
767
787
  inference_method="predict",
768
788
  expected_output_cols_list=self.output_cols,
769
- expected_output_cols_type="",
789
+ expected_output_cols_type=expected_type_inferred,
770
790
  )
771
791
  elif isinstance(dataset, pd.DataFrame):
772
792
  output_df = self._sklearn_inference(
@@ -837,10 +857,10 @@ class GraphicalLasso(BaseTransformer):
837
857
 
838
858
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
839
859
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
840
- Returns an empty list if current object is not a classifier or not yet fitted.
860
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
841
861
  """
842
862
  if getattr(self._sklearn_object, "classes_", None) is None:
843
- return []
863
+ return [output_cols_prefix]
844
864
 
845
865
  classes = self._sklearn_object.classes_
846
866
  if isinstance(classes, numpy.ndarray):
@@ -1065,7 +1085,7 @@ class GraphicalLasso(BaseTransformer):
1065
1085
  cp.dump(self._sklearn_object, local_score_file)
1066
1086
 
1067
1087
  # Create temp stage to run score.
1068
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1088
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1069
1089
  session = dataset._session
1070
1090
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1071
1091
  SqlResultValidator(
@@ -1079,8 +1099,9 @@ class GraphicalLasso(BaseTransformer):
1079
1099
  expected_value=f"Stage area {score_stage_name} successfully created."
1080
1100
  ).validate()
1081
1101
 
1082
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1083
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1102
+ # Use posixpath to construct stage paths
1103
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1104
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1084
1105
  statement_params = telemetry.get_function_usage_statement_params(
1085
1106
  project=_PROJECT,
1086
1107
  subproject=_SUBPROJECT,
@@ -1106,6 +1127,7 @@ class GraphicalLasso(BaseTransformer):
1106
1127
  replace=True,
1107
1128
  session=session,
1108
1129
  statement_params=statement_params,
1130
+ anonymous=True
1109
1131
  )
1110
1132
  def score_wrapper_sproc(
1111
1133
  session: Session,
@@ -1113,7 +1135,8 @@ class GraphicalLasso(BaseTransformer):
1113
1135
  stage_score_file_name: str,
1114
1136
  input_cols: List[str],
1115
1137
  label_cols: List[str],
1116
- sample_weight_col: Optional[str]
1138
+ sample_weight_col: Optional[str],
1139
+ statement_params: Dict[str, str]
1117
1140
  ) -> float:
1118
1141
  import cloudpickle as cp
1119
1142
  import numpy as np
@@ -1163,14 +1186,14 @@ class GraphicalLasso(BaseTransformer):
1163
1186
  api_calls=[Session.call],
1164
1187
  custom_tags=dict([("autogen", True)]),
1165
1188
  )
1166
- score = session.call(
1167
- score_sproc_name,
1189
+ score = score_wrapper_sproc(
1190
+ session,
1168
1191
  query,
1169
1192
  stage_score_file_name,
1170
1193
  identifier.get_unescaped_names(self.input_cols),
1171
1194
  identifier.get_unescaped_names(self.label_cols),
1172
1195
  identifier.get_unescaped_names(self.sample_weight_col),
1173
- statement_params=statement_params,
1196
+ statement_params,
1174
1197
  )
1175
1198
 
1176
1199
  cleanup_temp_files([local_score_file_name])
@@ -1188,18 +1211,20 @@ class GraphicalLasso(BaseTransformer):
1188
1211
  if self._sklearn_object._estimator_type == 'classifier':
1189
1212
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1190
1213
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1191
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1214
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1215
+ ([] if self._drop_input_cols else inputs) + outputs)
1192
1216
  # For regressor, the type of predict is float64
1193
1217
  elif self._sklearn_object._estimator_type == 'regressor':
1194
1218
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1195
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1196
-
1219
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1220
+ ([] if self._drop_input_cols else inputs) + outputs)
1197
1221
  for prob_func in PROB_FUNCTIONS:
1198
1222
  if hasattr(self, prob_func):
1199
1223
  output_cols_prefix: str = f"{prob_func}_"
1200
1224
  output_column_names = self._get_output_column_names(output_cols_prefix)
1201
1225
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1202
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1226
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1227
+ ([] if self._drop_input_cols else inputs) + outputs)
1203
1228
 
1204
1229
  @property
1205
1230
  def model_signatures(self) -> Dict[str, ModelSignature]:
@@ -7,6 +7,7 @@
7
7
  #
8
8
  import inspect
9
9
  import os
10
+ import posixpath
10
11
  from typing import Iterable, Optional, Union, List, Any, Dict, Callable, Set
11
12
  from uuid import uuid4
12
13
 
@@ -27,6 +28,7 @@ from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get
27
28
  from snowflake.snowpark import DataFrame, Session
28
29
  from snowflake.snowpark.functions import pandas_udf, sproc
29
30
  from snowflake.snowpark.types import PandasSeries
31
+ from snowflake.snowpark._internal.type_utils import convert_sp_to_sf_type
30
32
 
31
33
  from snowflake.ml.model.model_signature import (
32
34
  DataType,
@@ -249,7 +251,6 @@ class GraphicalLassoCV(BaseTransformer):
249
251
  sample_weight_col: Optional[str] = None,
250
252
  ) -> None:
251
253
  super().__init__()
252
- self.id = str(uuid4()).replace("-", "_").upper()
253
254
  deps: Set[str] = set([f'numpy=={np.__version__}', f'scikit-learn=={sklearn.__version__}', f'cloudpickle=={cp.__version__}'])
254
255
 
255
256
  self._deps = list(deps)
@@ -278,6 +279,15 @@ class GraphicalLassoCV(BaseTransformer):
278
279
  self.set_drop_input_cols(drop_input_cols)
279
280
  self.set_sample_weight_col(sample_weight_col)
280
281
 
282
+ def _get_rand_id(self) -> str:
283
+ """
284
+ Generate random id to be used in sproc and stage names.
285
+
286
+ Returns:
287
+ Random id string usable in sproc, table, and stage names.
288
+ """
289
+ return str(uuid4()).replace("-", "_").upper()
290
+
281
291
  def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
282
292
  """
283
293
  Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.
@@ -356,7 +366,7 @@ class GraphicalLassoCV(BaseTransformer):
356
366
  cp.dump(self._sklearn_object, local_transform_file)
357
367
 
358
368
  # Create temp stage to run fit.
359
- transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self.id)
369
+ transform_stage_name = "SNOWML_TRANSFORM_{safe_id}".format(safe_id=self._get_rand_id())
360
370
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {transform_stage_name};"
361
371
  SqlResultValidator(
362
372
  session=session,
@@ -369,11 +379,12 @@ class GraphicalLassoCV(BaseTransformer):
369
379
  expected_value=f"Stage area {transform_stage_name} successfully created."
370
380
  ).validate()
371
381
 
372
- stage_transform_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
382
+ # Use posixpath to construct stage paths
383
+ stage_transform_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
384
+ stage_result_file_name = posixpath.join(transform_stage_name, os.path.basename(local_transform_file_name))
373
385
  local_result_file_name = get_temp_file_path()
374
- stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
375
386
 
376
- fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self.id)
387
+ fit_sproc_name = "SNOWML_FIT_{safe_id}".format(safe_id=self._get_rand_id())
377
388
  statement_params = telemetry.get_function_usage_statement_params(
378
389
  project=_PROJECT,
379
390
  subproject=_SUBPROJECT,
@@ -399,6 +410,7 @@ class GraphicalLassoCV(BaseTransformer):
399
410
  replace=True,
400
411
  session=session,
401
412
  statement_params=statement_params,
413
+ anonymous=True
402
414
  )
403
415
  def fit_wrapper_sproc(
404
416
  session: Session,
@@ -407,7 +419,8 @@ class GraphicalLassoCV(BaseTransformer):
407
419
  stage_result_file_name: str,
408
420
  input_cols: List[str],
409
421
  label_cols: List[str],
410
- sample_weight_col: Optional[str]
422
+ sample_weight_col: Optional[str],
423
+ statement_params: Dict[str, str]
411
424
  ) -> str:
412
425
  import cloudpickle as cp
413
426
  import numpy as np
@@ -474,15 +487,15 @@ class GraphicalLassoCV(BaseTransformer):
474
487
  api_calls=[Session.call],
475
488
  custom_tags=dict([("autogen", True)]),
476
489
  )
477
- sproc_export_file_name = session.call(
478
- fit_sproc_name,
490
+ sproc_export_file_name = fit_wrapper_sproc(
491
+ session,
479
492
  query,
480
493
  stage_transform_file_name,
481
494
  stage_result_file_name,
482
495
  identifier.get_unescaped_names(self.input_cols),
483
496
  identifier.get_unescaped_names(self.label_cols),
484
497
  identifier.get_unescaped_names(self.sample_weight_col),
485
- statement_params=statement_params,
498
+ statement_params,
486
499
  )
487
500
 
488
501
  if "|" in sproc_export_file_name:
@@ -492,7 +505,7 @@ class GraphicalLassoCV(BaseTransformer):
492
505
  print("\n".join(fields[1:]))
493
506
 
494
507
  session.file.get(
495
- os.path.join(stage_result_file_name, sproc_export_file_name),
508
+ posixpath.join(stage_result_file_name, sproc_export_file_name),
496
509
  local_result_file_name,
497
510
  statement_params=statement_params
498
511
  )
@@ -538,7 +551,7 @@ class GraphicalLassoCV(BaseTransformer):
538
551
 
539
552
  # Register vectorized UDF for batch inference
540
553
  batch_inference_udf_name = "SNOWML_BATCH_INFERENCE_{safe_id}_{method}".format(
541
- safe_id=self.id, method=inference_method)
554
+ safe_id=self._get_rand_id(), method=inference_method)
542
555
 
543
556
  # Need to do this since if we use self._sklearn_object directly in the UDF, Snowpark
544
557
  # will try to pickle all of self which fails.
@@ -630,7 +643,7 @@ class GraphicalLassoCV(BaseTransformer):
630
643
  return transformed_pandas_df.to_dict("records")
631
644
 
632
645
  batch_inference_table_name = "SNOWML_BATCH_INFERENCE_INPUT_TABLE_{safe_id}".format(
633
- safe_id=self.id
646
+ safe_id=self._get_rand_id()
634
647
  )
635
648
 
636
649
  pass_through_columns = self._get_pass_through_columns(dataset)
@@ -795,11 +808,18 @@ class GraphicalLassoCV(BaseTransformer):
795
808
  Transformed dataset.
796
809
  """
797
810
  if isinstance(dataset, DataFrame):
811
+ expected_type_inferred = ""
812
+ # when it is classifier, infer the datatype from label columns
813
+ if expected_type_inferred == "" and 'predict' in self.model_signatures:
814
+ expected_type_inferred = convert_sp_to_sf_type(
815
+ self.model_signatures['predict'].outputs[0].as_snowpark_type()
816
+ )
817
+
798
818
  output_df = self._batch_inference(
799
819
  dataset=dataset,
800
820
  inference_method="predict",
801
821
  expected_output_cols_list=self.output_cols,
802
- expected_output_cols_type="",
822
+ expected_output_cols_type=expected_type_inferred,
803
823
  )
804
824
  elif isinstance(dataset, pd.DataFrame):
805
825
  output_df = self._sklearn_inference(
@@ -870,10 +890,10 @@ class GraphicalLassoCV(BaseTransformer):
870
890
 
871
891
  def _get_output_column_names(self, output_cols_prefix: str) -> List[str]:
872
892
  """ Returns the list of output columns for predict_proba(), decision_function(), etc.. functions.
873
- Returns an empty list if current object is not a classifier or not yet fitted.
893
+ Returns a list with output_cols_prefix as the only element if the estimator is not a classifier.
874
894
  """
875
895
  if getattr(self._sklearn_object, "classes_", None) is None:
876
- return []
896
+ return [output_cols_prefix]
877
897
 
878
898
  classes = self._sklearn_object.classes_
879
899
  if isinstance(classes, numpy.ndarray):
@@ -1098,7 +1118,7 @@ class GraphicalLassoCV(BaseTransformer):
1098
1118
  cp.dump(self._sklearn_object, local_score_file)
1099
1119
 
1100
1120
  # Create temp stage to run score.
1101
- score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1121
+ score_stage_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1102
1122
  session = dataset._session
1103
1123
  stage_creation_query = f"CREATE OR REPLACE TEMPORARY STAGE {score_stage_name};"
1104
1124
  SqlResultValidator(
@@ -1112,8 +1132,9 @@ class GraphicalLassoCV(BaseTransformer):
1112
1132
  expected_value=f"Stage area {score_stage_name} successfully created."
1113
1133
  ).validate()
1114
1134
 
1115
- stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
1116
- score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self.id)
1135
+ # Use posixpath to construct stage paths
1136
+ stage_score_file_name = posixpath.join(score_stage_name, os.path.basename(local_score_file_name))
1137
+ score_sproc_name = "SNOWML_SCORE_{safe_id}".format(safe_id=self._get_rand_id())
1117
1138
  statement_params = telemetry.get_function_usage_statement_params(
1118
1139
  project=_PROJECT,
1119
1140
  subproject=_SUBPROJECT,
@@ -1139,6 +1160,7 @@ class GraphicalLassoCV(BaseTransformer):
1139
1160
  replace=True,
1140
1161
  session=session,
1141
1162
  statement_params=statement_params,
1163
+ anonymous=True
1142
1164
  )
1143
1165
  def score_wrapper_sproc(
1144
1166
  session: Session,
@@ -1146,7 +1168,8 @@ class GraphicalLassoCV(BaseTransformer):
1146
1168
  stage_score_file_name: str,
1147
1169
  input_cols: List[str],
1148
1170
  label_cols: List[str],
1149
- sample_weight_col: Optional[str]
1171
+ sample_weight_col: Optional[str],
1172
+ statement_params: Dict[str, str]
1150
1173
  ) -> float:
1151
1174
  import cloudpickle as cp
1152
1175
  import numpy as np
@@ -1196,14 +1219,14 @@ class GraphicalLassoCV(BaseTransformer):
1196
1219
  api_calls=[Session.call],
1197
1220
  custom_tags=dict([("autogen", True)]),
1198
1221
  )
1199
- score = session.call(
1200
- score_sproc_name,
1222
+ score = score_wrapper_sproc(
1223
+ session,
1201
1224
  query,
1202
1225
  stage_score_file_name,
1203
1226
  identifier.get_unescaped_names(self.input_cols),
1204
1227
  identifier.get_unescaped_names(self.label_cols),
1205
1228
  identifier.get_unescaped_names(self.sample_weight_col),
1206
- statement_params=statement_params,
1229
+ statement_params,
1207
1230
  )
1208
1231
 
1209
1232
  cleanup_temp_files([local_score_file_name])
@@ -1221,18 +1244,20 @@ class GraphicalLassoCV(BaseTransformer):
1221
1244
  if self._sklearn_object._estimator_type == 'classifier':
1222
1245
  outputs = _infer_signature(dataset[self.label_cols], "output") # label columns is the desired type for output
1223
1246
  outputs = _rename_features(outputs, self.output_cols) # rename the output columns
1224
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1247
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1248
+ ([] if self._drop_input_cols else inputs) + outputs)
1225
1249
  # For regressor, the type of predict is float64
1226
1250
  elif self._sklearn_object._estimator_type == 'regressor':
1227
1251
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
1228
- self._model_signature_dict["predict"] = ModelSignature(inputs, outputs)
1229
-
1252
+ self._model_signature_dict["predict"] = ModelSignature(inputs,
1253
+ ([] if self._drop_input_cols else inputs) + outputs)
1230
1254
  for prob_func in PROB_FUNCTIONS:
1231
1255
  if hasattr(self, prob_func):
1232
1256
  output_cols_prefix: str = f"{prob_func}_"
1233
1257
  output_column_names = self._get_output_column_names(output_cols_prefix)
1234
1258
  outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in output_column_names]
1235
- self._model_signature_dict[prob_func] = ModelSignature(inputs, outputs)
1259
+ self._model_signature_dict[prob_func] = ModelSignature(inputs,
1260
+ ([] if self._drop_input_cols else inputs) + outputs)
1236
1261
 
1237
1262
  @property
1238
1263
  def model_signatures(self) -> Dict[str, ModelSignature]: