snowflake-ml-python 1.5.3__py3-none-any.whl → 1.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. snowflake/cortex/__init__.py +2 -1
  2. snowflake/cortex/_complete.py +224 -21
  3. snowflake/cortex/_extract_answer.py +0 -1
  4. snowflake/cortex/_sentiment.py +0 -1
  5. snowflake/cortex/_summarize.py +0 -1
  6. snowflake/cortex/_translate.py +0 -1
  7. snowflake/cortex/_util.py +12 -85
  8. snowflake/ml/_internal/container_services/image_registry/http_client.py +10 -3
  9. snowflake/ml/_internal/container_services/image_registry/imagelib.py +23 -10
  10. snowflake/ml/_internal/container_services/image_registry/registry_client.py +7 -1
  11. snowflake/ml/_internal/exceptions/dataset_errors.py +7 -7
  12. snowflake/ml/_internal/exceptions/fileset_errors.py +3 -3
  13. snowflake/ml/_internal/exceptions/sql_error_codes.py +6 -0
  14. snowflake/ml/_internal/telemetry.py +26 -0
  15. snowflake/ml/_internal/utils/identifier.py +14 -0
  16. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +15 -4
  17. snowflake/ml/dataset/dataset.py +39 -20
  18. snowflake/ml/feature_store/feature_store.py +440 -243
  19. snowflake/ml/feature_store/feature_view.py +61 -9
  20. snowflake/ml/fileset/embedded_stage_fs.py +25 -21
  21. snowflake/ml/fileset/fileset.py +2 -2
  22. snowflake/ml/fileset/snowfs.py +4 -15
  23. snowflake/ml/fileset/stage_fs.py +6 -8
  24. snowflake/ml/lineage/__init__.py +3 -0
  25. snowflake/ml/lineage/lineage_node.py +139 -0
  26. snowflake/ml/model/_client/model/model_impl.py +47 -14
  27. snowflake/ml/model/_client/model/model_version_impl.py +82 -2
  28. snowflake/ml/model/_client/ops/model_ops.py +77 -5
  29. snowflake/ml/model/_client/sql/model.py +1 -0
  30. snowflake/ml/model/_client/sql/model_version.py +45 -2
  31. snowflake/ml/model/_deploy_client/image_builds/inference_server/main.py +2 -3
  32. snowflake/ml/model/_model_composer/model_composer.py +5 -4
  33. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +7 -1
  34. snowflake/ml/model/_model_composer/model_method/function_generator.py +17 -1
  35. snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +79 -0
  36. snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -2
  37. snowflake/ml/model/_model_composer/model_method/model_method.py +5 -5
  38. snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
  39. snowflake/ml/model/_packager/model_handlers/_utils.py +1 -0
  40. snowflake/ml/model/_packager/model_handlers/catboost.py +2 -2
  41. snowflake/ml/model/_packager/model_handlers/custom.py +12 -4
  42. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +18 -15
  43. snowflake/ml/model/_packager/model_handlers/lightgbm.py +2 -2
  44. snowflake/ml/model/_packager/model_handlers/llm.py +2 -2
  45. snowflake/ml/model/_packager/model_handlers/mlflow.py +2 -2
  46. snowflake/ml/model/_packager/model_handlers/pytorch.py +2 -2
  47. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +2 -2
  48. snowflake/ml/model/_packager/model_handlers/sklearn.py +2 -2
  49. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +2 -2
  50. snowflake/ml/model/_packager/model_handlers/tensorflow.py +2 -2
  51. snowflake/ml/model/_packager/model_handlers/torchscript.py +2 -2
  52. snowflake/ml/model/_packager/model_handlers/xgboost.py +2 -2
  53. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  54. snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -0
  55. snowflake/ml/model/_packager/model_meta/model_meta.py +21 -1
  56. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
  57. snowflake/ml/model/_packager/model_packager.py +9 -4
  58. snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +1 -1
  59. snowflake/ml/model/custom_model.py +22 -2
  60. snowflake/ml/model/type_hints.py +73 -4
  61. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +2 -0
  62. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +1 -0
  63. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +6 -0
  64. snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +1 -0
  65. snowflake/ml/modeling/cluster/affinity_propagation.py +4 -2
  66. snowflake/ml/modeling/cluster/agglomerative_clustering.py +4 -2
  67. snowflake/ml/modeling/cluster/birch.py +4 -2
  68. snowflake/ml/modeling/cluster/bisecting_k_means.py +4 -2
  69. snowflake/ml/modeling/cluster/dbscan.py +4 -2
  70. snowflake/ml/modeling/cluster/feature_agglomeration.py +4 -2
  71. snowflake/ml/modeling/cluster/k_means.py +4 -2
  72. snowflake/ml/modeling/cluster/mean_shift.py +4 -2
  73. snowflake/ml/modeling/cluster/mini_batch_k_means.py +4 -2
  74. snowflake/ml/modeling/cluster/optics.py +4 -2
  75. snowflake/ml/modeling/cluster/spectral_biclustering.py +4 -2
  76. snowflake/ml/modeling/cluster/spectral_clustering.py +4 -2
  77. snowflake/ml/modeling/cluster/spectral_coclustering.py +4 -2
  78. snowflake/ml/modeling/compose/column_transformer.py +4 -2
  79. snowflake/ml/modeling/covariance/elliptic_envelope.py +4 -2
  80. snowflake/ml/modeling/covariance/empirical_covariance.py +4 -2
  81. snowflake/ml/modeling/covariance/graphical_lasso.py +4 -2
  82. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +4 -2
  83. snowflake/ml/modeling/covariance/ledoit_wolf.py +4 -2
  84. snowflake/ml/modeling/covariance/min_cov_det.py +4 -2
  85. snowflake/ml/modeling/covariance/oas.py +4 -2
  86. snowflake/ml/modeling/covariance/shrunk_covariance.py +4 -2
  87. snowflake/ml/modeling/decomposition/dictionary_learning.py +4 -2
  88. snowflake/ml/modeling/decomposition/factor_analysis.py +4 -2
  89. snowflake/ml/modeling/decomposition/fast_ica.py +4 -2
  90. snowflake/ml/modeling/decomposition/incremental_pca.py +4 -2
  91. snowflake/ml/modeling/decomposition/kernel_pca.py +4 -2
  92. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +4 -2
  93. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +4 -2
  94. snowflake/ml/modeling/decomposition/pca.py +4 -2
  95. snowflake/ml/modeling/decomposition/sparse_pca.py +4 -2
  96. snowflake/ml/modeling/decomposition/truncated_svd.py +4 -2
  97. snowflake/ml/modeling/ensemble/isolation_forest.py +4 -2
  98. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +4 -2
  99. snowflake/ml/modeling/feature_selection/variance_threshold.py +4 -2
  100. snowflake/ml/modeling/impute/iterative_imputer.py +4 -2
  101. snowflake/ml/modeling/impute/knn_imputer.py +4 -2
  102. snowflake/ml/modeling/impute/missing_indicator.py +4 -2
  103. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +4 -2
  104. snowflake/ml/modeling/kernel_approximation/nystroem.py +4 -2
  105. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +4 -2
  106. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +4 -2
  107. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +4 -2
  108. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +4 -2
  109. snowflake/ml/modeling/manifold/isomap.py +4 -2
  110. snowflake/ml/modeling/manifold/mds.py +4 -2
  111. snowflake/ml/modeling/manifold/spectral_embedding.py +4 -2
  112. snowflake/ml/modeling/manifold/tsne.py +4 -2
  113. snowflake/ml/modeling/metrics/ranking.py +3 -0
  114. snowflake/ml/modeling/metrics/regression.py +3 -0
  115. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +4 -2
  116. snowflake/ml/modeling/mixture/gaussian_mixture.py +4 -2
  117. snowflake/ml/modeling/neighbors/kernel_density.py +4 -2
  118. snowflake/ml/modeling/neighbors/local_outlier_factor.py +4 -2
  119. snowflake/ml/modeling/neighbors/nearest_neighbors.py +4 -2
  120. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +4 -2
  121. snowflake/ml/modeling/pipeline/pipeline.py +1 -0
  122. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +43 -9
  123. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +36 -8
  124. snowflake/ml/modeling/preprocessing/polynomial_features.py +4 -2
  125. snowflake/ml/registry/_manager/model_manager.py +16 -3
  126. snowflake/ml/version.py +1 -1
  127. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.5.4.dist-info}/METADATA +35 -7
  128. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.5.4.dist-info}/RECORD +131 -127
  129. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.5.4.dist-info}/WHEEL +1 -1
  130. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.5.4.dist-info}/LICENSE.txt +0 -0
  131. {snowflake_ml_python-1.5.3.dist-info → snowflake_ml_python-1.5.4.dist-info}/top_level.txt +0 -0
@@ -76,8 +76,10 @@ class MissingIndicator(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class AdditiveChi2Sampler(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class Nystroem(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class PolynomialCountSketch(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class RBFSampler(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class SkewedChi2Sampler(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class SGDOneClassSVM(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class Isomap(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class MDS(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class SpectralEmbedding(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class TSNE(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -102,6 +102,7 @@ def precision_recall_curve(
102
102
  ],
103
103
  statement_params=statement_params,
104
104
  anonymous=True,
105
+ execute_as="caller",
105
106
  )
106
107
  def precision_recall_curve_anon_sproc(session: snowpark.Session) -> bytes:
107
108
  for query in queries[:-1]:
@@ -249,6 +250,7 @@ def roc_auc_score(
249
250
  ],
250
251
  statement_params=statement_params,
251
252
  anonymous=True,
253
+ execute_as="caller",
252
254
  )
253
255
  def roc_auc_score_anon_sproc(session: snowpark.Session) -> bytes:
254
256
  for query in queries[:-1]:
@@ -352,6 +354,7 @@ def roc_curve(
352
354
  ],
353
355
  statement_params=statement_params,
354
356
  anonymous=True,
357
+ execute_as="caller",
355
358
  )
356
359
  def roc_curve_anon_sproc(session: snowpark.Session) -> bytes:
357
360
  for query in queries[:-1]:
@@ -87,6 +87,7 @@ def d2_absolute_error_score(
87
87
  ],
88
88
  statement_params=statement_params,
89
89
  anonymous=True,
90
+ execute_as="caller",
90
91
  )
91
92
  def d2_absolute_error_score_anon_sproc(session: snowpark.Session) -> bytes:
92
93
  for query in queries[:-1]:
@@ -184,6 +185,7 @@ def d2_pinball_score(
184
185
  ],
185
186
  statement_params=statement_params,
186
187
  anonymous=True,
188
+ execute_as="caller",
187
189
  )
188
190
  def d2_pinball_score_anon_sproc(session: snowpark.Session) -> bytes:
189
191
  for query in queries[:-1]:
@@ -299,6 +301,7 @@ def explained_variance_score(
299
301
  ],
300
302
  statement_params=statement_params,
301
303
  anonymous=True,
304
+ execute_as="caller",
302
305
  )
303
306
  def explained_variance_score_anon_sproc(session: snowpark.Session) -> bytes:
304
307
  for query in queries[:-1]:
@@ -76,8 +76,10 @@ class BayesianGaussianMixture(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class GaussianMixture(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class KernelDensity(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class LocalOutlierFactor(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class NearestNeighbors(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -76,8 +76,10 @@ class BernoulliRBM(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -378,6 +378,7 @@ class Pipeline(base.BaseTransformer):
378
378
  anonymous=True,
379
379
  imports=imports, # type: ignore[arg-type]
380
380
  statement_params=sproc_statement_params,
381
+ execute_as="caller",
381
382
  )
382
383
 
383
384
  sproc_export_file_name: str = pipeline_within_one_sproc(
@@ -101,16 +101,20 @@ class OneHotEncoder(base.BaseTransformer):
101
101
  (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html).
102
102
 
103
103
  Args:
104
- categories: 'auto' or dict {column_name: np.ndarray([category])}, default='auto'
104
+ categories: 'auto', list of array-like, or dict {column_name: np.ndarray([category])}, default='auto'
105
105
  Categories (unique values) per feature:
106
106
  - 'auto': Determine categories automatically from the training data.
107
+ - list: ``categories[i]`` holds the categories expected in the ith
108
+ column. The passed categories should not mix strings and numeric
109
+ values within a single feature, and should be sorted in case of
110
+ numeric values.
107
111
  - dict: ``categories[column_name]`` holds the categories expected in
108
112
  the column provided. The passed categories should not mix strings
109
113
  and numeric values within a single feature, and should be sorted in
110
114
  case of numeric values.
111
115
  The used categories can be found in the ``categories_`` attribute.
112
116
 
113
- drop: {first’, if_binary} or an array-like of shape (n_features,), default=None
117
+ drop: {'first', 'if_binary'} or an array-like of shape (n_features,), default=None
114
118
  Specifies a methodology to use to drop one of the categories per
115
119
  feature. This is useful in situations where perfectly collinear
116
120
  features cause problems, such as when feeding the resulting data
@@ -206,7 +210,7 @@ class OneHotEncoder(base.BaseTransformer):
206
210
  def __init__(
207
211
  self,
208
212
  *,
209
- categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]] = "auto",
213
+ categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]] = "auto",
210
214
  drop: Optional[Union[str, npt.ArrayLike]] = None,
211
215
  sparse: bool = False,
212
216
  handle_unknown: str = "error",
@@ -440,8 +444,19 @@ class OneHotEncoder(base.BaseTransformer):
440
444
  assert found_state_df is not None
441
445
  if self.categories != "auto":
442
446
  state_data = []
443
- assert isinstance(self.categories, dict)
444
- for input_col, cats in self.categories.items():
447
+ if isinstance(self.categories, list):
448
+ categories_map = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
449
+ elif isinstance(self.categories, dict):
450
+ categories_map = self.categories
451
+ else:
452
+ raise exceptions.SnowflakeMLException(
453
+ error_code=error_codes.INVALID_ARGUMENT,
454
+ original_exception=ValueError(
455
+ f"Invalid type {type(self.categories)} provided for argument `categories`"
456
+ ),
457
+ )
458
+
459
+ for input_col, cats in categories_map.items():
445
460
  for cat in cats.tolist():
446
461
  state_data.append([input_col, cat])
447
462
  # states of given categories
@@ -565,6 +580,8 @@ class OneHotEncoder(base.BaseTransformer):
565
580
  else:
566
581
  categories[k] = vectorized_func(v)
567
582
  self.categories_ = categories
583
+ elif isinstance(self.categories, list):
584
+ self.categories_ = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
568
585
  else:
569
586
  self.categories_ = self.categories
570
587
 
@@ -850,8 +867,15 @@ class OneHotEncoder(base.BaseTransformer):
850
867
  # In case of fitting with pandas dataframe and transforming with snowpark dataframe
851
868
  # state_pandas cannot recognize the datatype of _CATEGORY and _FITTED_CATEGORY column
852
869
  # Therefore, apply the convert_to_string_excluding_nan function to _CATEGORY and _FITTED_CATEGORY
853
- state_pandas[[_CATEGORY]] = state_pandas[[_CATEGORY]].applymap(convert_to_string_excluding_nan)
854
- state_pandas[[_FITTED_CATEGORY]] = state_pandas[[_FITTED_CATEGORY]].applymap(convert_to_string_excluding_nan)
870
+ # applymap is deprecated since pandas 2.1.0, replaced by map
871
+ if pd.__version__ < "2.1.0":
872
+ state_pandas[[_CATEGORY]] = state_pandas[[_CATEGORY]].applymap(convert_to_string_excluding_nan)
873
+ state_pandas[[_FITTED_CATEGORY]] = state_pandas[[_FITTED_CATEGORY]].applymap(
874
+ convert_to_string_excluding_nan
875
+ )
876
+ else:
877
+ state_pandas[[_CATEGORY]] = state_pandas[[_CATEGORY]].map(convert_to_string_excluding_nan)
878
+ state_pandas[[_FITTED_CATEGORY]] = state_pandas[[_FITTED_CATEGORY]].map(convert_to_string_excluding_nan)
855
879
  state_df = dataset._session.create_dataframe(state_pandas)
856
880
 
857
881
  transformed_dataset = dataset
@@ -1009,7 +1033,7 @@ class OneHotEncoder(base.BaseTransformer):
1009
1033
  error_code=error_codes.INVALID_ATTRIBUTE,
1010
1034
  original_exception=ValueError(f"Unsupported `categories` value: {self.categories}."),
1011
1035
  )
1012
- elif isinstance(self.categories, dict):
1036
+ elif isinstance(self.categories, (dict, list)):
1013
1037
  if len(self.categories) != len(self.input_cols):
1014
1038
  raise exceptions.SnowflakeMLException(
1015
1039
  error_code=error_codes.INVALID_ATTRIBUTE,
@@ -1018,7 +1042,7 @@ class OneHotEncoder(base.BaseTransformer):
1018
1042
  f"({len(self.input_cols)})."
1019
1043
  ),
1020
1044
  )
1021
- elif set(self.categories.keys()) != set(self.input_cols):
1045
+ elif isinstance(self.categories, dict) and set(self.categories.keys()) != set(self.input_cols):
1022
1046
  raise exceptions.SnowflakeMLException(
1023
1047
  error_code=error_codes.INVALID_ATTRIBUTE,
1024
1048
  original_exception=ValueError(
@@ -1537,6 +1561,16 @@ class OneHotEncoder(base.BaseTransformer):
1537
1561
  default_sklearn_args = _utils.get_default_args(default_sklearn_obj.__class__.__init__)
1538
1562
  given_args = self.get_params()
1539
1563
 
1564
+ if "categories" in given_args and isinstance(given_args["categories"], dict):
1565
+ # sklearn requires a list of array-like to satisfy the `categories` arg
1566
+ try:
1567
+ given_args["categories"] = [given_args["categories"][input_col] for input_col in self.input_cols]
1568
+ except KeyError as e:
1569
+ raise exceptions.SnowflakeMLException(
1570
+ error_code=error_codes.INVALID_ARGUMENT,
1571
+ original_exception=e,
1572
+ )
1573
+
1540
1574
  # replace 'sparse' with 'sparse_output' when scikit-learn>=1.2
1541
1575
  sklearn_version = sklearn.__version__
1542
1576
  if version.parse(sklearn_version) >= version.parse(_SKLEARN_DEPRECATED_KEYWORD_TO_VERSION_DICT["sparse"]):
@@ -45,9 +45,11 @@ class OrdinalEncoder(base.BaseTransformer):
45
45
  (https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OrdinalEncoder.html).
46
46
 
47
47
  Args:
48
- categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]], default="auto"
48
+ categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]],
49
+ default="auto"
49
50
  The string 'auto' (the default) causes the categories to be extracted from the input columns.
50
- To specify the categories yourself, pass a dictionary mapping the column name to an ndarray containing the
51
+ To specify the categories yourself, pass either (1) a list of ndarrays containing the categories or
52
+ (2) a dictionary mapping the column name to an ndarray containing the
51
53
  categories.
52
54
 
53
55
  handle_unknown: str, default="error"
@@ -96,7 +98,7 @@ class OrdinalEncoder(base.BaseTransformer):
96
98
  def __init__(
97
99
  self,
98
100
  *,
99
- categories: Union[str, Dict[str, type_utils.LiteralNDArrayType]] = "auto",
101
+ categories: Union[str, List[type_utils.LiteralNDArrayType], Dict[str, type_utils.LiteralNDArrayType]] = "auto",
100
102
  handle_unknown: str = "error",
101
103
  unknown_value: Optional[Union[int, float]] = None,
102
104
  encoded_missing_value: Union[int, float] = np.nan,
@@ -114,9 +116,13 @@ class OrdinalEncoder(base.BaseTransformer):
114
116
  a single column of integers (0 to n_categories - 1) per feature.
115
117
 
116
118
  Args:
117
- categories: 'auto' or dict {column_name: ndarray([category])}, default='auto'
119
+ categories: 'auto', list of array-like, or dict {column_name: ndarray([category])}, default='auto'
118
120
  Categories (unique values) per feature:
119
121
  - 'auto': Determine categories automatically from the training data.
122
+ - list: ``categories[i]`` holds the categories expected in the ith
123
+ column. The passed categories should not mix strings and numeric
124
+ values within a single feature, and should be sorted in case of
125
+ numeric values.
120
126
  - dict: ``categories[column_name]`` holds the categories expected in
121
127
  the column provided. The passed categories should not mix strings
122
128
  and numeric values within a single feature, and should be sorted in
@@ -317,8 +323,19 @@ class OrdinalEncoder(base.BaseTransformer):
317
323
  assert found_state_df is not None
318
324
  if self.categories != "auto":
319
325
  state_data = []
320
- assert isinstance(self.categories, dict)
321
- for input_col, cats in self.categories.items():
326
+ if isinstance(self.categories, list):
327
+ categories_map = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
328
+ elif isinstance(self.categories, dict):
329
+ categories_map = self.categories
330
+ else:
331
+ raise exceptions.SnowflakeMLException(
332
+ error_code=error_codes.INVALID_ARGUMENT,
333
+ original_exception=ValueError(
334
+ f"Invalid type {type(self.categories)} provided for argument `categories`"
335
+ ),
336
+ )
337
+
338
+ for input_col, cats in categories_map.items():
322
339
  for idx, cat in enumerate(cats.tolist()):
323
340
  state_data.append([input_col, cat, idx])
324
341
  # states of given categories
@@ -368,6 +385,8 @@ class OrdinalEncoder(base.BaseTransformer):
368
385
  for col_name, cats in grouped_categories.items()
369
386
  }
370
387
  self.categories_ = categories
388
+ elif isinstance(self.categories, list):
389
+ self.categories_ = {col_name: cats for col_name, cats in zip(self.input_cols, self.categories)}
371
390
  else:
372
391
  self.categories_ = self.categories
373
392
 
@@ -548,6 +567,15 @@ class OrdinalEncoder(base.BaseTransformer):
548
567
  snowml_only_keywords=_SNOWML_ONLY_KEYWORDS,
549
568
  sklearn_added_keyword_to_version_dict=_SKLEARN_ADDED_KEYWORD_TO_VERSION_DICT,
550
569
  )
570
+ if "categories" in sklearn_args and isinstance(sklearn_args["categories"], dict):
571
+ # sklearn requires a list of array-like to satisfy the `categories` arg
572
+ try:
573
+ sklearn_args["categories"] = [sklearn_args["categories"][input_col] for input_col in self.input_cols]
574
+ except KeyError as e:
575
+ raise exceptions.SnowflakeMLException(
576
+ error_code=error_codes.INVALID_ARGUMENT,
577
+ original_exception=e,
578
+ )
551
579
  return preprocessing.OrdinalEncoder(**sklearn_args)
552
580
 
553
581
  def _create_sklearn_object(self) -> preprocessing.OrdinalEncoder:
@@ -570,7 +598,7 @@ class OrdinalEncoder(base.BaseTransformer):
570
598
  error_code=error_codes.INVALID_ATTRIBUTE,
571
599
  original_exception=ValueError(f"Unsupported `categories` value: {self.categories}."),
572
600
  )
573
- elif isinstance(self.categories, dict):
601
+ elif isinstance(self.categories, (dict, list)):
574
602
  if len(self.categories) != len(self.input_cols):
575
603
  raise exceptions.SnowflakeMLException(
576
604
  error_code=error_codes.INVALID_ATTRIBUTE,
@@ -579,7 +607,7 @@ class OrdinalEncoder(base.BaseTransformer):
579
607
  f"({len(self.input_cols)})."
580
608
  ),
581
609
  )
582
- elif set(self.categories.keys()) != set(self.input_cols):
610
+ elif isinstance(self.categories, dict) and set(self.categories.keys()) != set(self.input_cols):
583
611
  raise exceptions.SnowflakeMLException(
584
612
  error_code=error_codes.INVALID_ATTRIBUTE,
585
613
  original_exception=ValueError(
@@ -76,8 +76,10 @@ class PolynomialFeatures(BaseTransformer):
76
76
  initialization with the `set_input_cols` method.
77
77
 
78
78
  label_cols: Optional[Union[str, List[str]]]
79
- This parameter is optional and will be ignored during fit. It is present here for API consistency by convention.
80
-
79
+ A string or list of strings representing column names that contain labels.
80
+ Label columns must be specified with this parameter during initialization
81
+ or with the `set_label_cols` method before fitting.
82
+
81
83
  output_cols: Optional[Union[str, List[str]]]
82
84
  A string or list of strings representing column names that will store the
83
85
  output of predict and transform operations. The length of output_cols must
@@ -4,12 +4,14 @@ from typing import Any, Dict, List, Optional, Union
4
4
  import pandas as pd
5
5
  from absl.logging import logging
6
6
 
7
+ from snowflake.ml._internal import telemetry
7
8
  from snowflake.ml._internal.human_readable_id import hrid_generator
8
9
  from snowflake.ml._internal.utils import sql_identifier
9
10
  from snowflake.ml.model import model_signature, type_hints as model_types
10
11
  from snowflake.ml.model._client.model import model_impl, model_version_impl
11
12
  from snowflake.ml.model._client.ops import metadata_ops, model_ops
12
13
  from snowflake.ml.model._model_composer import model_composer
14
+ from snowflake.ml.model._packager.model_meta import model_meta
13
15
  from snowflake.snowpark import session
14
16
 
15
17
  logger = logging.getLogger(__name__)
@@ -124,7 +126,10 @@ class ModelManager:
124
126
  version_name=version_name_id,
125
127
  statement_params=statement_params,
126
128
  ):
127
- raise ValueError(f"Model {model_name} version {version_name} already existed.")
129
+ raise ValueError(
130
+ f"Model {model_name} version {version_name} already existed. "
131
+ + "To auto-generate `version_name`, skip that argument."
132
+ )
128
133
 
129
134
  stage_path = self._model_ops.prepare_model_stage_path(
130
135
  database_name=database_name_id,
@@ -134,8 +139,10 @@ class ModelManager:
134
139
 
135
140
  logger.info("Start packaging and uploading your model. It might take some time based on the size of the model.")
136
141
 
137
- mc = model_composer.ModelComposer(self._model_ops._session, stage_path=stage_path)
138
- mc.save(
142
+ mc = model_composer.ModelComposer(
143
+ self._model_ops._session, stage_path=stage_path, statement_params=statement_params
144
+ )
145
+ model_metadata: model_meta.ModelMetadata = mc.save(
139
146
  name=model_name_id.resolved(),
140
147
  model=model,
141
148
  signatures=signatures,
@@ -147,6 +154,12 @@ class ModelManager:
147
154
  ext_modules=ext_modules,
148
155
  options=options,
149
156
  )
157
+ statement_params = telemetry.add_statement_params_custom_tags(
158
+ statement_params, model_metadata.telemetry_metadata()
159
+ )
160
+ statement_params = telemetry.add_statement_params_custom_tags(
161
+ statement_params, {"model_version_name": version_name_id}
162
+ )
150
163
 
151
164
  logger.info("Start creating MODEL object for you in the Snowflake.")
152
165
 
snowflake/ml/version.py CHANGED
@@ -1 +1 @@
1
- VERSION="1.5.3"
1
+ VERSION="1.5.4"