snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (203)
  1. snowflake/ml/_internal/env_utils.py +11 -1
  2. snowflake/ml/_internal/utils/identifier.py +3 -1
  3. snowflake/ml/_internal/utils/sql_identifier.py +2 -6
  4. snowflake/ml/feature_store/feature_store.py +151 -78
  5. snowflake/ml/feature_store/feature_view.py +12 -24
  6. snowflake/ml/fileset/sfcfs.py +56 -50
  7. snowflake/ml/fileset/stage_fs.py +48 -13
  8. snowflake/ml/model/_client/model/model_version_impl.py +2 -50
  9. snowflake/ml/model/_client/ops/model_ops.py +78 -29
  10. snowflake/ml/model/_client/sql/model.py +23 -2
  11. snowflake/ml/model/_client/sql/model_version.py +22 -1
  12. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
  13. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
  14. snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
  15. snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
  16. snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
  17. snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
  18. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  19. snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
  20. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
  21. snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
  22. snowflake/ml/model/_packager/model_packager.py +2 -2
  23. snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
  24. snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
  25. snowflake/ml/model/type_hints.py +21 -2
  26. snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
  27. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
  28. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
  29. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
  33. snowflake/ml/modeling/cluster/birch.py +195 -123
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
  35. snowflake/ml/modeling/cluster/dbscan.py +195 -123
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
  37. snowflake/ml/modeling/cluster/k_means.py +195 -123
  38. snowflake/ml/modeling/cluster/mean_shift.py +195 -123
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
  40. snowflake/ml/modeling/cluster/optics.py +195 -123
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
  44. snowflake/ml/modeling/compose/column_transformer.py +195 -123
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
  51. snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
  52. snowflake/ml/modeling/covariance/oas.py +195 -123
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
  56. snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
  61. snowflake/ml/modeling/decomposition/pca.py +195 -123
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
  90. snowflake/ml/modeling/framework/_utils.py +8 -1
  91. snowflake/ml/modeling/framework/base.py +9 -1
  92. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
  94. snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
  95. snowflake/ml/modeling/impute/knn_imputer.py +195 -123
  96. snowflake/ml/modeling/impute/missing_indicator.py +195 -123
  97. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
  98. snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
  99. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
  100. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
  101. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
  102. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
  103. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
  104. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
  105. snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
  106. snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
  107. snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
  108. snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
  109. snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
  110. snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
  111. snowflake/ml/modeling/linear_model/lars.py +195 -123
  112. snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
  113. snowflake/ml/modeling/linear_model/lasso.py +195 -123
  114. snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
  115. snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
  116. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
  117. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
  118. snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
  119. snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
  120. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
  121. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
  123. snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
  124. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
  125. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
  126. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
  127. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
  128. snowflake/ml/modeling/linear_model/perceptron.py +195 -123
  129. snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
  130. snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
  131. snowflake/ml/modeling/linear_model/ridge.py +195 -123
  132. snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
  133. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
  134. snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
  135. snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
  136. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
  137. snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
  138. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
  139. snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
  140. snowflake/ml/modeling/manifold/isomap.py +195 -123
  141. snowflake/ml/modeling/manifold/mds.py +195 -123
  142. snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
  143. snowflake/ml/modeling/manifold/tsne.py +195 -123
  144. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
  145. snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
  146. snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
  147. snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
  148. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
  149. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
  150. snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
  151. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
  152. snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
  153. snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
  154. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
  155. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
  156. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
  157. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
  158. snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
  159. snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
  160. snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
  161. snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
  162. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
  163. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
  164. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
  165. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
  166. snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
  167. snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
  168. snowflake/ml/modeling/pipeline/pipeline.py +4 -4
  169. snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
  170. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
  171. snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
  172. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
  173. snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
  174. snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
  175. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
  176. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
  177. snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
  178. snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
  179. snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
  180. snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
  181. snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
  182. snowflake/ml/modeling/svm/linear_svc.py +195 -123
  183. snowflake/ml/modeling/svm/linear_svr.py +195 -123
  184. snowflake/ml/modeling/svm/nu_svc.py +195 -123
  185. snowflake/ml/modeling/svm/nu_svr.py +195 -123
  186. snowflake/ml/modeling/svm/svc.py +195 -123
  187. snowflake/ml/modeling/svm/svr.py +195 -123
  188. snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
  189. snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
  190. snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
  191. snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
  192. snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
  193. snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
  194. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
  195. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
  196. snowflake/ml/registry/registry.py +1 -1
  197. snowflake/ml/version.py +1 -1
  198. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
  199. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
  200. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
  201. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
  202. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
  203. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -23,6 +23,7 @@ from snowflake.ml.model._packager.model_meta import (
23
23
  model_meta_schema,
24
24
  )
25
25
  from snowflake.ml.model._packager.model_meta_migrator import migrator_plans
26
+ from snowflake.ml.model._packager.model_runtime import model_runtime
26
27
 
27
28
  MODEL_METADATA_FILE = "model.yaml"
28
29
  MODEL_CODE_DIR = "code"
@@ -115,7 +116,6 @@ def create_model_metadata(
115
116
  python_version=python_version,
116
117
  embed_local_ml_library=embed_local_ml_library,
117
118
  legacy_save=legacy_save,
118
- relax_version=relax_version,
119
119
  )
120
120
 
121
121
  if embed_local_ml_library:
@@ -156,6 +156,8 @@ def create_model_metadata(
156
156
  cloudpickle.register_pickle_by_value(mod)
157
157
  imported_modules.append(mod)
158
158
  yield model_meta
159
+ if relax_version:
160
+ model_meta.env.relax_version()
159
161
  model_meta.save(model_dir_path)
160
162
  finally:
161
163
  for mod in imported_modules:
@@ -169,7 +171,6 @@ def _create_env_for_model_metadata(
169
171
  python_version: Optional[str] = None,
170
172
  embed_local_ml_library: bool = False,
171
173
  legacy_save: bool = False,
172
- relax_version: bool = False,
173
174
  ) -> model_env.ModelEnv:
174
175
  env = model_env.ModelEnv()
175
176
 
@@ -197,10 +198,6 @@ def _create_env_for_model_metadata(
197
198
  ],
198
199
  check_local_version=True,
199
200
  )
200
-
201
- if relax_version:
202
- env.relax_version()
203
-
204
201
  return env
205
202
 
206
203
 
@@ -237,6 +234,7 @@ class ModelMetadata:
237
234
  name: str,
238
235
  env: model_env.ModelEnv,
239
236
  model_type: model_types.SupportedModelHandlerType,
237
+ runtimes: Optional[Dict[str, model_runtime.ModelRuntime]] = None,
240
238
  signatures: Optional[Dict[str, model_signature.ModelSignature]] = None,
241
239
  metadata: Optional[Dict[str, str]] = None,
242
240
  creation_timestamp: Optional[str] = None,
@@ -262,6 +260,8 @@ class ModelMetadata:
262
260
  if models:
263
261
  self.models = models
264
262
 
263
+ self._runtimes = runtimes
264
+
265
265
  self.original_metadata_version = original_metadata_version
266
266
 
267
267
  @property
@@ -273,6 +273,19 @@ class ModelMetadata:
273
273
  parsed_min_snowpark_ml_version = version.parse(min_snowpark_ml_version)
274
274
  self._min_snowpark_ml_version = max(self._min_snowpark_ml_version, parsed_min_snowpark_ml_version)
275
275
 
276
+ @property
277
+ def runtimes(self) -> Dict[str, model_runtime.ModelRuntime]:
278
+ if self._runtimes and "cpu" in self._runtimes:
279
+ return self._runtimes
280
+ runtimes = {
281
+ "cpu": model_runtime.ModelRuntime("cpu", self.env),
282
+ }
283
+ if self.env.cuda_version:
284
+ runtimes.update(
285
+ {"gpu": model_runtime.ModelRuntime("gpu", self.env, is_gpu=True, server_availability_source="conda")}
286
+ )
287
+ return runtimes
288
+
276
289
  def save(self, model_dir_path: str) -> None:
277
290
  """Save the model metadata
278
291
 
@@ -291,6 +304,10 @@ class ModelMetadata:
291
304
  {
292
305
  "creation_timestamp": self.creation_timestamp,
293
306
  "env": self.env.save_as_dict(pathlib.Path(model_dir_path)),
307
+ "runtimes": {
308
+ runtime_name: runtime.save(pathlib.Path(model_dir_path))
309
+ for runtime_name, runtime in self.runtimes.items()
310
+ },
294
311
  "metadata": self.metadata,
295
312
  "model_type": self.model_type,
296
313
  "models": {model_name: blob.to_dict() for model_name, blob in self.models.items()},
@@ -302,6 +319,7 @@ class ModelMetadata:
302
319
  )
303
320
 
304
321
  with open(model_yaml_path, "w", encoding="utf-8") as out:
322
+ yaml.SafeDumper.ignore_aliases = lambda *args: True # type: ignore[method-assign]
305
323
  yaml.safe_dump(
306
324
  model_dict,
307
325
  stream=out,
@@ -330,6 +348,7 @@ class ModelMetadata:
330
348
  return model_meta_schema.ModelMetadataDict(
331
349
  creation_timestamp=loaded_meta["creation_timestamp"],
332
350
  env=loaded_meta["env"],
351
+ runtimes=loaded_meta.get("runtimes", None),
333
352
  metadata=loaded_meta.get("metadata", None),
334
353
  model_type=loaded_meta["model_type"],
335
354
  models=loaded_meta["models"],
@@ -363,10 +382,21 @@ class ModelMetadata:
363
382
  models = {name: model_blob_meta.ModelBlobMeta(**blob_meta) for name, blob_meta in model_dict["models"].items()}
364
383
  env = model_env.ModelEnv()
365
384
  env.load_from_dict(pathlib.Path(model_dir_path), model_dict["env"])
385
+
386
+ runtimes: Optional[Dict[str, model_runtime.ModelRuntime]]
387
+ if model_dict.get("runtimes", None):
388
+ runtimes = {
389
+ name: model_runtime.ModelRuntime.load(pathlib.Path(model_dir_path), name, env, runtime_dict)
390
+ for name, runtime_dict in model_dict["runtimes"].items()
391
+ }
392
+ else:
393
+ runtimes = None
394
+
366
395
  return cls(
367
396
  name=model_dict["name"],
368
397
  model_type=model_dict["model_type"],
369
398
  env=env,
399
+ runtimes=runtimes,
370
400
  signatures=signatures,
371
401
  metadata=model_dict.get("metadata", None),
372
402
  creation_timestamp=model_dict["creation_timestamp"],
@@ -1,7 +1,7 @@
1
1
  # This files contains schema definition of what will be written into model.yml
2
2
  # Changing this file should lead to a change of the schema version.
3
3
 
4
- from typing import Any, Dict, Optional, TypedDict, Union
4
+ from typing import Any, Dict, List, Optional, TypedDict, Union
5
5
 
6
6
  from typing_extensions import NotRequired, Required
7
7
 
@@ -11,6 +11,16 @@ MODEL_METADATA_VERSION = "2023-12-01"
11
11
  MODEL_METADATA_MIN_SNOWPARK_ML_VERSION = "1.0.12"
12
12
 
13
13
 
14
+ class ModelRuntimeDependenciesDict(TypedDict):
15
+ conda: Required[str]
16
+ pip: Required[str]
17
+
18
+
19
+ class ModelRuntimeDict(TypedDict):
20
+ imports: Required[List[str]]
21
+ dependencies: Required[ModelRuntimeDependenciesDict]
22
+
23
+
14
24
  class ModelEnvDict(TypedDict):
15
25
  conda: Required[str]
16
26
  pip: Required[str]
@@ -23,11 +33,19 @@ class BaseModelBlobOptions(TypedDict):
23
33
  ...
24
34
 
25
35
 
36
+ class CatBoostModelBlobOptions(BaseModelBlobOptions):
37
+ catboost_estimator_type: Required[str]
38
+
39
+
26
40
  class HuggingFacePipelineModelBlobOptions(BaseModelBlobOptions):
27
41
  task: Required[str]
28
42
  batch_size: Required[int]
29
43
 
30
44
 
45
+ class LightGBMModelBlobOptions(BaseModelBlobOptions):
46
+ lightgbm_estimator_type: Required[str]
47
+
48
+
31
49
  class LLMModelBlobOptions(BaseModelBlobOptions):
32
50
  batch_size: Required[int]
33
51
 
@@ -61,6 +79,7 @@ class ModelBlobMetadataDict(TypedDict):
61
79
  class ModelMetadataDict(TypedDict):
62
80
  creation_timestamp: Required[str]
63
81
  env: Required[ModelEnvDict]
82
+ runtimes: NotRequired[Dict[str, ModelRuntimeDict]]
64
83
  metadata: NotRequired[Optional[Dict[str, str]]]
65
84
  model_type: Required[type_hints.SupportedModelHandlerType]
66
85
  models: Required[Dict[str, ModelBlobMetadataDict]]
@@ -3,7 +3,9 @@ from typing import Any, Dict, Type
3
3
  from snowflake.ml.model._packager.model_meta import model_meta_schema
4
4
  from snowflake.ml.model._packager.model_meta_migrator import base_migrator, migrator_v1
5
5
 
6
- MODEL_META_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelMetaMigrator]] = {"1": migrator_v1.MetaMigrator_v1}
6
+ MODEL_META_MIGRATOR_PLANS: Dict[str, Type[base_migrator.BaseModelMetaMigrator]] = {
7
+ "1": migrator_v1.MetaMigrator_v1,
8
+ }
7
9
 
8
10
 
9
11
  def migrate_metadata(loaded_meta: Dict[str, Any]) -> Dict[str, Any]:
@@ -102,8 +102,8 @@ class ModelPackager:
102
102
  if signatures is None:
103
103
  logging.info(f"Model signatures are auto inferred as:\n\n{meta.signatures}")
104
104
 
105
- self.model = model
106
- self.meta = meta
105
+ self.model = model
106
+ self.meta = meta
107
107
 
108
108
  def load(
109
109
  self,
@@ -3,7 +3,7 @@ REQUIREMENTS = [
3
3
  "anyio>=3.5.0,<4",
4
4
  "numpy>=1.23,<2",
5
5
  "packaging>=20.9,<24",
6
- "pandas>=1.0.0,<2",
6
+ "pandas>=1.0.0,<3",
7
7
  "pyyaml>=6.0,<7",
8
8
  "snowflake-snowpark-python>=1.11.1,<2,!=1.12.0",
9
9
  "typing-extensions>=4.1.0,<5"
@@ -0,0 +1,137 @@
1
+ import copy
2
+ import pathlib
3
+ import warnings
4
+ from typing import List, Literal, Optional
5
+
6
+ from packaging import requirements
7
+
8
+ from snowflake.ml._internal import env as snowml_env, env_utils, file_utils
9
+ from snowflake.ml.model._packager.model_env import model_env
10
+ from snowflake.ml.model._packager.model_meta import model_meta_schema
11
+ from snowflake.ml.model._packager.model_runtime import (
12
+ _snowml_inference_alternative_requirements,
13
+ )
14
+
15
+ _SNOWML_INFERENCE_ALTERNATIVE_DEPENDENCIES = [
16
+ str(env_utils.get_package_spec_with_supported_ops_only(requirements.Requirement(r)))
17
+ for r in _snowml_inference_alternative_requirements.REQUIREMENTS
18
+ ]
19
+
20
+
21
+ class ModelRuntime:
22
+ """Class to represent runtime in a model, which controls the runtime and version, imports and dependencies.
23
+
24
+ Attributes:
25
+ runtime_env: ModelEnv object representing the actual environment when deploying. The environment is based on
26
+ the environment from the packaged model with additional dependencies required to deploy.
27
+ imports: List of files to be imported in the created functions. At least packed model should be imported.
28
+ If the required Snowpark ML library is not available in the server-side, we will automatically pack the
29
+ local version as well as "snowflake-ml-python.zip" and added into the imports.
30
+ """
31
+
32
+ RUNTIME_DIR_REL_PATH = "runtimes"
33
+
34
+ def __init__(
35
+ self,
36
+ name: str,
37
+ env: model_env.ModelEnv,
38
+ imports: Optional[List[pathlib.PurePosixPath]] = None,
39
+ is_gpu: bool = False,
40
+ server_availability_source: Literal["snowflake", "conda"] = "snowflake",
41
+ loading_from_file: bool = False,
42
+ ) -> None:
43
+ self.name = name
44
+ self.runtime_env = copy.deepcopy(env)
45
+ self.imports = imports or []
46
+
47
+ if loading_from_file:
48
+ return
49
+
50
+ snowml_pkg_spec = f"{env_utils.SNOWPARK_ML_PKG_NAME}=={self.runtime_env.snowpark_ml_version}"
51
+ if self.runtime_env._snowpark_ml_version.local:
52
+ self.embed_local_ml_library = True
53
+ else:
54
+ if server_availability_source == "snowflake":
55
+ snowml_server_availability = (
56
+ len(
57
+ env_utils.get_matched_package_versions_in_information_schema_with_active_session(
58
+ reqs=[requirements.Requirement(snowml_pkg_spec)],
59
+ python_version=snowml_env.PYTHON_VERSION,
60
+ ).get(env_utils.SNOWPARK_ML_PKG_NAME, [])
61
+ )
62
+ >= 1
63
+ )
64
+ else:
65
+ snowml_server_availability = (
66
+ len(
67
+ env_utils.get_matched_package_versions_in_snowflake_conda_channel(
68
+ req=requirements.Requirement(snowml_pkg_spec),
69
+ python_version=snowml_env.PYTHON_VERSION,
70
+ )
71
+ )
72
+ >= 1
73
+ )
74
+ self.embed_local_ml_library = not snowml_server_availability
75
+
76
+ additional_package = (
77
+ _SNOWML_INFERENCE_ALTERNATIVE_DEPENDENCIES if self.embed_local_ml_library else [snowml_pkg_spec]
78
+ )
79
+
80
+ self.runtime_env.include_if_absent(
81
+ [
82
+ model_env.ModelDependency(requirement=dep, pip_name=requirements.Requirement(dep).name)
83
+ for dep in additional_package
84
+ ],
85
+ )
86
+
87
+ if is_gpu:
88
+ self.runtime_env.generate_env_for_cuda()
89
+
90
+ @property
91
+ def runtime_rel_path(self) -> pathlib.PurePosixPath:
92
+ return pathlib.PurePosixPath(ModelRuntime.RUNTIME_DIR_REL_PATH) / self.name
93
+
94
+ def save(self, packager_path: pathlib.Path) -> model_meta_schema.ModelRuntimeDict:
95
+ runtime_base_path = packager_path / self.runtime_rel_path
96
+ runtime_base_path.mkdir(parents=True, exist_ok=True)
97
+
98
+ if getattr(self, "embed_local_ml_library", False):
99
+ snowpark_ml_lib_path = runtime_base_path / "snowflake-ml-python.zip"
100
+ file_utils.zip_python_package(str(snowpark_ml_lib_path), "snowflake.ml")
101
+ snowpark_ml_lib_rel_path = pathlib.PurePosixPath(snowpark_ml_lib_path.relative_to(packager_path).as_posix())
102
+ self.imports.append(snowpark_ml_lib_rel_path)
103
+
104
+ self.runtime_env.conda_env_rel_path = self.runtime_rel_path / self.runtime_env.conda_env_rel_path
105
+ self.runtime_env.pip_requirements_rel_path = self.runtime_rel_path / self.runtime_env.pip_requirements_rel_path
106
+
107
+ env_dict = self.runtime_env.save_as_dict(packager_path)
108
+
109
+ return model_meta_schema.ModelRuntimeDict(
110
+ imports=list(map(str, self.imports)),
111
+ dependencies=model_meta_schema.ModelRuntimeDependenciesDict(
112
+ conda=env_dict["conda"],
113
+ pip=env_dict["pip"],
114
+ ),
115
+ )
116
+
117
+ @staticmethod
118
+ def load(
119
+ packager_path: pathlib.Path,
120
+ name: str,
121
+ meta_env: model_env.ModelEnv,
122
+ loaded_dict: model_meta_schema.ModelRuntimeDict,
123
+ ) -> "ModelRuntime":
124
+ env = model_env.ModelEnv()
125
+ env.python_version = meta_env.python_version
126
+ env.cuda_version = meta_env.cuda_version
127
+ env.snowpark_ml_version = meta_env.snowpark_ml_version
128
+
129
+ conda_env_rel_path = pathlib.PurePosixPath(loaded_dict["dependencies"]["conda"])
130
+ pip_requirements_rel_path = pathlib.PurePosixPath(loaded_dict["dependencies"]["pip"])
131
+ with warnings.catch_warnings():
132
+ warnings.simplefilter("ignore")
133
+ env.load_from_conda_file(packager_path / conda_env_rel_path)
134
+ env.load_from_pip_file(packager_path / pip_requirements_rel_path)
135
+ return ModelRuntime(
136
+ name=name, env=env, imports=list(map(pathlib.PurePosixPath, loaded_dict["imports"])), loading_from_file=True
137
+ )
@@ -19,6 +19,8 @@ from snowflake.ml.model import deploy_platforms
19
19
  from snowflake.ml.model._signatures import core
20
20
 
21
21
  if TYPE_CHECKING:
22
+ import catboost
23
+ import lightgbm
22
24
  import mlflow
23
25
  import numpy as np
24
26
  import pandas as pd
@@ -33,7 +35,6 @@ if TYPE_CHECKING:
33
35
  import snowflake.ml.model.custom_model
34
36
  import snowflake.ml.model.models.huggingface_pipeline
35
37
  import snowflake.ml.model.models.llm
36
- import snowflake.ml.model.models.sentence_transformers
37
38
  import snowflake.snowpark
38
39
  from snowflake.ml.modeling.framework import base # noqa: F401
39
40
 
@@ -69,6 +70,9 @@ _DataType = TypeVar("_DataType", bound=SupportedDataType)
69
70
  CustomModelType = TypeVar("CustomModelType", bound="snowflake.ml.model.custom_model.CustomModel")
70
71
 
71
72
  SupportedRequireSignatureModelType = Union[
73
+ "catboost.CatBoost",
74
+ "lightgbm.LGBMModel",
75
+ "lightgbm.Booster",
72
76
  "snowflake.ml.model.custom_model.CustomModel",
73
77
  "sklearn.base.BaseEstimator",
74
78
  "sklearn.pipeline.Pipeline",
@@ -85,7 +89,6 @@ SupportedNoSignatureRequirementsModelType = Union[
85
89
  "transformers.Pipeline",
86
90
  "sentence_transformers.SentenceTransformer",
87
91
  "snowflake.ml.model.models.huggingface_pipeline.HuggingFacePipelineModel",
88
- "snowflake.ml.model.models.sentence_transformers.SentenceTransformer",
89
92
  "snowflake.ml.model.models.llm.LLM",
90
93
  ]
91
94
 
@@ -98,11 +101,14 @@ Here is all acceptable types of Snowflake native model packaging and its handler
98
101
 
99
102
  | Type | Handler File | Handler |
100
103
  |---------------------------------|--------------|---------------------|
104
+ | catboost.CatBoost | catboost.py | _CatBoostModelHandler |
101
105
  | snowflake.ml.model.custom_model.CustomModel | custom.py | _CustomModelHandler |
102
106
  | sklearn.base.BaseEstimator | sklearn.py | _SKLModelHandler |
103
107
  | sklearn.pipeline.Pipeline | sklearn.py | _SKLModelHandler |
104
108
  | xgboost.XGBModel | xgboost.py | _XGBModelHandler |
105
109
  | xgboost.Booster | xgboost.py | _XGBModelHandler |
110
+ | lightgbm.LGBMModel | lightgbm.py | _LGBMModelHandler |
111
+ | lightgbm.Booster | lightgbm.py | _LGBMModelHandler |
106
112
  | snowflake.ml.framework.base.BaseEstimator | snowmlmodel.py | _SnowMLModelHandler |
107
113
  | torch.nn.Module | pytroch.py | _PyTorchHandler |
108
114
  | torch.jit.ScriptModule | torchscript.py | _TorchScriptHandler |
@@ -114,8 +120,10 @@ Here is all acceptable types of Snowflake native model packaging and its handler
114
120
  """
115
121
 
116
122
  SupportedModelHandlerType = Literal[
123
+ "catboost",
117
124
  "custom",
118
125
  "huggingface_pipeline",
126
+ "lightgbm",
119
127
  "mlflow",
120
128
  "pytorch",
121
129
  "sentence_transformers",
@@ -225,6 +233,11 @@ class BaseModelSaveOption(TypedDict):
225
233
  method_options: NotRequired[Dict[str, ModelMethodSaveOptions]]
226
234
 
227
235
 
236
+ class CatBoostModelSaveOptions(BaseModelSaveOption):
237
+ target_methods: NotRequired[Sequence[str]]
238
+ cuda_version: NotRequired[str]
239
+
240
+
228
241
  class CustomModelSaveOption(BaseModelSaveOption):
229
242
  cuda_version: NotRequired[str]
230
243
 
@@ -238,6 +251,10 @@ class XGBModelSaveOptions(BaseModelSaveOption):
238
251
  cuda_version: NotRequired[str]
239
252
 
240
253
 
254
+ class LGBMModelSaveOptions(BaseModelSaveOption):
255
+ target_methods: NotRequired[Sequence[str]]
256
+
257
+
241
258
  class SNOWModelSaveOptions(BaseModelSaveOption):
242
259
  target_methods: NotRequired[Sequence[str]]
243
260
 
@@ -279,7 +296,9 @@ class LLMSaveOptions(BaseModelSaveOption):
279
296
 
280
297
  ModelSaveOption = Union[
281
298
  BaseModelSaveOption,
299
+ CatBoostModelSaveOptions,
282
300
  CustomModelSaveOption,
301
+ LGBMModelSaveOptions,
283
302
  SKLModelSaveOptions,
284
303
  XGBModelSaveOptions,
285
304
  SNOWModelSaveOptions,
@@ -195,21 +195,26 @@ def handle_inference_result(
195
195
  shape = transformed_numpy_array.shape
196
196
  if len(shape) > 1:
197
197
  if shape[1] != len(output_cols):
198
- # HeterogeneousEnsemble's transform method produce results with variying shapes
199
- # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes).
200
- # It is hard to predict the response shape without using fragile introspection logic.
201
- # So, to avoid that we are packing the results into a dataframe of shape (n_samples, 1) with
202
- # each element being a list.
203
- if len(output_cols) != 1:
204
- raise TypeError(
205
- "expected_output_cols must be same length as transformed array or should be of length 1."
206
- f"Currently expected_output_cols shape is {len(output_cols)}, "
207
- f"transformed array shape is {shape}. "
208
- )
198
+ # Within UDF, it is not feasible to change the output cols because we need to
199
+ # query the output cols after UDF by the expected output cols
209
200
  if not within_udf:
201
+ # The following lines are to generate the output cols to match the length of
202
+ # transformed_numpy_array
210
203
  actual_output_cols = []
211
204
  for i in range(shape[1]):
212
205
  actual_output_cols.append(f"{output_cols[0]}_{i}")
213
206
  output_cols = actual_output_cols
207
+ else:
208
+ # HeterogeneousEnsemble's transform method produces results with varying shapes
209
+ # from (n_samples, n_estimators) to (n_samples, n_estimators * n_classes).
210
+ # It is hard to predict the response shape without using fragile introspection logic.
211
+ # So, to avoid that we are packing the results into a dataframe of shape (n_samples, 1) with
212
+ # each element being a list.
213
+ if len(output_cols) != 1:
214
+ raise TypeError(
215
+ "expected_output_cols must be same length as transformed array or should be of length 1."
216
+ f"Currently expected_output_cols shape is {len(output_cols)}, "
217
+ f"transformed array shape is {shape}. "
218
+ )
214
219
 
215
220
  return transformed_numpy_array, output_cols
@@ -99,7 +99,10 @@ class PandasTransformHandlers:
99
99
  original_exception=ValueError(
100
100
  "The feature names should match with those that were passed during fit.\n"
101
101
  f"Features seen during fit call but not present in the input: {missing_features}\n"
102
- f"Features in the input dataframe : {input_cols}\n"
102
+ f"Features specified with `input_cols` in estimator "
103
+ f"{self.estimator.__class__.__name__} in the input dataframe: {input_cols}\n"
104
+ f"In your input dataset for current method '{inference_method}', the features are:"
105
+ f" {features_in_dataset}."
103
106
  ),
104
107
  )
105
108
  input_df = dataset[columns_to_select]
@@ -955,22 +955,21 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
955
955
  X, y, indices, params_to_evaluate, base_estimator, fit_and_score_kwargs = _load_data_into_udf()
956
956
  self.X = X
957
957
  self.y = y
958
- self.indices = indices
958
+ self.test_indices = indices
959
959
  self.params_to_evaluate = params_to_evaluate
960
960
  self.base_estimator = base_estimator
961
961
  self.fit_and_score_kwargs = fit_and_score_kwargs
962
962
  self.fit_score_params: List[Any] = []
963
+ self.cached_train_test_indices = []
964
+ # Calculate the full index here to avoid duplicate calculation (which consumes a lot of memory)
965
+ full_index = np.arange(DATA_LENGTH)
966
+ for i in range(n_splits):
967
+ self.cached_train_test_indices.extend(
968
+ [[np.setdiff1d(full_index, self.test_indices[i]), self.test_indices[i]]]
969
+ )
963
970
 
964
971
  def process(self, idx: int, params_idx: int, cv_idx: int) -> None:
965
- # 1. Calculate the parameter list
966
- parameters = self.params_to_evaluate[params_idx]
967
- # 2. Calculate the cross validator indices
968
- # cross validator's indices: we stored test indices only (to save space);
969
- # use the full index to re-construct each train index back.
970
- full_index = np.array([i for i in range(DATA_LENGTH)])
971
- test_index = self.indices[cv_idx]
972
- train_index = np.setdiff1d(full_index, test_index)
973
- self.fit_score_params.extend([[idx, (params_idx, parameters), (cv_idx, (train_index, test_index))]])
972
+ self.fit_score_params.extend([[idx, params_idx, cv_idx]])
974
973
 
975
974
  def end_partition(self) -> Iterator[Tuple[int, str]]:
976
975
  from sklearn.base import clone
@@ -984,14 +983,14 @@ class DistributedHPOTrainer(SnowparkModelTrainer):
984
983
  clone(self.base_estimator),
985
984
  self.X,
986
985
  self.y,
987
- train=train,
988
- test=test,
989
- parameters=parameters,
986
+ train=self.cached_train_test_indices[split_idx][0],
987
+ test=self.cached_train_test_indices[split_idx][1],
988
+ parameters=self.params_to_evaluate[cand_idx],
990
989
  split_progress=(split_idx, n_splits),
991
990
  candidate_progress=(cand_idx, n_candidates),
992
991
  **self.fit_and_score_kwargs, # load sample weight here
993
992
  )
994
- for _, (cand_idx, parameters), (split_idx, (train, test)) in self.fit_score_params
993
+ for _, cand_idx, split_idx in self.fit_score_params
995
994
  )
996
995
 
997
996
  binary_cv_results = None
@@ -136,7 +136,7 @@ class SnowparkTransformHandlers:
136
136
  estimator.n_jobs = 1
137
137
  inference_res = getattr(estimator, inference_method)(input_df, *args, **kwargs)
138
138
 
139
- transformed_numpy_array, output_cols = handle_inference_result(
139
+ transformed_numpy_array, _ = handle_inference_result(
140
140
  inference_res=inference_res,
141
141
  output_cols=expected_output_cols,
142
142
  inference_method=inference_method,
@@ -144,13 +144,13 @@ class SnowparkTransformHandlers:
144
144
  )
145
145
 
146
146
  if len(transformed_numpy_array.shape) > 1:
147
- if transformed_numpy_array.shape[1] != len(output_cols):
147
+ if transformed_numpy_array.shape[1] != len(expected_output_cols):
148
148
  series = pd.Series(transformed_numpy_array.tolist())
149
- transformed_pandas_df = pd.DataFrame(series, columns=output_cols)
149
+ transformed_pandas_df = pd.DataFrame(series, columns=expected_output_cols)
150
150
  else:
151
- transformed_pandas_df = pd.DataFrame(transformed_numpy_array.tolist(), columns=output_cols)
151
+ transformed_pandas_df = pd.DataFrame(transformed_numpy_array.tolist(), columns=expected_output_cols)
152
152
  else:
153
- transformed_pandas_df = pd.DataFrame(transformed_numpy_array, columns=output_cols)
153
+ transformed_pandas_df = pd.DataFrame(transformed_numpy_array, columns=expected_output_cols)
154
154
 
155
155
  return transformed_pandas_df.to_dict("records") # type: ignore[no-any-return]
156
156