snowflake-ml-python 1.4.0__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203)
  1. snowflake/ml/_internal/env_utils.py +11 -1
  2. snowflake/ml/_internal/utils/identifier.py +3 -1
  3. snowflake/ml/_internal/utils/sql_identifier.py +2 -6
  4. snowflake/ml/feature_store/feature_store.py +151 -78
  5. snowflake/ml/feature_store/feature_view.py +12 -24
  6. snowflake/ml/fileset/sfcfs.py +56 -50
  7. snowflake/ml/fileset/stage_fs.py +48 -13
  8. snowflake/ml/model/_client/model/model_version_impl.py +2 -50
  9. snowflake/ml/model/_client/ops/model_ops.py +78 -29
  10. snowflake/ml/model/_client/sql/model.py +23 -2
  11. snowflake/ml/model/_client/sql/model_version.py +22 -1
  12. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +19 -54
  13. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +8 -1
  14. snowflake/ml/model/_model_composer/model_method/model_method.py +6 -10
  15. snowflake/ml/model/_packager/model_handlers/catboost.py +206 -0
  16. snowflake/ml/model/_packager/model_handlers/lightgbm.py +218 -0
  17. snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -0
  18. snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
  19. snowflake/ml/model/_packager/model_meta/model_meta.py +36 -6
  20. snowflake/ml/model/_packager/model_meta/model_meta_schema.py +20 -1
  21. snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -1
  22. snowflake/ml/model/_packager/model_packager.py +2 -2
  23. snowflake/ml/model/{_model_composer/model_runtime/_runtime_requirements.py → _packager/model_runtime/_snowml_inference_alternative_requirements.py} +1 -1
  24. snowflake/ml/model/_packager/model_runtime/model_runtime.py +137 -0
  25. snowflake/ml/model/type_hints.py +21 -2
  26. snowflake/ml/modeling/_internal/estimator_utils.py +16 -11
  27. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -1
  28. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +13 -14
  29. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +5 -5
  30. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +195 -123
  31. snowflake/ml/modeling/cluster/affinity_propagation.py +195 -123
  32. snowflake/ml/modeling/cluster/agglomerative_clustering.py +195 -123
  33. snowflake/ml/modeling/cluster/birch.py +195 -123
  34. snowflake/ml/modeling/cluster/bisecting_k_means.py +195 -123
  35. snowflake/ml/modeling/cluster/dbscan.py +195 -123
  36. snowflake/ml/modeling/cluster/feature_agglomeration.py +195 -123
  37. snowflake/ml/modeling/cluster/k_means.py +195 -123
  38. snowflake/ml/modeling/cluster/mean_shift.py +195 -123
  39. snowflake/ml/modeling/cluster/mini_batch_k_means.py +195 -123
  40. snowflake/ml/modeling/cluster/optics.py +195 -123
  41. snowflake/ml/modeling/cluster/spectral_biclustering.py +195 -123
  42. snowflake/ml/modeling/cluster/spectral_clustering.py +195 -123
  43. snowflake/ml/modeling/cluster/spectral_coclustering.py +195 -123
  44. snowflake/ml/modeling/compose/column_transformer.py +195 -123
  45. snowflake/ml/modeling/compose/transformed_target_regressor.py +195 -123
  46. snowflake/ml/modeling/covariance/elliptic_envelope.py +195 -123
  47. snowflake/ml/modeling/covariance/empirical_covariance.py +195 -123
  48. snowflake/ml/modeling/covariance/graphical_lasso.py +195 -123
  49. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +195 -123
  50. snowflake/ml/modeling/covariance/ledoit_wolf.py +195 -123
  51. snowflake/ml/modeling/covariance/min_cov_det.py +195 -123
  52. snowflake/ml/modeling/covariance/oas.py +195 -123
  53. snowflake/ml/modeling/covariance/shrunk_covariance.py +195 -123
  54. snowflake/ml/modeling/decomposition/dictionary_learning.py +195 -123
  55. snowflake/ml/modeling/decomposition/factor_analysis.py +195 -123
  56. snowflake/ml/modeling/decomposition/fast_ica.py +195 -123
  57. snowflake/ml/modeling/decomposition/incremental_pca.py +195 -123
  58. snowflake/ml/modeling/decomposition/kernel_pca.py +195 -123
  59. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +195 -123
  60. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +195 -123
  61. snowflake/ml/modeling/decomposition/pca.py +195 -123
  62. snowflake/ml/modeling/decomposition/sparse_pca.py +195 -123
  63. snowflake/ml/modeling/decomposition/truncated_svd.py +195 -123
  64. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +195 -123
  65. snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +195 -123
  66. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +195 -123
  67. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +195 -123
  68. snowflake/ml/modeling/ensemble/bagging_classifier.py +195 -123
  69. snowflake/ml/modeling/ensemble/bagging_regressor.py +195 -123
  70. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +195 -123
  71. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +195 -123
  72. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +195 -123
  73. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +195 -123
  74. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +195 -123
  75. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +195 -123
  76. snowflake/ml/modeling/ensemble/isolation_forest.py +195 -123
  77. snowflake/ml/modeling/ensemble/random_forest_classifier.py +195 -123
  78. snowflake/ml/modeling/ensemble/random_forest_regressor.py +195 -123
  79. snowflake/ml/modeling/ensemble/stacking_regressor.py +195 -123
  80. snowflake/ml/modeling/ensemble/voting_classifier.py +195 -123
  81. snowflake/ml/modeling/ensemble/voting_regressor.py +195 -123
  82. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +195 -123
  83. snowflake/ml/modeling/feature_selection/select_fdr.py +195 -123
  84. snowflake/ml/modeling/feature_selection/select_fpr.py +195 -123
  85. snowflake/ml/modeling/feature_selection/select_fwe.py +195 -123
  86. snowflake/ml/modeling/feature_selection/select_k_best.py +195 -123
  87. snowflake/ml/modeling/feature_selection/select_percentile.py +195 -123
  88. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +195 -123
  89. snowflake/ml/modeling/feature_selection/variance_threshold.py +195 -123
  90. snowflake/ml/modeling/framework/_utils.py +8 -1
  91. snowflake/ml/modeling/framework/base.py +9 -1
  92. snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +195 -123
  93. snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +195 -123
  94. snowflake/ml/modeling/impute/iterative_imputer.py +195 -123
  95. snowflake/ml/modeling/impute/knn_imputer.py +195 -123
  96. snowflake/ml/modeling/impute/missing_indicator.py +195 -123
  97. snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +195 -123
  98. snowflake/ml/modeling/kernel_approximation/nystroem.py +195 -123
  99. snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +195 -123
  100. snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +195 -123
  101. snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +195 -123
  102. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +195 -123
  103. snowflake/ml/modeling/lightgbm/lgbm_classifier.py +195 -123
  104. snowflake/ml/modeling/lightgbm/lgbm_regressor.py +195 -123
  105. snowflake/ml/modeling/linear_model/ard_regression.py +195 -123
  106. snowflake/ml/modeling/linear_model/bayesian_ridge.py +195 -123
  107. snowflake/ml/modeling/linear_model/elastic_net.py +195 -123
  108. snowflake/ml/modeling/linear_model/elastic_net_cv.py +195 -123
  109. snowflake/ml/modeling/linear_model/gamma_regressor.py +195 -123
  110. snowflake/ml/modeling/linear_model/huber_regressor.py +195 -123
  111. snowflake/ml/modeling/linear_model/lars.py +195 -123
  112. snowflake/ml/modeling/linear_model/lars_cv.py +195 -123
  113. snowflake/ml/modeling/linear_model/lasso.py +195 -123
  114. snowflake/ml/modeling/linear_model/lasso_cv.py +195 -123
  115. snowflake/ml/modeling/linear_model/lasso_lars.py +195 -123
  116. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +195 -123
  117. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +195 -123
  118. snowflake/ml/modeling/linear_model/linear_regression.py +195 -123
  119. snowflake/ml/modeling/linear_model/logistic_regression.py +195 -123
  120. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +195 -123
  121. snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +195 -123
  122. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +195 -123
  123. snowflake/ml/modeling/linear_model/multi_task_lasso.py +195 -123
  124. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +195 -123
  125. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +195 -123
  126. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +195 -123
  127. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +195 -123
  128. snowflake/ml/modeling/linear_model/perceptron.py +195 -123
  129. snowflake/ml/modeling/linear_model/poisson_regressor.py +195 -123
  130. snowflake/ml/modeling/linear_model/ransac_regressor.py +195 -123
  131. snowflake/ml/modeling/linear_model/ridge.py +195 -123
  132. snowflake/ml/modeling/linear_model/ridge_classifier.py +195 -123
  133. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +195 -123
  134. snowflake/ml/modeling/linear_model/ridge_cv.py +195 -123
  135. snowflake/ml/modeling/linear_model/sgd_classifier.py +195 -123
  136. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +195 -123
  137. snowflake/ml/modeling/linear_model/sgd_regressor.py +195 -123
  138. snowflake/ml/modeling/linear_model/theil_sen_regressor.py +195 -123
  139. snowflake/ml/modeling/linear_model/tweedie_regressor.py +195 -123
  140. snowflake/ml/modeling/manifold/isomap.py +195 -123
  141. snowflake/ml/modeling/manifold/mds.py +195 -123
  142. snowflake/ml/modeling/manifold/spectral_embedding.py +195 -123
  143. snowflake/ml/modeling/manifold/tsne.py +195 -123
  144. snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +195 -123
  145. snowflake/ml/modeling/mixture/gaussian_mixture.py +195 -123
  146. snowflake/ml/modeling/model_selection/grid_search_cv.py +42 -18
  147. snowflake/ml/modeling/model_selection/randomized_search_cv.py +42 -18
  148. snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +195 -123
  149. snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +195 -123
  150. snowflake/ml/modeling/multiclass/output_code_classifier.py +195 -123
  151. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +195 -123
  152. snowflake/ml/modeling/naive_bayes/categorical_nb.py +195 -123
  153. snowflake/ml/modeling/naive_bayes/complement_nb.py +195 -123
  154. snowflake/ml/modeling/naive_bayes/gaussian_nb.py +195 -123
  155. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +195 -123
  156. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +195 -123
  157. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +195 -123
  158. snowflake/ml/modeling/neighbors/kernel_density.py +195 -123
  159. snowflake/ml/modeling/neighbors/local_outlier_factor.py +195 -123
  160. snowflake/ml/modeling/neighbors/nearest_centroid.py +195 -123
  161. snowflake/ml/modeling/neighbors/nearest_neighbors.py +195 -123
  162. snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +195 -123
  163. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +195 -123
  164. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +195 -123
  165. snowflake/ml/modeling/neural_network/bernoulli_rbm.py +195 -123
  166. snowflake/ml/modeling/neural_network/mlp_classifier.py +195 -123
  167. snowflake/ml/modeling/neural_network/mlp_regressor.py +195 -123
  168. snowflake/ml/modeling/pipeline/pipeline.py +4 -4
  169. snowflake/ml/modeling/preprocessing/binarizer.py +1 -5
  170. snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +1 -5
  171. snowflake/ml/modeling/preprocessing/label_encoder.py +1 -5
  172. snowflake/ml/modeling/preprocessing/max_abs_scaler.py +1 -5
  173. snowflake/ml/modeling/preprocessing/min_max_scaler.py +10 -12
  174. snowflake/ml/modeling/preprocessing/normalizer.py +1 -5
  175. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +1 -5
  176. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +1 -5
  177. snowflake/ml/modeling/preprocessing/polynomial_features.py +195 -123
  178. snowflake/ml/modeling/preprocessing/robust_scaler.py +1 -5
  179. snowflake/ml/modeling/preprocessing/standard_scaler.py +11 -11
  180. snowflake/ml/modeling/semi_supervised/label_propagation.py +195 -123
  181. snowflake/ml/modeling/semi_supervised/label_spreading.py +195 -123
  182. snowflake/ml/modeling/svm/linear_svc.py +195 -123
  183. snowflake/ml/modeling/svm/linear_svr.py +195 -123
  184. snowflake/ml/modeling/svm/nu_svc.py +195 -123
  185. snowflake/ml/modeling/svm/nu_svr.py +195 -123
  186. snowflake/ml/modeling/svm/svc.py +195 -123
  187. snowflake/ml/modeling/svm/svr.py +195 -123
  188. snowflake/ml/modeling/tree/decision_tree_classifier.py +195 -123
  189. snowflake/ml/modeling/tree/decision_tree_regressor.py +195 -123
  190. snowflake/ml/modeling/tree/extra_tree_classifier.py +195 -123
  191. snowflake/ml/modeling/tree/extra_tree_regressor.py +195 -123
  192. snowflake/ml/modeling/xgboost/xgb_classifier.py +195 -123
  193. snowflake/ml/modeling/xgboost/xgb_regressor.py +195 -123
  194. snowflake/ml/modeling/xgboost/xgbrf_classifier.py +195 -123
  195. snowflake/ml/modeling/xgboost/xgbrf_regressor.py +195 -123
  196. snowflake/ml/registry/registry.py +1 -1
  197. snowflake/ml/version.py +1 -1
  198. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/METADATA +68 -57
  199. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/RECORD +202 -200
  200. snowflake/ml/model/_model_composer/model_runtime/model_runtime.py +0 -97
  201. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/LICENSE.txt +0 -0
  202. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/WHEEL +0 -0
  203. {snowflake_ml_python-1.4.0.dist-info → snowflake_ml_python-1.4.1.dist-info}/top_level.txt +0 -0
@@ -60,6 +60,8 @@ class SFFileSystem(fsspec.AbstractFileSystem):
60
60
  b'2014-02-05 14:35:00.00000054,13,2014-02-05 14:35:00 UTC,-74.00688,40.73049,-74.00563,40.70676,2\n'
61
61
  """
62
62
 
63
+ protocol = PROTOCOL_NAME
64
+
63
65
  def __init__(
64
66
  self,
65
67
  sf_connection: Optional[connection.SnowflakeConnection] = None,
@@ -204,7 +206,7 @@ class SFFileSystem(fsspec.AbstractFileSystem):
204
206
  >>> sffs.ls("@MYDB.public.FOO/nytrain/")
205
207
  ['@MYDB.public.FOO/nytrain/data_0_0_0.csv', '@MYDB.public.FOO/nytrain/data_0_0_1.csv']
206
208
  """
207
- file_path = _parse_sfc_file_path(path)
209
+ file_path = self._parse_file_path(path)
208
210
  stage_fs = self._get_stage_fs(file_path)
209
211
  stage_path_list = stage_fs.ls(file_path.filepath, detail=True, **kwargs)
210
212
  stage_path_list = cast(List[Dict[str, Any]], stage_path_list)
@@ -226,13 +228,15 @@ class SFFileSystem(fsspec.AbstractFileSystem):
226
228
  """
227
229
  if not files:
228
230
  return
229
- stage_file_paths: Dict[Tuple[str, str, str], List[str]] = collections.defaultdict(list)
231
+ stage_fs_dict: Dict[str, stage_fs.SFStageFileSystem] = {}
232
+ stage_file_paths: Dict[str, List[str]] = collections.defaultdict(list)
230
233
  for file in files:
231
- file_path = _parse_sfc_file_path(file)
232
- stage_file_paths[(file_path.database, file_path.schema, file_path.stage)].append(file_path.filepath)
234
+ path_info = self._parse_file_path(file)
235
+ fs = self._get_stage_fs(path_info)
236
+ stage_fs_dict[fs.stage_name] = fs
237
+ stage_file_paths[fs.stage_name].append(path_info.filepath)
233
238
  for k, v in stage_file_paths.items():
234
- stage_fs = self._get_stage_fs(_SFFilePath(k[0], k[1], k[2], "*"))
235
- stage_fs.optimize_read(v)
239
+ stage_fs_dict[k].optimize_read(v)
236
240
 
237
241
  @telemetry.send_api_usage_telemetry(
238
242
  project=_PROJECT,
@@ -256,7 +260,7 @@ class SFFileSystem(fsspec.AbstractFileSystem):
256
260
  Returns:
257
261
  A fsspec AbstractBufferedFile which supports python file operations.
258
262
  """
259
- file_path = _parse_sfc_file_path(path)
263
+ file_path = self._parse_file_path(path)
260
264
  stage_fs = self._get_stage_fs(file_path)
261
265
  return stage_fs._open(file_path.filepath, **kwargs)
262
266
 
@@ -267,7 +271,7 @@ class SFFileSystem(fsspec.AbstractFileSystem):
267
271
  @snowpark._internal.utils.private_preview(version="0.2.0")
268
272
  def info(self, path: str, **kwargs: Any) -> Dict[str, Any]:
269
273
  """Override fsspec `info` method. Give details of entry at path."""
270
- file_path = _parse_sfc_file_path(path)
274
+ file_path = self._parse_file_path(path)
271
275
  stage_fs = self._get_stage_fs(file_path)
272
276
  res: Dict[str, Any] = stage_fs.info(file_path.filepath, **kwargs)
273
277
  if res:
@@ -292,52 +296,54 @@ class SFFileSystem(fsspec.AbstractFileSystem):
292
296
  """Convert the relative path in a stage to an absolute path starts with the location of the stage."""
293
297
  return stage_fs.stage_name + "/" + path
294
298
 
299
+ @classmethod
300
+ def _parse_file_path(cls, path: str) -> _SFFilePath:
301
+ """Parse a snowflake location path.
295
302
 
296
- fsspec.register_implementation(PROTOCOL_NAME, SFFileSystem)
297
-
303
+ The following properties will be extracted from the path input:
304
+ - database
305
+ - schema
306
+ - stage
307
+ - path (optional)
298
308
 
299
- def _parse_sfc_file_path(path: str) -> _SFFilePath:
300
- """Parse a snowflake location path.
309
+ Args:
310
+ path: A string in the format of "@{database}.{schema}.{stage}/{path}".
301
311
 
302
- The following propertis will be extracted from the path input:
303
- - database
304
- - schema
305
- - stage
306
- - path (optional)
312
+ Example:
313
+ "@my_db.my_schema.my_stage/"
314
+ "@my_db.my_schema.my_stage/file1"
315
+ "@my_db.my_schema.my_stage/dir1/"
316
+ "@my_db.my_schema.my_stage/dir1/file2"
307
317
 
308
- Args:
309
- path: A string in the format of "@{database}.{schema}.{stage}/{path}".
318
+ Returns:
319
+ A namedtuple consists of database name, schema name, stage name and path.
310
320
 
311
- Example:
312
- "@my_db.my_schema.my_stage/"
313
- "@my_db.my_schema.my_stage/file1"
314
- "@my_db.my_schema.my_stage/dir1/"
315
- "@my_db.my_schema.my_stage/dir1/file2"
321
+ Raises:
322
+ SnowflakeMLException: An error occurred when invalid path is given.
323
+ """
324
+ sfc_prefix = f"{PROTOCOL_NAME}://"
325
+ if path.startswith(sfc_prefix):
326
+ path = path[len(sfc_prefix) :]
327
+ if not path.startswith("@"):
328
+ raise snowml_exceptions.SnowflakeMLException(
329
+ error_code=error_codes.SNOWML_INVALID_STAGE,
330
+ original_exception=ValueError(
331
+ 'Invalid path. Expected path to start with "@". Example: @database.schema.stage/optional_path.'
332
+ ),
333
+ )
334
+ try:
335
+ res = identifier.parse_schema_level_object_identifier(path[1:])
336
+ if res[1] is None or res[0] is None or (res[3] and not res[3].startswith("/")):
337
+ raise ValueError("Invalid path. Missing database or schema identifier.")
338
+ logging.debug(f"Parsed path: {res}")
339
+ return _SFFilePath(res[0], res[1], res[2], res[3][1:])
340
+ except ValueError:
341
+ raise snowml_exceptions.SnowflakeMLException(
342
+ error_code=error_codes.SNOWML_INVALID_STAGE,
343
+ original_exception=ValueError(
344
+ f"Invalid path. Expected format: @database.schema.stage/optional_path. Getting {path}"
345
+ ),
346
+ )
316
347
 
317
- Returns:
318
- A namedtuple consists of database name, schema name, stage name and path.
319
348
 
320
- Raises:
321
- SnowflakeMLException: An error occurred when invalid path is given.
322
- """
323
- sfc_prefix = f"{PROTOCOL_NAME}://"
324
- if path.startswith(sfc_prefix):
325
- path = path[len(sfc_prefix) :]
326
- if not path.startswith("@"):
327
- raise snowml_exceptions.SnowflakeMLException(
328
- error_code=error_codes.SNOWML_INVALID_STAGE,
329
- original_exception=ValueError(
330
- 'Invalid path. Expected path to start with "@". Example: @database.schema.stage/optional_path.'
331
- ),
332
- )
333
- try:
334
- res = identifier.parse_schema_level_object_identifier(path[1:])
335
- logging.debug(f"Parsed path: {res}")
336
- return _SFFilePath(res[0], res[1], res[2], res[3][1:])
337
- except ValueError:
338
- raise snowml_exceptions.SnowflakeMLException(
339
- error_code=error_codes.SNOWML_INVALID_STAGE,
340
- original_exception=ValueError(
341
- f"Invalid path. Expected format: @database.schema.stage/optional_path. Getting {path}"
342
- ),
343
- )
349
+ fsspec.register_implementation(PROTOCOL_NAME, SFFileSystem)
@@ -8,7 +8,7 @@ import fsspec
8
8
  from fsspec.implementations import http as httpfs
9
9
 
10
10
  from snowflake import snowpark
11
- from snowflake.connector import connection
11
+ from snowflake.connector import connection, errorcode
12
12
  from snowflake.ml._internal import telemetry
13
13
  from snowflake.ml._internal.exceptions import (
14
14
  error_codes,
@@ -17,6 +17,7 @@ from snowflake.ml._internal.exceptions import (
17
17
  fileset_errors,
18
18
  )
19
19
  from snowflake.snowpark import exceptions as snowpark_exceptions
20
+ from snowflake.snowpark._internal import utils as snowpark_utils
20
21
 
21
22
  # The default length of how long a presigned url stays active in seconds.
22
23
  # Presigned url here is used to fetch file objects from Snowflake when SFStageFileSystem.open() is called.
@@ -79,7 +80,9 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
79
80
  # None -> Try pre-signed URL access, fall back to file download
80
81
  # True -> Use file download path without trying pre-signed URL access
81
82
  # False -> Use pre-signed URL access, skip download fallback on failure
82
- _USE_FALLBACK_FILE_ACCESS = None
83
+ _USE_FALLBACK_FILE_ACCESS = (
84
+ True if snowpark_utils.is_in_stored_procedure() else None # type: ignore[no-untyped-call]
85
+ )
83
86
 
84
87
  def __init__(
85
88
  self,
@@ -164,6 +167,7 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
164
167
  """
165
168
  try:
166
169
  loc = self.stage_name
170
+ path = path.lstrip("/")
167
171
  objects = self._session.sql(f"LIST {loc}/{path}").collect()
168
172
  except snowpark_exceptions.SnowparkClientException as e:
169
173
  if e.message.startswith(fileset_errors.ERRNO_DOMAIN_NOT_EXIST):
@@ -234,7 +238,7 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
234
238
  """
235
239
  path = path.lstrip("/")
236
240
  if self._USE_FALLBACK_FILE_ACCESS:
237
- return self._session.file.get_stream(f"{self.stage_name}/{path}")
241
+ return self._open_with_snowpark(path)
238
242
  cached_presigned_url = self._url_cache.get(path, None)
239
243
  if not cached_presigned_url:
240
244
  res = self._fetch_presigned_urls([path])
@@ -252,19 +256,42 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
252
256
  except FileNotFoundError:
253
257
  # Enable fallback if _USE_FALLBACK_FILE_ACCESS is True or None; set to False to disable
254
258
  if self._USE_FALLBACK_FILE_ACCESS != False: # noqa: E712
255
- try:
256
- # Try falling back to Snowpark file read if presigned URL access failed
257
- # If fallback is successful, most likely in sproc without External Access Integration
258
- content = self._session.file.get_stream(f"{self.stage_name}/{path}")
259
- self._USE_FALLBACK_FILE_ACCESS = True
260
- return content
261
- except Exception:
262
- pass
259
+ content = self._open_with_snowpark(path)
260
+ self._USE_FALLBACK_FILE_ACCESS = True
261
+ return content
263
262
  raise snowml_exceptions.SnowflakeMLException(
264
263
  error_code=error_codes.SNOWML_NOT_FOUND,
265
264
  original_exception=fileset_errors.StageFileNotFoundError(f"Stage file {path} doesn't exist."),
266
265
  )
267
266
 
267
+ def _open_with_snowpark(self, path: str, **kwargs: Dict[str, Any]) -> fsspec.spec.AbstractBufferedFile:
268
+ """Open a file for reading using snowflake.snowpark.file_operation
269
+
270
+ Args:
271
+ path: Path of file in Snowflake stage.
272
+ **kwargs: Extra options to pass to snowflake.snowpark.file_operation.get_stream
273
+
274
+ Returns:
275
+ A fsspec file-like object.
276
+
277
+ Raises:
278
+ SnowflakeMLException: An error occurred when the given path points to a file that cannot be found.
279
+ SnowflakeMLException: An unknown Snowpark error occurred during file read.
280
+ """
281
+ try:
282
+ return self._session.file.get_stream(f"{self.stage_name}/{path}", **kwargs)
283
+ except snowpark_exceptions.SnowparkSQLException as e:
284
+ if _match_error_code(e, errorcode.ER_FILE_NOT_EXISTS):
285
+ raise snowml_exceptions.SnowflakeMLException(
286
+ error_code=error_codes.SNOWML_NOT_FOUND,
287
+ original_exception=fileset_errors.StageFileNotFoundError(f"Stage file {path} doesn't exist."),
288
+ )
289
+ else:
290
+ raise snowml_exceptions.SnowflakeMLException(
291
+ error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
292
+ original_exception=e,
293
+ )
294
+
268
295
  def _parse_list_result(
269
296
  self, list_result: List[Tuple[str, int, str, str]], search_path: str
270
297
  ) -> List[Dict[str, Any]]:
@@ -352,7 +379,7 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
352
379
  file_df = self._session.create_dataframe(files).to_df("name")
353
380
  try:
354
381
  presigned_urls: List[Tuple[str, str]] = file_df.select_expr(
355
- f"name, get_presigned_url({self.stage_name}, name, {url_lifetime}) as url"
382
+ f"name, get_presigned_url('{self.stage_name}', name, {url_lifetime}) as url"
356
383
  ).collect(
357
384
  statement_params=telemetry.get_function_usage_statement_params(
358
385
  project=_PROJECT,
@@ -363,7 +390,9 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
363
390
  ),
364
391
  )
365
392
  except snowpark_exceptions.SnowparkClientException as e:
366
- if e.message.startswith(fileset_errors.ERRNO_STAGE_NOT_EXIST):
393
+ if e.message.startswith(fileset_errors.ERRNO_DOMAIN_NOT_EXIST) or e.message.startswith(
394
+ fileset_errors.ERRNO_STAGE_NOT_EXIST
395
+ ):
367
396
  raise snowml_exceptions.SnowflakeMLException(
368
397
  error_code=error_codes.SNOWML_NOT_FOUND,
369
398
  original_exception=fileset_errors.StageNotFoundError(
@@ -376,3 +405,9 @@ class SFStageFileSystem(fsspec.AbstractFileSystem):
376
405
  original_exception=fileset_errors.FileSetError(str(e)),
377
406
  )
378
407
  return presigned_urls
408
+
409
+
410
+ def _match_error_code(ex: snowpark_exceptions.SnowparkSQLException, error_code: int) -> bool:
411
+ # Snowpark writes error code to message instead of populating e.error_code
412
+ error_code_str = str(error_code)
413
+ return ex.error_code == error_code_str or error_code_str in ex.message
@@ -1,12 +1,9 @@
1
- import re
2
1
  from typing import Any, Callable, Dict, List, Optional, Union
3
2
 
4
3
  import pandas as pd
5
4
 
6
- from snowflake import connector
7
5
  from snowflake.ml._internal import telemetry
8
6
  from snowflake.ml._internal.utils import sql_identifier
9
- from snowflake.ml.model import model_signature
10
7
  from snowflake.ml.model._client.ops import metadata_ops, model_ops
11
8
  from snowflake.ml.model._model_composer.model_manifest import model_manifest_schema
12
9
  from snowflake.snowpark import dataframe
@@ -207,61 +204,16 @@ class ModelVersion:
207
204
  statement_params=statement_params,
208
205
  )
209
206
 
210
- # Only used when the model does not contains user_data with client SDK information.
211
- def _legacy_show_functions(self) -> List[model_manifest_schema.ModelFunctionInfo]:
207
+ def _get_functions(self) -> List[model_manifest_schema.ModelFunctionInfo]:
212
208
  statement_params = telemetry.get_statement_params(
213
209
  project=_TELEMETRY_PROJECT,
214
210
  subproject=_TELEMETRY_SUBPROJECT,
215
211
  )
216
- manifest = self._model_ops.get_model_version_manifest(
212
+ return self._model_ops.get_functions(
217
213
  model_name=self._model_name,
218
214
  version_name=self._version_name,
219
215
  statement_params=statement_params,
220
216
  )
221
- model_meta = self._model_ops.get_model_version_native_packing_meta(
222
- model_name=self._model_name,
223
- version_name=self._version_name,
224
- statement_params=statement_params,
225
- )
226
- return_functions_info: List[model_manifest_schema.ModelFunctionInfo] = []
227
- for method in manifest["methods"]:
228
- # Method's name is resolved so we need to use case_sensitive as True to get the user-facing identifier.
229
- method_name = sql_identifier.SqlIdentifier(method["name"], case_sensitive=True).identifier()
230
- # Method's handler is `functions.<target_method>.infer`
231
- assert re.match(
232
- r"^functions\.([^\d\W]\w*)\.infer$", method["handler"]
233
- ), f"Get unexpected handler name {method['handler']}"
234
- target_method = method["handler"].split(".")[1]
235
- signature_dict = model_meta["signatures"][target_method]
236
- fi = model_manifest_schema.ModelFunctionInfo(
237
- name=method_name,
238
- target_method=target_method,
239
- signature=model_signature.ModelSignature.from_dict(signature_dict),
240
- )
241
- return_functions_info.append(fi)
242
- return return_functions_info
243
-
244
- def _get_functions(self) -> List[model_manifest_schema.ModelFunctionInfo]:
245
- statement_params = telemetry.get_statement_params(
246
- project=_TELEMETRY_PROJECT,
247
- subproject=_TELEMETRY_SUBPROJECT,
248
- )
249
- try:
250
- client_data = self._model_ops.get_client_data_in_user_data(
251
- model_name=self._model_name,
252
- version_name=self._version_name,
253
- statement_params=statement_params,
254
- )
255
- return [
256
- model_manifest_schema.ModelFunctionInfo(
257
- name=fi["name"],
258
- target_method=fi["target_method"],
259
- signature=model_signature.ModelSignature.from_dict(fi["signature"]),
260
- )
261
- for fi in client_data["functions"]
262
- ]
263
- except (NotImplementedError, ValueError, connector.DataError):
264
- return self._legacy_show_functions()
265
217
 
266
218
  @telemetry.send_api_usage_telemetry(
267
219
  project=_TELEMETRY_PROJECT,
@@ -1,7 +1,7 @@
1
- import json
2
1
  import pathlib
3
2
  import tempfile
4
- from typing import Any, Dict, List, Optional, Union, cast
3
+ from contextlib import contextmanager
4
+ from typing import Any, Dict, Generator, List, Optional, Union, cast
5
5
 
6
6
  import yaml
7
7
 
@@ -19,7 +19,7 @@ from snowflake.ml.model._model_composer.model_manifest import (
19
19
  model_manifest,
20
20
  model_manifest_schema,
21
21
  )
22
- from snowflake.ml.model._packager.model_meta import model_meta, model_meta_schema
22
+ from snowflake.ml.model._packager.model_meta import model_meta
23
23
  from snowflake.ml.model._signatures import snowpark_handler
24
24
  from snowflake.snowpark import dataframe, row, session
25
25
  from snowflake.snowpark._internal import utils as snowpark_utils
@@ -337,42 +337,91 @@ class ModelOperator:
337
337
  mm = model_manifest.ModelManifest(pathlib.Path(tmpdir))
338
338
  return mm.load()
339
339
 
340
- def get_model_version_native_packing_meta(
340
@contextmanager
def _enable_model_details(
    self,
    *,
    statement_params: Optional[Dict[str, Any]] = None,
) -> Generator[None, None, None]:
    """Enable model details on the session for the duration of a ``with`` block.

    Calls ``self._model_client.config_model_details(enable=True, ...)`` on entry
    and ``config_model_details(enable=False, ...)`` on exit.

    Args:
        statement_params: Optional statement parameters forwarded unchanged to
            both ``config_model_details`` calls.

    Yields:
        None. The context manager is used purely for its enter/exit side effects.
    """
    self._model_client.config_model_details(enable=True, statement_params=statement_params)
    try:
        yield
    finally:
        # Restore the session even when the body of the ``with`` block raises;
        # otherwise the setting would leak into every later query on this session.
        self._model_client.config_model_details(enable=False, statement_params=statement_params)
349
+
350
@staticmethod
def _match_model_spec_with_sql_functions(
    sql_functions_names: List[sql_identifier.SqlIdentifier], target_methods: List[str]
) -> Dict[sql_identifier.SqlIdentifier, str]:
    """Map each SQL function name to the Python target method it was created from.

    Args:
        sql_functions_names: SQL identifiers of the functions attached to the model version.
        target_methods: Python method names taken from the model spec.

    Returns:
        A dict keyed by the matched SQL identifier, with the Python method name as value.

    Raises:
        AssertionError: If a target method matches none of the SQL function names,
            neither as a case-sensitive identifier nor as a default-resolved one.
    """
    res: Dict[sql_identifier.SqlIdentifier, str] = {}
    for target_method in target_methods:
        # Find the SQL function corresponding to the Python function.
        # If the Python function name is `abc`, the SQL function name can be `ABC` or `"abc"`.
        # Try `"abc"` first, then `ABC`: given two Python methods named `abc` and `aBc`,
        # at most one of them can be `ABC`, so checking the case-sensitive form first
        # resolves both correctly.
        function_name = sql_identifier.SqlIdentifier(target_method, case_sensitive=True)
        if function_name not in sql_functions_names:
            function_name = sql_identifier.SqlIdentifier(target_method)
            if function_name not in sql_functions_names:
                # Raised explicitly (instead of via an `assert` statement) so the check
                # still runs under `python -O`; the exception type and message are unchanged.
                raise AssertionError(f"Unable to match {target_method} in {sql_functions_names}.")
        res[function_name] = target_method
    return res
369
+
370
def get_functions(
    self,
    *,
    model_name: sql_identifier.SqlIdentifier,
    version_name: sql_identifier.SqlIdentifier,
    statement_params: Optional[Dict[str, Any]] = None,
) -> List[model_manifest_schema.ModelFunctionInfo]:
    """List the functions of a model version together with their signatures.

    The model spec is read from the SHOW VERSIONS row (with model details enabled),
    the SQL functions come from ``show_functions``, and the two are joined through
    ``_match_model_spec_with_sql_functions``.

    Args:
        model_name: Identifier of the model.
        version_name: Identifier of the model version.
        statement_params: Optional statement parameters forwarded to every query.

    Returns:
        One ``ModelFunctionInfo`` per row returned by ``show_functions``.
    """
    with self._enable_model_details(statement_params=statement_params):
        version_rows = self._model_client.show_versions(
            model_name=model_name,
            version_name=version_name,
            check_model_details=True,
            statement_params=statement_params,
        )
        raw_spec = version_rows[0][self._model_client.MODEL_VERSION_MODEL_SPEC_COL_NAME]
    parsed_spec = yaml.safe_load(raw_spec)
    model_spec = model_meta.ModelMetadata._validate_model_metadata(parsed_spec)

    function_rows = self._model_version_client.show_functions(
        model_name=model_name,
        version_name=version_name,
        statement_params=statement_params,
    )

    sql_functions = []
    for function_row in function_rows:
        sql_name = sql_identifier.SqlIdentifier(
            function_row[self._model_version_client.FUNCTION_NAME_COL_NAME], case_sensitive=True
        )
        # The return-type column may be absent from the row; treat that as a plain function.
        try:
            return_type = function_row[self._model_version_client.FUNCTION_RETURN_TYPE_COL_NAME]
        except KeyError:
            return_type = ""
        if "TABLE" in return_type:
            kind = model_manifest_schema.ModelMethodFunctionTypes.TABLE_FUNCTION.value
        else:
            kind = model_manifest_schema.ModelMethodFunctionTypes.FUNCTION.value
        sql_functions.append((sql_name, kind))

    signatures = model_spec["signatures"]
    sql_names = [entry[0] for entry in sql_functions]
    sql_to_python = ModelOperator._match_model_spec_with_sql_functions(sql_names, list(signatures))

    infos = []
    for sql_name, kind in sql_functions:
        public_name = sql_name.identifier()
        python_method = sql_to_python[sql_name]
        infos.append(
            model_manifest_schema.ModelFunctionInfo(
                name=public_name,
                target_method=python_method,
                target_method_function_type=kind,
                signature=model_signature.ModelSignature.from_dict(signatures[python_method]),
            )
        )
    return infos
376
425
 
377
426
  def invoke_method(
378
427
  self,
@@ -16,7 +16,7 @@ class ModelSQLClient:
16
16
  MODEL_VERSION_NAME_COL_NAME = "name"
17
17
  MODEL_VERSION_COMMENT_COL_NAME = "comment"
18
18
  MODEL_VERSION_METADATA_COL_NAME = "metadata"
19
- MODEL_VERSION_USER_DATA_COL_NAME = "user_data"
19
+ MODEL_VERSION_MODEL_SPEC_COL_NAME = "model_spec"
20
20
 
21
21
  def __init__(
22
22
  self,
@@ -72,6 +72,7 @@ class ModelSQLClient:
72
72
  model_name: sql_identifier.SqlIdentifier,
73
73
  version_name: Optional[sql_identifier.SqlIdentifier] = None,
74
74
  validate_result: bool = True,
75
+ check_model_details: bool = False,
75
76
  statement_params: Optional[Dict[str, Any]] = None,
76
77
  ) -> List[row.Row]:
77
78
  like_sql = ""
@@ -87,10 +88,11 @@ class ModelSQLClient:
87
88
  .has_column(ModelSQLClient.MODEL_VERSION_NAME_COL_NAME, allow_empty=True)
88
89
  .has_column(ModelSQLClient.MODEL_VERSION_COMMENT_COL_NAME, allow_empty=True)
89
90
  .has_column(ModelSQLClient.MODEL_VERSION_METADATA_COL_NAME, allow_empty=True)
90
- .has_column(ModelSQLClient.MODEL_VERSION_USER_DATA_COL_NAME, allow_empty=True)
91
91
  )
92
92
  if validate_result and version_name:
93
93
  res = res.has_dimensions(expected_rows=1)
94
+ if check_model_details:
95
+ res = res.has_column(ModelSQLClient.MODEL_VERSION_MODEL_SPEC_COL_NAME, allow_empty=True)
94
96
 
95
97
  return res.validate()
96
98
 
@@ -118,3 +120,22 @@ class ModelSQLClient:
118
120
  f"DROP MODEL {self.fully_qualified_model_name(model_name)}",
119
121
  statement_params=statement_params,
120
122
  ).has_dimensions(expected_rows=1, expected_cols=1).validate()
123
+
124
def config_model_details(
    self,
    *,
    enable: bool,
    statement_params: Optional[Dict[str, Any]] = None,
) -> None:
    """Set or unset the session parameter SHOW_MODEL_DETAILS_IN_SHOW_VERSIONS_IN_MODEL.

    Args:
        enable: When true, run ``ALTER SESSION SET ...=true``; otherwise run
            ``ALTER SESSION UNSET ...``.
        statement_params: Optional statement parameters forwarded to the query.
    """
    if enable:
        alter_statement = "ALTER SESSION SET SHOW_MODEL_DETAILS_IN_SHOW_VERSIONS_IN_MODEL=true"
    else:
        alter_statement = "ALTER SESSION UNSET SHOW_MODEL_DETAILS_IN_SHOW_VERSIONS_IN_MODEL"

    # Both statements are validated the same way: exactly one row with one column.
    checker = query_result_checker.SqlResultValidator(
        self._session,
        alter_statement,
        statement_params=statement_params,
    )
    checker.has_dimensions(expected_rows=1, expected_cols=1).validate()
@@ -9,7 +9,7 @@ from snowflake.ml._internal.utils import (
9
9
  query_result_checker,
10
10
  sql_identifier,
11
11
  )
12
- from snowflake.snowpark import dataframe, functions as F, session, types as spt
12
+ from snowflake.snowpark import dataframe, functions as F, row, session, types as spt
13
13
  from snowflake.snowpark._internal import utils as snowpark_utils
14
14
 
15
15
 
@@ -21,6 +21,9 @@ def _normalize_url_for_sql(url: str) -> str:
21
21
 
22
22
 
23
23
  class ModelVersionSQLClient:
24
+ FUNCTION_NAME_COL_NAME = "name"
25
+ FUNCTION_RETURN_TYPE_COL_NAME = "return_type"
26
+
24
27
  def __init__(
25
28
  self,
26
29
  session: session.Session,
@@ -124,6 +127,24 @@ class ModelVersionSQLClient:
124
127
  ).has_dimensions(expected_rows=1).validate()
125
128
  return target_path / file_path.name
126
129
 
130
def show_functions(
    self,
    *,
    model_name: sql_identifier.SqlIdentifier,
    version_name: sql_identifier.SqlIdentifier,
    statement_params: Optional[Dict[str, Any]] = None,
) -> List[row.Row]:
    """Run ``SHOW FUNCTIONS IN MODEL ... VERSION ...`` and return the validated rows.

    Args:
        model_name: Identifier of the model; expanded via ``fully_qualified_model_name``.
        version_name: Identifier of the model version.
        statement_params: Optional statement parameters forwarded to the query.

    Returns:
        The result rows, validated to contain the function-name column
        (empty results are allowed).
    """
    qualified_model = self.fully_qualified_model_name(model_name)
    query = f"SHOW FUNCTIONS IN MODEL {qualified_model} VERSION {version_name.identifier()}"
    validator = query_result_checker.SqlResultValidator(
        self._session,
        query,
        statement_params=statement_params,
    )
    validator = validator.has_column(ModelVersionSQLClient.FUNCTION_NAME_COL_NAME, allow_empty=True)
    return validator.validate()
147
+
127
148
  def set_comment(
128
149
  self,
129
150
  *,