snowflake-ml-python 1.6.3__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. snowflake/ml/_internal/telemetry.py +4 -2
  2. snowflake/ml/_internal/utils/import_utils.py +31 -0
  3. snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +13 -0
  4. snowflake/ml/data/_internal/arrow_ingestor.py +8 -0
  5. snowflake/ml/data/data_connector.py +1 -1
  6. snowflake/ml/data/torch_utils.py +33 -14
  7. snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +5 -3
  8. snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +7 -5
  9. snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +4 -2
  10. snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +3 -1
  11. snowflake/ml/feature_store/examples/example_helper.py +6 -3
  12. snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +4 -2
  13. snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +4 -2
  14. snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +3 -1
  15. snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +3 -1
  16. snowflake/ml/feature_store/feature_store.py +1 -2
  17. snowflake/ml/feature_store/feature_view.py +5 -1
  18. snowflake/ml/model/_client/model/model_version_impl.py +144 -10
  19. snowflake/ml/model/_client/ops/model_ops.py +25 -6
  20. snowflake/ml/model/_client/ops/service_ops.py +33 -28
  21. snowflake/ml/model/_client/service/model_deployment_spec.py +19 -8
  22. snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -1
  23. snowflake/ml/model/_client/sql/model.py +14 -0
  24. snowflake/ml/model/_client/sql/service.py +6 -18
  25. snowflake/ml/model/_model_composer/model_composer.py +2 -0
  26. snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +4 -0
  27. snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
  28. snowflake/ml/model/_model_composer/model_method/model_method.py +1 -1
  29. snowflake/ml/model/_packager/model_handlers/_utils.py +5 -1
  30. snowflake/ml/model/_packager/model_handlers/catboost.py +3 -6
  31. snowflake/ml/model/_packager/model_handlers/custom.py +2 -0
  32. snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +10 -1
  33. snowflake/ml/model/_packager/model_handlers/lightgbm.py +3 -6
  34. snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -1
  35. snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -6
  36. snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -65
  37. snowflake/ml/model/_packager/model_handlers/xgboost.py +10 -40
  38. snowflake/ml/model/_packager/model_packager.py +0 -11
  39. snowflake/ml/model/_packager/{model_handlers/model_objective_utils.py → model_task/model_task_utils.py} +13 -25
  40. snowflake/ml/model/_signatures/pandas_handler.py +16 -0
  41. snowflake/ml/model/custom_model.py +47 -7
  42. snowflake/ml/model/model_signature.py +2 -0
  43. snowflake/ml/model/type_hints.py +8 -0
  44. snowflake/ml/modeling/_internal/estimator_utils.py +13 -0
  45. snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +7 -2
  46. snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +16 -5
  47. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +8 -2
  48. snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -3
  49. snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -8
  50. snowflake/ml/modeling/cluster/agglomerative_clustering.py +17 -19
  51. snowflake/ml/modeling/cluster/dbscan.py +5 -2
  52. snowflake/ml/modeling/cluster/feature_agglomeration.py +7 -19
  53. snowflake/ml/modeling/cluster/k_means.py +14 -19
  54. snowflake/ml/modeling/cluster/mini_batch_k_means.py +3 -3
  55. snowflake/ml/modeling/cluster/optics.py +6 -6
  56. snowflake/ml/modeling/cluster/spectral_clustering.py +4 -3
  57. snowflake/ml/modeling/compose/column_transformer.py +15 -5
  58. snowflake/ml/modeling/compose/transformed_target_regressor.py +7 -6
  59. snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
  60. snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
  61. snowflake/ml/modeling/covariance/min_cov_det.py +2 -2
  62. snowflake/ml/modeling/covariance/oas.py +1 -1
  63. snowflake/ml/modeling/decomposition/kernel_pca.py +2 -2
  64. snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -12
  65. snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -12
  66. snowflake/ml/modeling/decomposition/pca.py +28 -15
  67. snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -0
  68. snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -12
  69. snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -11
  70. snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -8
  71. snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -8
  72. snowflake/ml/modeling/ensemble/extra_trees_classifier.py +21 -2
  73. snowflake/ml/modeling/ensemble/extra_trees_regressor.py +18 -2
  74. snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +2 -0
  75. snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +2 -0
  76. snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +21 -8
  77. snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +21 -11
  78. snowflake/ml/modeling/ensemble/random_forest_classifier.py +21 -2
  79. snowflake/ml/modeling/ensemble/random_forest_regressor.py +18 -2
  80. snowflake/ml/modeling/feature_selection/generic_univariate_select.py +2 -1
  81. snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
  82. snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +2 -2
  83. snowflake/ml/modeling/linear_model/ard_regression.py +5 -10
  84. snowflake/ml/modeling/linear_model/bayesian_ridge.py +5 -11
  85. snowflake/ml/modeling/linear_model/elastic_net.py +3 -0
  86. snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
  87. snowflake/ml/modeling/linear_model/lars.py +0 -10
  88. snowflake/ml/modeling/linear_model/lars_cv.py +1 -11
  89. snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
  90. snowflake/ml/modeling/linear_model/lasso_lars.py +0 -10
  91. snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -11
  92. snowflake/ml/modeling/linear_model/lasso_lars_ic.py +0 -10
  93. snowflake/ml/modeling/linear_model/logistic_regression.py +28 -22
  94. snowflake/ml/modeling/linear_model/logistic_regression_cv.py +30 -24
  95. snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
  96. snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
  97. snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +4 -13
  98. snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +4 -4
  99. snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
  100. snowflake/ml/modeling/linear_model/perceptron.py +3 -3
  101. snowflake/ml/modeling/linear_model/ransac_regressor.py +3 -2
  102. snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +14 -6
  103. snowflake/ml/modeling/linear_model/ridge_cv.py +17 -11
  104. snowflake/ml/modeling/linear_model/sgd_classifier.py +2 -2
  105. snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -1
  106. snowflake/ml/modeling/linear_model/sgd_regressor.py +12 -3
  107. snowflake/ml/modeling/manifold/isomap.py +1 -1
  108. snowflake/ml/modeling/manifold/mds.py +3 -3
  109. snowflake/ml/modeling/manifold/tsne.py +10 -4
  110. snowflake/ml/modeling/metrics/classification.py +12 -16
  111. snowflake/ml/modeling/metrics/ranking.py +3 -3
  112. snowflake/ml/modeling/metrics/regression.py +3 -3
  113. snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +3 -3
  114. snowflake/ml/modeling/naive_bayes/categorical_nb.py +3 -3
  115. snowflake/ml/modeling/naive_bayes/complement_nb.py +3 -3
  116. snowflake/ml/modeling/naive_bayes/multinomial_nb.py +3 -3
  117. snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +10 -4
  118. snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +5 -2
  119. snowflake/ml/modeling/neighbors/local_outlier_factor.py +2 -2
  120. snowflake/ml/modeling/neighbors/nearest_centroid.py +7 -14
  121. snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
  122. snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -1
  123. snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
  124. snowflake/ml/modeling/neural_network/mlp_classifier.py +7 -1
  125. snowflake/ml/modeling/neural_network/mlp_regressor.py +3 -0
  126. snowflake/ml/modeling/pipeline/pipeline.py +16 -14
  127. snowflake/ml/modeling/preprocessing/one_hot_encoder.py +8 -4
  128. snowflake/ml/modeling/preprocessing/ordinal_encoder.py +9 -7
  129. snowflake/ml/modeling/svm/linear_svc.py +25 -16
  130. snowflake/ml/modeling/svm/linear_svr.py +23 -17
  131. snowflake/ml/modeling/svm/nu_svc.py +5 -3
  132. snowflake/ml/modeling/svm/nu_svr.py +3 -1
  133. snowflake/ml/modeling/svm/svc.py +9 -5
  134. snowflake/ml/modeling/svm/svr.py +3 -1
  135. snowflake/ml/modeling/tree/decision_tree_classifier.py +21 -2
  136. snowflake/ml/modeling/tree/decision_tree_regressor.py +18 -2
  137. snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -9
  138. snowflake/ml/modeling/tree/extra_tree_regressor.py +18 -2
  139. snowflake/ml/monitoring/_client/{monitor_sql_client.py → model_monitor_sql_client.py} +1 -1
  140. snowflake/ml/monitoring/{_client → _manager}/model_monitor_manager.py +9 -8
  141. snowflake/ml/monitoring/{_client/model_monitor.py → model_monitor.py} +3 -3
  142. snowflake/ml/registry/_manager/model_manager.py +15 -1
  143. snowflake/ml/registry/registry.py +15 -8
  144. snowflake/ml/version.py +1 -1
  145. {snowflake_ml_python-1.6.3.dist-info → snowflake_ml_python-1.7.0.dist-info}/METADATA +81 -9
  146. {snowflake_ml_python-1.6.3.dist-info → snowflake_ml_python-1.7.0.dist-info}/RECORD +150 -150
  147. {snowflake_ml_python-1.6.3.dist-info → snowflake_ml_python-1.7.0.dist-info}/WHEEL +1 -1
  148. /snowflake/ml/monitoring/{_client/model_monitor_version.py → model_monitor_version.py} +0 -0
  149. {snowflake_ml_python-1.6.3.dist-info → snowflake_ml_python-1.7.0.dist-info}/LICENSE.txt +0 -0
  150. {snowflake_ml_python-1.6.3.dist-info → snowflake_ml_python-1.7.0.dist-info}/top_level.txt +0 -0
@@ -544,7 +544,7 @@ def send_api_usage_telemetry(
             if not isinstance(e, snowml_exceptions.SnowflakeMLException):
                 # already handled via a nested decorated function
                 if getattr(e, "_snowflake_ml_handled", False):
-                    raise e
+                    raise
                 if isinstance(e, snowpark_exceptions.SnowparkClientException):
                     me = snowml_exceptions.SnowflakeMLException(
                         error_code=error_codes.INTERNAL_SNOWPARK_ERROR, original_exception=e
@@ -558,7 +558,9 @@ def send_api_usage_telemetry(
             telemetry_args["error"] = repr(me)
             telemetry_args["error_code"] = me.error_code
             me.original_exception._snowflake_ml_handled = True  # type: ignore[attr-defined]
-            if me.suppress_source_trace:
+            if e is not me:
+                raise  # Directly raise non-wrapped exceptions to preserve original stacktrace
+            elif me.suppress_source_trace:
                 raise me.original_exception from None
             else:
                 raise me.original_exception from e
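The first hunk replaces `raise e` with a bare `raise`, and the second re-raises non-wrapped exceptions directly. A standalone sketch of the marker pattern the decorator relies on (plain Python with hypothetical names, not the snowflake-ml implementation):

    from functools import wraps

    def telemetry_boundary(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                # Flagged by a nested decorated call: re-raise untouched so the
                # original traceback survives and telemetry isn't double-counted.
                if getattr(e, "_handled", False):
                    raise
                e._handled = True  # type: ignore[attr-defined]
                # ... record telemetry for the outermost handling frame here ...
                raise
        return wrapper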
@@ -19,6 +19,33 @@ class MissingOptionalDependency:
         raise ImportError(f"Unable to import {self._dep_name}.")


+def import_with_fallbacks(*targets: str) -> Any:
+    """Import a module which may be located in different locations.
+
+    This method will iterate through the provided targets, returning the first available import target.
+    If none of the requested import targets are available, ImportError will be raised.
+
+    Args:
+        targets: Strings representing the target which needs to be imported. Each should be a list of symbol names
+            joined by dots. Some valid examples:
+            - <some_package>
+            - <some_module>
+            - <some_package>.<some_module>
+            - <some_module>.<some_symbol>
+
+    Returns:
+        The imported target.
+
+    Raises:
+        ImportError: None of the requested targets are available.
+    """
+    for target in targets:
+        result, success = import_or_get_dummy(target)
+        if success:
+            return result
+    raise ImportError(f"None of the requested targets could be imported. Requested: {', '.join(targets)}")
+
+
 def import_or_get_dummy(target: str) -> Tuple[Any, bool]:
     """Try to import the given target or return a dummy object.

@@ -43,6 +70,10 @@ def import_or_get_dummy(target: str) -> Tuple[Any, bool]:
     except ImportError:
         pass

+    # Don't try symbol resolution if target doesn't contain '.'
+    if "." not in target:
+        return (MissingOptionalDependency(target), False)
+
     # Try to import the target as a symbol
     try:
         res = _try_import_symbol(target)
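A hedged usage sketch of the new helper; the two torch paths below are merely illustrative fallback locations, not taken from the diff:

    from snowflake.ml._internal.utils.import_utils import import_with_fallbacks

    # The first importable target wins; ImportError is raised only if all fail.
    DataLoader = import_with_fallbacks(
        "torch.utils.data.DataLoader",
        "torch.utils.data.dataloader.DataLoader",  # alternate location
    )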
@@ -121,3 +121,16 @@ def cast_snowpark_dataframe_column_types(df: snowpark.DataFrame) -> snowpark.DataFrame:
         selected_cols.append(functions.col(src))
     df = df.select(selected_cols)
     return df
+
+
+def is_single_query_snowpark_dataframe(df: snowpark.DataFrame) -> bool:
+    """Check if dataframe only has a single query.
+
+    Args:
+        df: A snowpark dataframe.
+
+    Returns:
+        True if there is only one query in the dataframe and no post_actions,
+        False otherwise.
+    """
+    return len(df.queries["queries"]) == 1 and len(df.queries["post_actions"]) == 0
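A small sketch of what the new check observes (assumes an existing Snowpark `session`; the table name is hypothetical). A plain table scan compiles to a single query with no post-actions, so the check returns True:

    from snowflake.ml._internal.utils.snowpark_dataframe_utils import (
        is_single_query_snowpark_dataframe,
    )

    df = session.table("MY_DB.MY_SCHEMA.MY_TABLE")
    print(df.queries)  # {"queries": [...], "post_actions": [...]}
    assert is_single_query_snowpark_dataframe(df)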
@@ -198,7 +198,15 @@ def _record_batch_to_arrays(rb: pa.RecordBatch) -> Dict[str, npt.NDArray[Any]]:
     for column, column_schema in zip(rb, rb.schema):
         # zero_copy_only=False because of nans. Ideally nans should have been imputed in feature engineering.
         array = column.to_numpy(zero_copy_only=False)
+        # If this column is a list, use the underlying type from the list values. Since this is just one column,
+        # there should only be one type within the list.
+        # TODO: Refactor to reduce data copies.
+        if isinstance(column_schema.type, pa.ListType):
+            # Update dtype of outer array:
+            array = np.array(array.tolist(), dtype=column_schema.type.value_type.to_pandas_dtype())
+
         batch_dict[column_schema.name] = array
+
     return batch_dict
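A self-contained pyarrow demo of the branch added above (not the ingestor itself): `to_numpy()` on a list column yields an object array of sub-arrays, which the new code re-materializes as a dense array using the list's value type:

    import numpy as np
    import pyarrow as pa

    rb = pa.record_batch([pa.array([[1.0, 2.0], [3.0, 4.0]])], names=["embedding"])
    for column, field in zip(rb, rb.schema):
        array = column.to_numpy(zero_copy_only=False)  # object array of np arrays
        if isinstance(field.type, pa.ListType):
            # Rebuild with the list's value type to get a dense 2-D float array.
            array = np.array(array.tolist(), dtype=field.type.value_type.to_pandas_dtype())
        print(field.name, array.dtype, array.shape)  # embedding float64 (2, 2)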
@@ -159,7 +159,7 @@ class DataConnector:
         func_params_to_log=["batch_size", "shuffle", "drop_last_batch"],
     )
     def to_torch_dataset(
-        self, *, batch_size: int = 1, shuffle: bool = False, drop_last_batch: bool = True
+        self, *, batch_size: Optional[int] = None, shuffle: bool = False, drop_last_batch: bool = True
     ) -> "torch_data.IterableDataset":  # type: ignore[type-arg]
         """Transform the Snowflake data into a PyTorch Iterable Dataset to be used with a DataLoader.
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Iterator, List, Union
+from typing import Any, Dict, Iterator, List, Optional, Union

 import numpy as np
 import numpy.typing as npt
@@ -14,17 +14,21 @@ class TorchDatasetWrapper(torch.utils.data.IterableDataset[Dict[str, Any]]):
         self,
         ingestor: data_ingestor.DataIngestor,
         *,
-        batch_size: int,
+        batch_size: Optional[int],
         shuffle: bool = False,
         drop_last: bool = False,
-        squeeze_outputs: bool = True
     ) -> None:
         """Not intended for direct usage. Use DataConnector.to_torch_dataset() instead"""
+        squeeze = False
+        if batch_size is None:
+            batch_size = 1
+            squeeze = True
+
         self._ingestor = ingestor
         self._batch_size = batch_size
         self._shuffle = shuffle
         self._drop_last = drop_last
-        self._squeeze_outputs = squeeze_outputs
+        self._squeeze_outputs = squeeze

     def __iter__(self) -> Iterator[Dict[str, Union[npt.NDArray[Any], List[Any]]]]:
         max_idx = 0
@@ -43,15 +47,7 @@ class TorchDatasetWrapper(torch.utils.data.IterableDataset[Dict[str, Any]]):
         ):
             # Skip indices during multi-process data loading to prevent data duplication
             if counter == filter_idx:
-                # Basic preprocessing on batch values: squeeze away extra dimensions
-                # and convert object arrays (e.g. strings) to lists
-                if self._squeeze_outputs:
-                    yield {
-                        k: (v.squeeze().tolist() if v.dtype == np.object_ else v.squeeze()) for k, v in batch.items()
-                    }
-                else:
-                    yield batch  # type: ignore[misc]
-
+                yield {k: _preprocess_array(v, squeeze=self._squeeze_outputs) for k, v in batch.items()}
             if counter < max_idx:
                 counter += 1
             else:
@@ -65,4 +61,27 @@ class TorchDataPipeWrapper(TorchDatasetWrapper, torch.utils.data.IterDataPipe[Dict[str, Any]]):
         self, ingestor: data_ingestor.DataIngestor, *, batch_size: int, shuffle: bool = False, drop_last: bool = False
     ) -> None:
         """Not intended for direct usage. Use DataConnector.to_torch_datapipe() instead"""
-        super().__init__(ingestor, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, squeeze_outputs=False)
+        super().__init__(ingestor, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
+
+
+def _preprocess_array(arr: npt.NDArray[Any], squeeze: bool = False) -> Union[npt.NDArray[Any], List[np.object_]]:
+    """Preprocesses batch column values."""
+    single_dimensional = arr.ndim < 2 and not arr.dtype == np.object_
+
+    # Squeeze away all extra dimensions. This is only used when batch_size = None.
+    if squeeze:
+        arr = arr.squeeze(axis=0)
+
+    # For single dimensional data, add a dimension so downstream code sees a consistent shape.
+    if single_dimensional:
+        axis = 0 if arr.ndim == 0 else 1
+        arr = np.expand_dims(arr, axis=axis)
+
+    # Handle object arrays.
+    if arr.dtype == np.object_:
+        array_list = arr.tolist()
+        # If this is an array of arrays, convert the dtype to match the underlying array.
+        # Otherwise, if this is a numpy array of strings, convert the array to a list.
+        arr = np.array(array_list, dtype=arr.flat[0].dtype) if isinstance(arr.flat[0], np.ndarray) else array_list
+
+    return arr
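A hedged usage sketch of the reworked API, where `connector` is an assumed, already-built DataConnector: batching now happens inside the dataset, so the DataLoader should not re-batch, and omitting `batch_size` yields squeezed per-row samples:

    import torch.utils.data as torch_data

    ds = connector.to_torch_dataset(batch_size=32, shuffle=True)
    loader = torch_data.DataLoader(ds, batch_size=None)  # batching already done upstream

    per_row_ds = connector.to_torch_dataset()  # batch_size=None: squeezed per-row samples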
@@ -6,15 +6,17 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about airplane model."""
     query = session.sql(
-        """
+        f"""
         select
             PLANE_MODEL,
             SEATING_CAPACITY
         from
-            PLANE_MODEL_ATTRIBUTES
+            {database}.{schema}.PLANE_MODEL_ATTRIBUTES
         """
     )
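The same signature change is applied to every example feature module that follows; a minimal sketch of the new module contract (table and column names hypothetical):

    from typing import List

    from snowflake.ml.feature_store import FeatureView
    from snowflake.snowpark import DataFrame, Session

    # This function will be invoked by example_helper.py. Do not change the name.
    def create_draft_feature_view(
        session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
    ) -> FeatureView:
        # Source tables must now be qualified with the caller-supplied database/schema.
        feature_df = session.sql(f"select ID, SOME_FEATURE from {database}.{schema}.{source_tables[0]}")
        ...  # build and return the FeatureView as before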
@@ -6,10 +6,12 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about airport weather."""
     query = session.sql(
-        """
+        f"""
         select
             DATETIME_UTC AS TS,
             AIRPORT_ZIP_CODE,
@@ -21,9 +23,9 @@ def create_draft_feature_view(
             sum(RAIN_MM_H) over (
                 partition by AIRPORT_ZIP_CODE
                 order by DATETIME_UTC
-                range between interval '1 day' preceding and current row
+                range between interval '60 minutes' preceding and current row
             ) RAIN_SUM_60M
-        from AIRPORT_WEATHER_STATION
+        from {database}.{schema}.AIRPORT_WEATHER_STATION
         """
     )
@@ -37,6 +39,6 @@ def create_draft_feature_view(
     ).attach_feature_desc(
         {
             "RAIN_SUM_30M": "The sum of rain fall over past 30 minutes for one zipcode.",
-            "RAIN_SUM_60M": "The sum of rain fall over past 1 day for one zipcode.",
+            "RAIN_SUM_60M": "The sum of rain fall over past 1 hour for one zipcode.",
         }
     )
@@ -8,7 +8,9 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about trip station."""
     query = session.sql(
         f"""
@@ -17,7 +19,7 @@ def create_draft_feature_view(
             count(end_station_id) as f_count,
             avg(end_station_latitude) as f_avg_latitude,
             avg(end_station_longitude) as f_avg_longtitude
-        from {source_tables[0]}
+        from {database}.{schema}.{source_tables[0]}
         group by end_station_id
         """
     )
@@ -6,7 +6,9 @@ from snowflake.snowpark import DataFrame, Session, functions as F


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about trip."""
     feature_df = source_dfs[0].select(
         "trip_id",
@@ -66,7 +66,9 @@ class ExampleHelper:
                 continue
             mod_path = f"{__package__}.{self._selected_example}.features.{f_name.rstrip('.py')}"
             mod = importlib.import_module(mod_path)
-            fv = mod.create_draft_feature_view(self._session, self._source_dfs, self._source_tables)
+            fv = mod.create_draft_feature_view(
+                self._session, self._source_dfs, self._source_tables, self._database_name, self._dataset_schema
+            )
             fvs.append(fv)

         return fvs
@@ -140,7 +142,7 @@ class ExampleHelper:
             """
         ).collect()

-        return [destination_table]
+        return [schema_dict["destination_table_name"]]

     def _load_parquet(self, schema_dict: Dict[str, str], temp_stage_name: str) -> List[str]:
         regex_pattern = schema_dict["load_files_pattern"]
@@ -173,13 +175,14 @@ class ExampleHelper:
                 dest_table_name = (
                     f"{self._database_name}.{self._dataset_schema}.{schema_dict['destination_table_name']}"
                 )
+                result.append(schema_dict["destination_table_name"])
             else:
                 regex_pattern = schema_dict["destination_table_name"]
                 dest_table_name = re.match(regex_pattern, file_name).group("table_name")  # type: ignore[union-attr]
+                result.append(dest_table_name)
                 dest_table_name = f"{self._database_name}.{self._dataset_schema}.{dest_table_name}"

             df.write.mode("overwrite").save_as_table(dest_table_name)
-            result.append(dest_table_name)

         return result
@@ -8,7 +8,9 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a draft feature view."""
     feature_df = session.sql(
         f"""
@@ -25,7 +27,7 @@ def create_draft_feature_view(
                 order by TPEP_DROPOFF_DATETIME
                 range between interval '10 hours' preceding and current row
             ) AVG_FARE_10h
-        from {source_tables[0]}
+        from {database}.{schema}.{source_tables[0]}
         """
     )
@@ -6,7 +6,9 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a draft feature view."""
     feature_df = session.sql(
         f"""
@@ -16,7 +18,7 @@ def create_draft_feature_view(
             TRIP_DISTANCE,
             FARE_AMOUNT
         from
-            {source_tables[0]}
+            {database}.{schema}.{source_tables[0]}
         """
     )
@@ -6,7 +6,9 @@ from snowflake.snowpark import DataFrame, Session, functions as F


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about trip station."""
     feature_df = source_dfs[0].select(
         "WINE_ID",
@@ -6,7 +6,9 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about trip station."""
     feature_df = source_dfs[0].select("WINE_ID", "SULPHATES", "ALCOHOL")
@@ -1886,8 +1886,7 @@ class FeatureStore:
         if found_dts[0]["refresh_mode"] != "INCREMENTAL":
             warnings.warn(
                 "Your pipeline won't be incrementally refreshed due to: "
-                + f"\"{found_dts[0]['refresh_mode_reason']}\". "
-                + "It will likely incurr higher cost.",
+                + f"\"{found_dts[0]['refresh_mode_reason']}\".",
                 stacklevel=2,
                 category=UserWarning,
             )
@@ -169,6 +169,7 @@ class FeatureView(lineage_node.LineageNode):
         desc: str = "",
         warehouse: Optional[str] = None,
         initialize: str = "ON_CREATE",
+        refresh_mode: str = "AUTO",
         **_kwargs: Any,
     ) -> None:
         """
@@ -196,6 +197,9 @@ class FeatureView(lineage_node.LineageNode):
             after you register the feature view. It supports ON_CREATE (default) or ON_SCHEDULE. ON_CREATE refreshes
             the feature view synchronously at creation. ON_SCHEDULE refreshes the feature view at the next scheduled
             refresh. It is only effective when refresh_freq is not None.
+        refresh_mode: The refresh mode of the managed feature view. The value can be 'AUTO', 'FULL' or 'INCREMENTAL'.
+            For a managed feature view the default value is 'AUTO'; for a static feature view it has no effect.
+            See https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table for details.
         _kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.

         Example::
@@ -242,7 +246,7 @@ class FeatureView(lineage_node.LineageNode):
         self._schema: Optional[SqlIdentifier] = None
         self._initialize: str = initialize
         self._warehouse: Optional[SqlIdentifier] = SqlIdentifier(warehouse) if warehouse is not None else None
-        self._refresh_mode: Optional[str] = _kwargs.get("refresh_mode", "AUTO")
+        self._refresh_mode: Optional[str] = refresh_mode
         self._refresh_mode_reason: Optional[str] = None
         self._owner: Optional[str] = None
         self._validate()
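A hedged construction sketch for the new keyword (the entity and dataframe are assumed to already exist):

    from snowflake.ml.feature_store import FeatureView

    fv = FeatureView(
        name="MY_FEATURE_VIEW",      # hypothetical
        entities=[my_entity],        # assumed pre-registered Entity
        feature_df=feature_df,       # assumed Snowpark DataFrame
        refresh_freq="1 day",        # makes this a managed feature view
        refresh_mode="INCREMENTAL",  # new explicit kwarg; was only reachable via _kwargs
    )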
@@ -614,6 +614,102 @@ class ModelVersion(lineage_node.LineageNode):
             version_name=sql_identifier.SqlIdentifier(version),
         )

+    @overload
+    def create_service(
+        self,
+        *,
+        service_name: str,
+        image_build_compute_pool: Optional[str] = None,
+        service_compute_pool: str,
+        image_repo: str,
+        ingress_enabled: bool = False,
+        max_instances: int = 1,
+        cpu_requests: Optional[str] = None,
+        memory_requests: Optional[str] = None,
+        gpu_requests: Optional[str] = None,
+        num_workers: Optional[int] = None,
+        max_batch_rows: Optional[int] = None,
+        force_rebuild: bool = False,
+        build_external_access_integration: Optional[str] = None,
+    ) -> str:
+        """Create an inference service with the given spec.
+
+        Args:
+            service_name: The name of the service, can be fully qualified. If not fully qualified, the database or
+                schema of the model will be used.
+            image_build_compute_pool: The name of the compute pool used to build the model inference image. It uses
+                the service compute pool if None.
+            service_compute_pool: The name of the compute pool used to run the inference service.
+            image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the
+                database or schema of the model will be used.
+            ingress_enabled: If true, creates a service endpoint associated with the service. User must have
+                BIND SERVICE ENDPOINT privilege on the account.
+            max_instances: The maximum number of inference service instances to run. The same value is set to the
+                MIN_INSTANCES property of the service.
+            cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string value. If
+                None, we attempt to utilize all the vCPU of the node.
+            memory_requests: The memory limit for CPU based inference. Can be an integer or a fractional value, but
+                requires a unit (GiB, MiB). If None, we attempt to utilize all the memory of the node.
+            gpu_requests: The gpu limit for GPU based inference. Can be an integer, fractional or string value. Use
+                CPU if None.
+            num_workers: The number of workers to run the inference service for handling requests in parallel within
+                an instance of the service. By default, it is set to 2*vCPU+1 of the node for CPU based inference and
+                1 for GPU based inference. For GPU based inference, please see best practices before playing with
+                this value.
+            max_batch_rows: The maximum number of rows to batch for inference. Auto determined if None. Minimum 32.
+            force_rebuild: Whether to force a model inference image rebuild.
+            build_external_access_integration: (Deprecated) The external access integration for image build. This is
+                usually permitting access to conda & PyPI repositories.
+        """
+        ...
+
+    @overload
+    def create_service(
+        self,
+        *,
+        service_name: str,
+        image_build_compute_pool: Optional[str] = None,
+        service_compute_pool: str,
+        image_repo: str,
+        ingress_enabled: bool = False,
+        max_instances: int = 1,
+        cpu_requests: Optional[str] = None,
+        memory_requests: Optional[str] = None,
+        gpu_requests: Optional[str] = None,
+        num_workers: Optional[int] = None,
+        max_batch_rows: Optional[int] = None,
+        force_rebuild: bool = False,
+        build_external_access_integrations: Optional[List[str]] = None,
+    ) -> str:
+        """Create an inference service with the given spec.
+
+        Args:
+            service_name: The name of the service, can be fully qualified. If not fully qualified, the database or
+                schema of the model will be used.
+            image_build_compute_pool: The name of the compute pool used to build the model inference image. It uses
+                the service compute pool if None.
+            service_compute_pool: The name of the compute pool used to run the inference service.
+            image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the
+                database or schema of the model will be used.
+            ingress_enabled: If true, creates a service endpoint associated with the service. User must have
+                BIND SERVICE ENDPOINT privilege on the account.
+            max_instances: The maximum number of inference service instances to run. The same value is set to the
+                MIN_INSTANCES property of the service.
+            cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string value. If
+                None, we attempt to utilize all the vCPU of the node.
+            memory_requests: The memory limit for CPU based inference. Can be an integer or a fractional value, but
+                requires a unit (GiB, MiB). If None, we attempt to utilize all the memory of the node.
+            gpu_requests: The gpu limit for GPU based inference. Can be an integer, fractional or string value. Use
+                CPU if None.
+            num_workers: The number of workers to run the inference service for handling requests in parallel within
+                an instance of the service. By default, it is set to 2*vCPU+1 of the node for CPU based inference and
+                1 for GPU based inference. For GPU based inference, please see best practices before playing with
+                this value.
+            max_batch_rows: The maximum number of rows to batch for inference. Auto determined if None. Minimum 32.
+            force_rebuild: Whether to force a model inference image rebuild.
+            build_external_access_integrations: The external access integrations for image build. This is usually
+                permitting access to conda & PyPI repositories.
+        """
+        ...
+
     @telemetry.send_api_usage_telemetry(
         project=_TELEMETRY_PROJECT,
         subproject=_TELEMETRY_SUBPROJECT,
@@ -638,11 +734,14 @@ class ModelVersion(lineage_node.LineageNode):
         image_repo: str,
         ingress_enabled: bool = False,
         max_instances: int = 1,
+        cpu_requests: Optional[str] = None,
+        memory_requests: Optional[str] = None,
         gpu_requests: Optional[str] = None,
         num_workers: Optional[int] = None,
         max_batch_rows: Optional[int] = None,
         force_rebuild: bool = False,
-        build_external_access_integration: str,
+        build_external_access_integration: Optional[str] = None,
+        build_external_access_integrations: Optional[List[str]] = None,
     ) -> str:
         """Create an inference service with the given spec.

@@ -658,6 +757,10 @@ class ModelVersion(lineage_node.LineageNode):
                 BIND SERVICE ENDPOINT privilege on the account.
             max_instances: The maximum number of inference service instances to run. The same value is set to the
                 MIN_INSTANCES property of the service.
+            cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string value. If
+                None, we attempt to utilize all the vCPU of the node.
+            memory_requests: The memory limit for CPU based inference. Can be an integer or a fractional value, but
+                requires a unit (GiB, MiB). If None, we attempt to utilize all the memory of the node.
             gpu_requests: The gpu limit for GPU based inference. Can be an integer, fractional or string value. Use
                 CPU if None.
             num_workers: The number of workers to run the inference service for handling requests in parallel within
@@ -665,9 +768,14 @@ class ModelVersion(lineage_node.LineageNode):
                 GPU based inference. For GPU based inference, please see best practices before playing with this value.
             max_batch_rows: The maximum number of rows to batch for inference. Auto determined if None. Minimum 32.
             force_rebuild: Whether to force a model inference image rebuild.
-            build_external_access_integration: The external access integration for image build. This is usually
+            build_external_access_integration: (Deprecated) The external access integration for image build. This is
+                usually permitting access to conda & PyPI repositories.
+            build_external_access_integrations: The external access integrations for image build. This is usually
                 permitting access to conda & PyPI repositories.

+        Raises:
+            ValueError: Illegal external access integration arguments.
+
         Returns:
             Result information about service creation from server.
         """
@@ -675,6 +783,20 @@ class ModelVersion(lineage_node.LineageNode):
             project=_TELEMETRY_PROJECT,
             subproject=_TELEMETRY_SUBPROJECT,
         )
+        if build_external_access_integration is not None:
+            msg = (
+                "`build_external_access_integration` is deprecated. "
+                "Please use `build_external_access_integrations` instead."
+            )
+            warnings.warn(msg, DeprecationWarning, stacklevel=2)
+            if build_external_access_integrations is not None:
+                msg = (
+                    "`build_external_access_integration` and `build_external_access_integrations` cannot be set at "
+                    "the same time. Please use `build_external_access_integrations` only."
+                )
+                raise ValueError(msg)
+            build_external_access_integrations = [build_external_access_integration]
+
         service_db_id, service_schema_id, service_id = sql_identifier.parse_fully_qualified_name(service_name)
         image_repo_db_id, image_repo_schema_id, image_repo_id = sql_identifier.parse_fully_qualified_name(image_repo)
         return self._service_ops.create_service(
@@ -696,11 +818,17 @@ class ModelVersion(lineage_node.LineageNode):
             image_repo_name=image_repo_id,
             ingress_enabled=ingress_enabled,
             max_instances=max_instances,
+            cpu_requests=cpu_requests,
+            memory_requests=memory_requests,
             gpu_requests=gpu_requests,
             num_workers=num_workers,
             max_batch_rows=max_batch_rows,
             force_rebuild=force_rebuild,
-            build_external_access_integration=sql_identifier.SqlIdentifier(build_external_access_integration),
+            build_external_access_integrations=(
+                None
+                if build_external_access_integrations is None
+                else [sql_identifier.SqlIdentifier(eai) for eai in build_external_access_integrations]
+            ),
             statement_params=statement_params,
         )
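A hedged call sketch using the new plural argument (identifiers hypothetical; `reg` is an assumed snowflake.ml.registry.Registry):

    mv = reg.get_model("MY_MODEL").version("V1")

    mv.create_service(
        service_name="MY_DB.MY_SCHEMA.MY_SERVICE",
        service_compute_pool="MY_COMPUTE_POOL",
        image_repo="MY_DB.MY_SCHEMA.MY_IMAGE_REPO",
        ingress_enabled=True,
        build_external_access_integrations=["MY_PYPI_EAI"],  # replaces the singular kwarg
    )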
@@ -710,7 +838,7 @@ class ModelVersion(lineage_node.LineageNode):
     )
     def list_services(
         self,
-    ) -> List[str]:
+    ) -> pd.DataFrame:
         """List all the service names using this model version.

         Returns:
@@ -722,12 +850,18 @@ class ModelVersion(lineage_node.LineageNode):
             subproject=_TELEMETRY_SUBPROJECT,
         )

-        return self._model_ops.list_inference_services(
-            database_name=None,
-            schema_name=None,
-            model_name=self._model_name,
-            version_name=self._version_name,
-            statement_params=statement_params,
+        return pd.DataFrame(
+            self._model_ops.list_inference_services(
+                database_name=None,
+                schema_name=None,
+                model_name=self._model_name,
+                version_name=self._version_name,
+                statement_params=statement_params,
+            ),
+            columns=[
+                self._model_ops.INFERENCE_SERVICE_NAME_COL_NAME,
+                self._model_ops.INFERENCE_SERVICE_ENDPOINT_COL_NAME,
+            ],
         )

     @telemetry.send_api_usage_telemetry(
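And the changed return type in use, continuing the sketch above:

    services_df = mv.list_services()  # now a pandas DataFrame, not List[str]
    print(services_df)                # one row per service: name and endpoint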