snowflake-ml-python 1.6.4__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/telemetry.py +4 -2
- snowflake/ml/_internal/utils/import_utils.py +31 -0
- snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +13 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +8 -0
- snowflake/ml/data/data_connector.py +1 -1
- snowflake/ml/data/torch_utils.py +33 -14
- snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +5 -3
- snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +7 -5
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +4 -2
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +3 -1
- snowflake/ml/feature_store/examples/example_helper.py +6 -3
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +4 -2
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +4 -2
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +3 -1
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +3 -1
- snowflake/ml/feature_store/feature_store.py +1 -2
- snowflake/ml/feature_store/feature_view.py +5 -1
- snowflake/ml/model/_client/model/model_version_impl.py +144 -10
- snowflake/ml/model/_client/ops/model_ops.py +25 -6
- snowflake/ml/model/_client/ops/service_ops.py +33 -28
- snowflake/ml/model/_client/service/model_deployment_spec.py +19 -8
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +3 -1
- snowflake/ml/model/_client/sql/model.py +14 -0
- snowflake/ml/model/_model_composer/model_composer.py +2 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +4 -0
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/model_method.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -1
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -6
- snowflake/ml/model/_packager/model_handlers/custom.py +2 -0
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +10 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +3 -6
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +8 -1
- snowflake/ml/model/_packager/model_handlers/sklearn.py +3 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +7 -65
- snowflake/ml/model/_packager/model_handlers/xgboost.py +10 -40
- snowflake/ml/model/_packager/model_packager.py +0 -11
- snowflake/ml/model/_packager/{model_handlers/model_objective_utils.py → model_task/model_task_utils.py} +13 -25
- snowflake/ml/model/_signatures/pandas_handler.py +16 -0
- snowflake/ml/model/custom_model.py +47 -7
- snowflake/ml/model/model_signature.py +2 -0
- snowflake/ml/model/type_hints.py +8 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +13 -0
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +7 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +16 -5
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +8 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +1 -8
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +17 -19
- snowflake/ml/modeling/cluster/dbscan.py +5 -2
- snowflake/ml/modeling/cluster/feature_agglomeration.py +7 -19
- snowflake/ml/modeling/cluster/k_means.py +14 -19
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +3 -3
- snowflake/ml/modeling/cluster/optics.py +6 -6
- snowflake/ml/modeling/cluster/spectral_clustering.py +4 -3
- snowflake/ml/modeling/compose/column_transformer.py +15 -5
- snowflake/ml/modeling/compose/transformed_target_regressor.py +7 -6
- snowflake/ml/modeling/covariance/elliptic_envelope.py +1 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +1 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +2 -2
- snowflake/ml/modeling/covariance/oas.py +1 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +2 -2
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +5 -12
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +5 -12
- snowflake/ml/modeling/decomposition/pca.py +28 -15
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +6 -0
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +1 -12
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +1 -11
- snowflake/ml/modeling/ensemble/bagging_classifier.py +1 -8
- snowflake/ml/modeling/ensemble/bagging_regressor.py +1 -8
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +21 -2
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +18 -2
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +2 -0
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +2 -0
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +21 -8
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +21 -11
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +21 -2
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +18 -2
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +2 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +5 -3
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +2 -2
- snowflake/ml/modeling/linear_model/ard_regression.py +5 -10
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +5 -11
- snowflake/ml/modeling/linear_model/elastic_net.py +3 -0
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lars.py +0 -10
- snowflake/ml/modeling/linear_model/lars_cv.py +1 -11
- snowflake/ml/modeling/linear_model/lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +0 -10
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +1 -11
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +0 -10
- snowflake/ml/modeling/linear_model/logistic_regression.py +28 -22
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +30 -24
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +1 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +1 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +4 -13
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +4 -4
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +1 -1
- snowflake/ml/modeling/linear_model/perceptron.py +3 -3
- snowflake/ml/modeling/linear_model/ransac_regressor.py +3 -2
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +14 -6
- snowflake/ml/modeling/linear_model/ridge_cv.py +17 -11
- snowflake/ml/modeling/linear_model/sgd_classifier.py +2 -2
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +5 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +12 -3
- snowflake/ml/modeling/manifold/isomap.py +1 -1
- snowflake/ml/modeling/manifold/mds.py +3 -3
- snowflake/ml/modeling/manifold/tsne.py +10 -4
- snowflake/ml/modeling/metrics/classification.py +12 -16
- snowflake/ml/modeling/metrics/ranking.py +3 -3
- snowflake/ml/modeling/metrics/regression.py +3 -3
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +3 -3
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +3 -3
- snowflake/ml/modeling/naive_bayes/complement_nb.py +3 -3
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +3 -3
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +10 -4
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +5 -2
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +2 -2
- snowflake/ml/modeling/neighbors/nearest_centroid.py +7 -14
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +1 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +6 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +1 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +7 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +3 -0
- snowflake/ml/modeling/pipeline/pipeline.py +16 -14
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +8 -4
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +9 -7
- snowflake/ml/modeling/svm/linear_svc.py +25 -16
- snowflake/ml/modeling/svm/linear_svr.py +23 -17
- snowflake/ml/modeling/svm/nu_svc.py +5 -3
- snowflake/ml/modeling/svm/nu_svr.py +3 -1
- snowflake/ml/modeling/svm/svc.py +9 -5
- snowflake/ml/modeling/svm/svr.py +3 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +21 -2
- snowflake/ml/modeling/tree/decision_tree_regressor.py +18 -2
- snowflake/ml/modeling/tree/extra_tree_classifier.py +28 -9
- snowflake/ml/modeling/tree/extra_tree_regressor.py +18 -2
- snowflake/ml/monitoring/_client/{monitor_sql_client.py → model_monitor_sql_client.py} +1 -1
- snowflake/ml/monitoring/{_client → _manager}/model_monitor_manager.py +9 -8
- snowflake/ml/monitoring/{_client/model_monitor.py → model_monitor.py} +3 -3
- snowflake/ml/registry/_manager/model_manager.py +15 -1
- snowflake/ml/registry/registry.py +15 -8
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/METADATA +75 -9
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/RECORD +149 -149
- /snowflake/ml/monitoring/{_client/model_monitor_version.py → model_monitor_version.py} +0 -0
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.6.4.dist-info → snowflake_ml_python-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/ml/_internal/telemetry.py
CHANGED
@@ -544,7 +544,7 @@ def send_api_usage_telemetry(
             if not isinstance(e, snowml_exceptions.SnowflakeMLException):
                 # already handled via a nested decorated function
                 if getattr(e, "_snowflake_ml_handled", False):
-                    raise
+                    raise e
                 if isinstance(e, snowpark_exceptions.SnowparkClientException):
                     me = snowml_exceptions.SnowflakeMLException(
                         error_code=error_codes.INTERNAL_SNOWPARK_ERROR, original_exception=e
@@ -558,7 +558,9 @@ def send_api_usage_telemetry(
                 telemetry_args["error"] = repr(me)
                 telemetry_args["error_code"] = me.error_code
                 me.original_exception._snowflake_ml_handled = True  # type: ignore[attr-defined]
-                if me.suppress_source_trace:
+                if e is not me:
+                    raise  # Directly raise non-wrapped exceptions to preserve original stacktrace
+                elif me.suppress_source_trace:
                     raise me.original_exception from None
                 else:
                     raise me.original_exception from e
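The intent of the new re-raise branch is easier to see outside the decorator. A minimal standalone sketch of the pattern, using illustrative names rather than the package's actual classes:

    import traceback

    class WrappedError(Exception):
        def __init__(self, original: Exception) -> None:
            super().__init__(repr(original))
            self.original_exception = original

    def handle(e: Exception) -> None:
        me = e if isinstance(e, WrappedError) else WrappedError(e)
        if e is not me:
            raise  # bare raise re-raises `e` with its original stacktrace intact
        raise me.original_exception from e

    try:
        try:
            1 / 0
        except Exception as exc:
            handle(exc)
    except ZeroDivisionError:
        traceback.print_exc()  # traceback still points at the `1 / 0` line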
snowflake/ml/_internal/utils/import_utils.py
CHANGED
@@ -19,6 +19,33 @@ class MissingOptionalDependency:
         raise ImportError(f"Unable to import {self._dep_name}.")


+def import_with_fallbacks(*targets: str) -> Any:
+    """Import a module which may be located in different locations.
+
+    This method will iterate through the provided targets, returning the first available import target.
+    If none of the requested import targets are available, ImportError will be raised.
+
+    Args:
+        targets: Strings representing the target which needs to be imported. It should be a list of symbol name
+            joined by dot. Some valid examples:
+            - <some_package>
+            - <some_module>
+            - <some_package>.<some_module>
+            - <some_module>.<some_symbol>
+
+    Returns:
+        The imported target.
+
+    Raises:
+        ImportError: None of the requested targets are available
+    """
+    for target in targets:
+        result, success = import_or_get_dummy(target)
+        if success:
+            return result
+    raise ImportError(f"None of the requested targets could be imported. Requested: {', '.join(targets)}")
+
+
 def import_or_get_dummy(target: str) -> Tuple[Any, bool]:
     """Try to import the the given target or return a dummy object.

@@ -43,6 +70,10 @@ def import_or_get_dummy(target: str) -> Tuple[Any, bool]:
     except ImportError:
         pass

+    # Don't try symbol resolution if target doesn't contain '.'
+    if "." not in target:
+        return (MissingOptionalDependency(target), False)
+
     # Try to import the target as a symbol
     try:
         res = _try_import_symbol(target)
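A hedged usage sketch of the new helper (the module path matches this diff; the candidate targets below are illustrative):

    from snowflake.ml._internal.utils.import_utils import import_with_fallbacks

    # Returns the first target that imports successfully; raises ImportError only
    # if every candidate is missing. Here `ujson` is optional and `json` is the
    # stdlib fallback.
    json_mod = import_with_fallbacks("ujson", "json")
    print(json_mod.dumps({"ok": True}))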
snowflake/ml/_internal/utils/snowpark_dataframe_utils.py
CHANGED
@@ -121,3 +121,16 @@ def cast_snowpark_dataframe_column_types(df: snowpark.DataFrame) -> snowpark.Dat
         selected_cols.append(functions.col(src))
     df = df.select(selected_cols)
     return df
+
+
+def is_single_query_snowpark_dataframe(df: snowpark.DataFrame) -> bool:
+    """Check if dataframe only has a single query.
+
+    Args:
+        df: A snowpark dataframe.
+
+    Returns:
+        true if there is only on query in the dataframe and no post_actions,
+        false otherwise.
+    """
+    return len(df.queries["queries"]) == 1 and len(df.queries["post_actions"]) == 0
snowflake/ml/data/_internal/arrow_ingestor.py
CHANGED
@@ -198,7 +198,15 @@ def _record_batch_to_arrays(rb: pa.RecordBatch) -> Dict[str, npt.NDArray[Any]]:
     for column, column_schema in zip(rb, rb.schema):
         # zero_copy_only=False because of nans. Ideally nans should have been imputed in feature engineering.
         array = column.to_numpy(zero_copy_only=False)
+        # If this column is a list, use the underlying type from the list values. Since this is just one column,
+        # there should only be one type within the list.
+        # TODO: Refactor to reduce data copies.
+        if isinstance(column_schema.type, pa.ListType):
+            # Update dtype of outer array:
+            array = np.array(array.tolist(), dtype=column_schema.type.value_type.to_pandas_dtype())
+
         batch_dict[column_schema.name] = array
+
     return batch_dict


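The list-column branch above can be reproduced in isolation: converting a pyarrow list column to numpy yields an object array of sub-arrays, which is then re-packed using the list's value type. A self-contained sketch:

    import numpy as np
    import pyarrow as pa

    rb = pa.RecordBatch.from_pydict({"emb": [[1.0, 2.0], [3.0, 4.0]]})
    column, column_schema = rb.column(0), rb.schema.field(0)
    array = column.to_numpy(zero_copy_only=False)  # object array of np.ndarray values
    if isinstance(column_schema.type, pa.ListType):
        array = np.array(array.tolist(), dtype=column_schema.type.value_type.to_pandas_dtype())
    print(array.dtype, array.shape)  # float64 (2, 2)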
snowflake/ml/data/data_connector.py
CHANGED
@@ -159,7 +159,7 @@ class DataConnector:
         func_params_to_log=["batch_size", "shuffle", "drop_last_batch"],
     )
     def to_torch_dataset(
-        self, *, batch_size: int = 1, shuffle: bool = False, drop_last_batch: bool = True
+        self, *, batch_size: Optional[int] = None, shuffle: bool = False, drop_last_batch: bool = True
     ) -> "torch_data.IterableDataset":  # type: ignore[type-arg]
         """Transform the Snowflake data into a PyTorch Iterable Dataset to be used with a DataLoader.

snowflake/ml/data/torch_utils.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Any, Dict, Iterator, List, Union
+from typing import Any, Dict, Iterator, List, Optional, Union

 import numpy as np
 import numpy.typing as npt
@@ -14,17 +14,21 @@ class TorchDatasetWrapper(torch.utils.data.IterableDataset[Dict[str, Any]]):
         self,
         ingestor: data_ingestor.DataIngestor,
         *,
-        batch_size: int,
+        batch_size: Optional[int],
         shuffle: bool = False,
         drop_last: bool = False,
-        squeeze_outputs: bool = True,
     ) -> None:
         """Not intended for direct usage. Use DataConnector.to_torch_dataset() instead"""
+        squeeze = False
+        if batch_size is None:
+            batch_size = 1
+            squeeze = True
+
         self._ingestor = ingestor
         self._batch_size = batch_size
         self._shuffle = shuffle
         self._drop_last = drop_last
-        self._squeeze_outputs = squeeze_outputs
+        self._squeeze_outputs = squeeze

     def __iter__(self) -> Iterator[Dict[str, Union[npt.NDArray[Any], List[Any]]]]:
         max_idx = 0
@@ -43,15 +47,7 @@ class TorchDatasetWrapper(torch.utils.data.IterableDataset[Dict[str, Any]]):
         ):
             # Skip indices during multi-process data loading to prevent data duplication
             if counter == filter_idx:
-
-                # and convert object arrays (e.g. strings) to lists
-                if self._squeeze_outputs:
-                    yield {
-                        k: (v.squeeze().tolist() if v.dtype == np.object_ else v.squeeze()) for k, v in batch.items()
-                    }
-                else:
-                    yield batch  # type: ignore[misc]
-
+                yield {k: _preprocess_array(v, squeeze=self._squeeze_outputs) for k, v in batch.items()}
             if counter < max_idx:
                 counter += 1
             else:
@@ -65,4 +61,27 @@ class TorchDataPipeWrapper(TorchDatasetWrapper, torch.utils.data.IterDataPipe[Di
         self, ingestor: data_ingestor.DataIngestor, *, batch_size: int, shuffle: bool = False, drop_last: bool = False
     ) -> None:
         """Not intended for direct usage. Use DataConnector.to_torch_datapipe() instead"""
-        super().__init__(ingestor, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last, squeeze_outputs=False)
+        super().__init__(ingestor, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last)
+
+
+def _preprocess_array(arr: npt.NDArray[Any], squeeze: bool = False) -> Union[npt.NDArray[Any], List[np.object_]]:
+    """Preprocesses batch column values."""
+    single_dimensional = arr.ndim < 2 and not arr.dtype == np.object_
+
+    # Squeeze away all extra dimensions. This is only used when batch_size = None.
+    if squeeze:
+        arr = arr.squeeze(axis=0)
+
+    # For single dimensional data,
+    if single_dimensional:
+        axis = 0 if arr.ndim == 0 else 1
+        arr = np.expand_dims(arr, axis=axis)
+
+    # Handle object arrays.
+    if arr.dtype == np.object_:
+        array_list = arr.tolist()
+        # If this is an array of arrays, convert the dtype to match the underlying array.
+        # Otherwise, if this is a numpy array of strings, convert the array to a list.
+        arr = np.array(array_list, dtype=arr.flat[0].dtype) if isinstance(arr.flat[0], np.ndarray) else array_list
+
+    return arr
snowflake/ml/feature_store/examples/airline_features/features/plane_features.py
CHANGED
@@ -6,15 +6,17 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about airplane model."""
     query = session.sql(
-        """
+        f"""
         select
             PLANE_MODEL,
             SEATING_CAPACITY
         from
-            PLANE_MODEL_ATTRIBUTES
+            {database}.{schema}.PLANE_MODEL_ATTRIBUTES
         """
     )

snowflake/ml/feature_store/examples/airline_features/features/weather_features.py
CHANGED
@@ -6,10 +6,12 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about airport weather."""
     query = session.sql(
-        """
+        f"""
         select
             DATETIME_UTC AS TS,
             AIRPORT_ZIP_CODE,
@@ -21,9 +23,9 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
             sum(RAIN_MM_H) over (
                 partition by AIRPORT_ZIP_CODE
                 order by DATETIME_UTC
-                range between interval '
+                range between interval '60 minutes' preceding and current row
             ) RAIN_SUM_60M
-        from AIRPORT_WEATHER_STATION
+        from {database}.{schema}.AIRPORT_WEATHER_STATION
         """
     )

@@ -37,6 +39,6 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
     ).attach_feature_desc(
         {
             "RAIN_SUM_30M": "The sum of rain fall over past 30 minutes for one zipcode.",
-            "RAIN_SUM_60M": "The sum of rain fall over past 1
+            "RAIN_SUM_60M": "The sum of rain fall over past 1 hour for one zipcode.",
         }
     )
snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py
CHANGED
@@ -8,7 +8,9 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about trip station."""
     query = session.sql(
         f"""
@@ -17,7 +19,7 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
             count(end_station_id) as f_count,
             avg(end_station_latitude) as f_avg_latitude,
             avg(end_station_longitude) as f_avg_longtitude
-        from {source_tables[0]}
+        from {database}.{schema}.{source_tables[0]}
         group by end_station_id
         """
     )
snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py
CHANGED
@@ -6,7 +6,9 @@ from snowflake.snowpark import DataFrame, Session, functions as F


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about trip."""
     feature_df = source_dfs[0].select(
         "trip_id",
snowflake/ml/feature_store/examples/example_helper.py
CHANGED
@@ -66,7 +66,9 @@ class ExampleHelper:
                 continue
             mod_path = f"{__package__}.{self._selected_example}.features.{f_name.rstrip('.py')}"
             mod = importlib.import_module(mod_path)
-            fv = mod.create_draft_feature_view(self._session, self._source_dfs, self._source_tables)
+            fv = mod.create_draft_feature_view(
+                self._session, self._source_dfs, self._source_tables, self._database_name, self._dataset_schema
+            )
             fvs.append(fv)

         return fvs
@@ -140,7 +142,7 @@ class ExampleHelper:
             """
         ).collect()

-        return [
+        return [schema_dict["destination_table_name"]]

     def _load_parquet(self, schema_dict: Dict[str, str], temp_stage_name: str) -> List[str]:
         regex_pattern = schema_dict["load_files_pattern"]
@@ -173,13 +175,14 @@ class ExampleHelper:
                 dest_table_name = (
                     f"{self._database_name}.{self._dataset_schema}.{schema_dict['destination_table_name']}"
                 )
+                result.append(schema_dict["destination_table_name"])
             else:
                 regex_pattern = schema_dict["destination_table_name"]
                 dest_table_name = re.match(regex_pattern, file_name).group("table_name")  # type: ignore[union-attr]
+                result.append(dest_table_name)
                 dest_table_name = f"{self._database_name}.{self._dataset_schema}.{dest_table_name}"

             df.write.mode("overwrite").save_as_table(dest_table_name)
-            result.append(dest_table_name)

         return result

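All of the feature-store example modules in this release share the same signature change: `create_draft_feature_view` now receives `database` and `schema` so each example can reference its source tables by fully qualified name, and `ExampleHelper` (above) passes its own database and dataset schema through. A sketch of the updated contract (the literal values are illustrative):

    fv = mod.create_draft_feature_view(
        session,        # snowflake.snowpark.Session
        source_dfs,     # List[DataFrame]
        source_tables,  # List[str]
        "MY_DB",        # database used to qualify source tables
        "MY_SCHEMA",    # schema used to qualify source tables
    )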
snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py
CHANGED
@@ -8,7 +8,9 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a draft feature view."""
     feature_df = session.sql(
         f"""
@@ -25,7 +27,7 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
             order by TPEP_DROPOFF_DATETIME
             range between interval '10 hours' preceding and current row
         ) AVG_FARE_10h
-        from {source_tables[0]}
+        from {database}.{schema}.{source_tables[0]}
         """
     )

snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py
CHANGED
@@ -6,7 +6,9 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a draft feature view."""
     feature_df = session.sql(
         f"""
@@ -16,7 +18,7 @@ def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], sou
             TRIP_DISTANCE,
             FARE_AMOUNT
         from
-            {source_tables[0]}
+            {database}.{schema}.{source_tables[0]}
         """
     )

snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py
CHANGED
@@ -6,7 +6,9 @@ from snowflake.snowpark import DataFrame, Session, functions as F


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about trip station."""
     feature_df = source_dfs[0].select(
         "WINE_ID",
snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py
CHANGED
@@ -6,7 +6,9 @@ from snowflake.snowpark import DataFrame, Session


 # This function will be invoked by example_helper.py. Do not change the name.
-def create_draft_feature_view(session: Session, source_dfs: List[DataFrame], source_tables: List[str]) -> FeatureView:
+def create_draft_feature_view(
+    session: Session, source_dfs: List[DataFrame], source_tables: List[str], database: str, schema: str
+) -> FeatureView:
     """Create a feature view about trip station."""
     feature_df = source_dfs[0].select("WINE_ID", "SULPHATES", "ALCOHOL")

snowflake/ml/feature_store/feature_store.py
CHANGED
@@ -1886,8 +1886,7 @@ class FeatureStore:
         if found_dts[0]["refresh_mode"] != "INCREMENTAL":
             warnings.warn(
                 "Your pipeline won't be incrementally refreshed due to: "
-                + f"\"{found_dts[0]['refresh_mode_reason']}\". "
-                + "It will likely incurr higher cost.",
+                + f"\"{found_dts[0]['refresh_mode_reason']}\".",
                 stacklevel=2,
                 category=UserWarning,
             )
snowflake/ml/feature_store/feature_view.py
CHANGED
@@ -169,6 +169,7 @@ class FeatureView(lineage_node.LineageNode):
         desc: str = "",
         warehouse: Optional[str] = None,
         initialize: str = "ON_CREATE",
+        refresh_mode: str = "AUTO",
         **_kwargs: Any,
     ) -> None:
         """
@@ -196,6 +197,9 @@ class FeatureView(lineage_node.LineageNode):
             after you register the feature view. It supports ON_CREATE (default) or ON_SCHEDULE. ON_CREATE refreshes
             the feature view synchronously at creation. ON_SCHEDULE refreshes the feature view at the next scheduled
             refresh. It is only effective when refresh_freq is not None.
+            refresh_mode: The refresh mode of managed feature view. The value can be 'AUTO', 'FULL' or 'INCREMENETAL'.
+                For managed feature view, the default value is 'AUTO'. For static feature view it has no effect.
+                Check https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table for for details.
             _kwargs: reserved kwargs for system generated args. NOTE: DO NOT USE.

         Example::
@@ -242,7 +246,7 @@ class FeatureView(lineage_node.LineageNode):
         self._schema: Optional[SqlIdentifier] = None
         self._initialize: str = initialize
         self._warehouse: Optional[SqlIdentifier] = SqlIdentifier(warehouse) if warehouse is not None else None
-        self._refresh_mode: Optional[str] = None
+        self._refresh_mode: Optional[str] = refresh_mode
         self._refresh_mode_reason: Optional[str] = None
         self._owner: Optional[str] = None
         self._validate()
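A hedged sketch of the new constructor argument (entity and source DataFrame setup are assumed to exist elsewhere):

    from snowflake.ml.feature_store import FeatureView

    fv = FeatureView(
        name="MY_FV",
        entities=[my_entity],        # assumed to be defined earlier
        feature_df=source_df,        # assumed Snowpark DataFrame
        refresh_freq="1 day",        # makes this a managed feature view
        refresh_mode="INCREMENTAL",  # new in 1.7.0; defaults to "AUTO"
    )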
snowflake/ml/model/_client/model/model_version_impl.py
CHANGED
@@ -614,6 +614,102 @@ class ModelVersion(lineage_node.LineageNode):
             version_name=sql_identifier.SqlIdentifier(version),
         )

+    @overload
+    def create_service(
+        self,
+        *,
+        service_name: str,
+        image_build_compute_pool: Optional[str] = None,
+        service_compute_pool: str,
+        image_repo: str,
+        ingress_enabled: bool = False,
+        max_instances: int = 1,
+        cpu_requests: Optional[str] = None,
+        memory_requests: Optional[str] = None,
+        gpu_requests: Optional[str] = None,
+        num_workers: Optional[int] = None,
+        max_batch_rows: Optional[int] = None,
+        force_rebuild: bool = False,
+        build_external_access_integration: Optional[str] = None,
+    ) -> str:
+        """Create an inference service with the given spec.
+
+        Args:
+            service_name: The name of the service, can be fully qualified. If not fully qualified, the database or
+                schema of the model will be used.
+            image_build_compute_pool: The name of the compute pool used to build the model inference image. It uses
+                the service compute pool if None.
+            service_compute_pool: The name of the compute pool used to run the inference service.
+            image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
+                or schema of the model will be used.
+            ingress_enabled: If true, creates an service endpoint associated with the service. User must have
+                BIND SERVICE ENDPOINT privilege on the account.
+            max_instances: The maximum number of inference service instances to run. The same value it set to
+                MIN_INSTANCES property of the service.
+            cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
+                None, we attempt to utilize all the vCPU of the node.
+            memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
+                requires a unit (GiB, MiB). If None, we attempt to utilize all the memory of the node.
+            gpu_requests: The gpu limit for GPU based inference. Can be integer, fractional or string values. Use CPU
+                if None.
+            num_workers: The number of workers to run the inference service for handling requests in parallel within an
+                instance of the service. By default, it is set to 2*vCPU+1 of the node for CPU based inference and 1 for
+                GPU based inference. For GPU based inference, please see best practices before playing with this value.
+            max_batch_rows: The maximum number of rows to batch for inference. Auto determined if None. Minimum 32.
+            force_rebuild: Whether to force a model inference image rebuild.
+            build_external_access_integration: (Deprecated) The external access integration for image build. This is
+                usually permitting access to conda & PyPI repositories.
+        """
+        ...
+
+    @overload
+    def create_service(
+        self,
+        *,
+        service_name: str,
+        image_build_compute_pool: Optional[str] = None,
+        service_compute_pool: str,
+        image_repo: str,
+        ingress_enabled: bool = False,
+        max_instances: int = 1,
+        cpu_requests: Optional[str] = None,
+        memory_requests: Optional[str] = None,
+        gpu_requests: Optional[str] = None,
+        num_workers: Optional[int] = None,
+        max_batch_rows: Optional[int] = None,
+        force_rebuild: bool = False,
+        build_external_access_integrations: Optional[List[str]] = None,
+    ) -> str:
+        """Create an inference service with the given spec.
+
+        Args:
+            service_name: The name of the service, can be fully qualified. If not fully qualified, the database or
+                schema of the model will be used.
+            image_build_compute_pool: The name of the compute pool used to build the model inference image. It uses
+                the service compute pool if None.
+            service_compute_pool: The name of the compute pool used to run the inference service.
+            image_repo: The name of the image repository, can be fully qualified. If not fully qualified, the database
+                or schema of the model will be used.
+            ingress_enabled: If true, creates an service endpoint associated with the service. User must have
+                BIND SERVICE ENDPOINT privilege on the account.
+            max_instances: The maximum number of inference service instances to run. The same value it set to
+                MIN_INSTANCES property of the service.
+            cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
+                None, we attempt to utilize all the vCPU of the node.
+            memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
+                requires a unit (GiB, MiB). If None, we attempt to utilize all the memory of the node.
+            gpu_requests: The gpu limit for GPU based inference. Can be integer, fractional or string values. Use CPU
+                if None.
+            num_workers: The number of workers to run the inference service for handling requests in parallel within an
+                instance of the service. By default, it is set to 2*vCPU+1 of the node for CPU based inference and 1 for
+                GPU based inference. For GPU based inference, please see best practices before playing with this value.
+            max_batch_rows: The maximum number of rows to batch for inference. Auto determined if None. Minimum 32.
+            force_rebuild: Whether to force a model inference image rebuild.
+            build_external_access_integrations: The external access integrations for image build. This is usually
+                permitting access to conda & PyPI repositories.
+        """
+        ...
+
     @telemetry.send_api_usage_telemetry(
         project=_TELEMETRY_PROJECT,
         subproject=_TELEMETRY_SUBPROJECT,
@@ -638,11 +734,14 @@ class ModelVersion(lineage_node.LineageNode):
         image_repo: str,
         ingress_enabled: bool = False,
         max_instances: int = 1,
+        cpu_requests: Optional[str] = None,
+        memory_requests: Optional[str] = None,
         gpu_requests: Optional[str] = None,
         num_workers: Optional[int] = None,
         max_batch_rows: Optional[int] = None,
         force_rebuild: bool = False,
-        build_external_access_integration: str,
+        build_external_access_integration: Optional[str] = None,
+        build_external_access_integrations: Optional[List[str]] = None,
     ) -> str:
         """Create an inference service with the given spec.

@@ -658,6 +757,10 @@ class ModelVersion(lineage_node.LineageNode):
                 BIND SERVICE ENDPOINT privilege on the account.
             max_instances: The maximum number of inference service instances to run. The same value it set to
                 MIN_INSTANCES property of the service.
+            cpu_requests: The cpu limit for CPU based inference. Can be an integer, fractional or string values. If
+                None, we attempt to utilize all the vCPU of the node.
+            memory_requests: The memory limit with for CPU based inference. Can be an integer or a fractional value, but
+                requires a unit (GiB, MiB). If None, we attempt to utilize all the memory of the node.
             gpu_requests: The gpu limit for GPU based inference. Can be integer, fractional or string values. Use CPU
                 if None.
             num_workers: The number of workers to run the inference service for handling requests in parallel within an
@@ -665,9 +768,14 @@ class ModelVersion(lineage_node.LineageNode):
                 GPU based inference. For GPU based inference, please see best practices before playing with this value.
             max_batch_rows: The maximum number of rows to batch for inference. Auto determined if None. Minimum 32.
             force_rebuild: Whether to force a model inference image rebuild.
-            build_external_access_integration: The external access integration for image build. This is
+            build_external_access_integration: (Deprecated) The external access integration for image build. This is
+                usually permitting access to conda & PyPI repositories.
+            build_external_access_integrations: The external access integrations for image build. This is usually
                 permitting access to conda & PyPI repositories.

+        Raises:
+            ValueError: Illegal external access integration arguments.
+
         Returns:
             Result information about service creation from server.
         """
@@ -675,6 +783,20 @@ class ModelVersion(lineage_node.LineageNode):
             project=_TELEMETRY_PROJECT,
             subproject=_TELEMETRY_SUBPROJECT,
         )
+        if build_external_access_integration is not None:
+            msg = (
+                "`build_external_access_integration` is deprecated. "
+                "Please use `build_external_access_integrations` instead."
+            )
+            warnings.warn(msg, DeprecationWarning, stacklevel=2)
+            if build_external_access_integrations is not None:
+                msg = (
+                    "`build_external_access_integration` and `build_external_access_integrations` cannot be set at the"
+                    "same time. Please use `build_external_access_integrations` only."
+                )
+                raise ValueError(msg)
+            build_external_access_integrations = [build_external_access_integration]
+
         service_db_id, service_schema_id, service_id = sql_identifier.parse_fully_qualified_name(service_name)
         image_repo_db_id, image_repo_schema_id, image_repo_id = sql_identifier.parse_fully_qualified_name(image_repo)
         return self._service_ops.create_service(
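A hedged usage sketch of the migration this shim enables (`mv` is an existing `ModelVersion`; the names are illustrative):

    result = mv.create_service(
        service_name="MY_SERVICE",
        service_compute_pool="MY_POOL",
        image_repo="MY_REPO",
        # replaces the deprecated build_external_access_integration="MY_EAI"
        build_external_access_integrations=["MY_EAI"],
    )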
@@ -696,11 +818,17 @@ class ModelVersion(lineage_node.LineageNode):
             image_repo_name=image_repo_id,
             ingress_enabled=ingress_enabled,
             max_instances=max_instances,
+            cpu_requests=cpu_requests,
+            memory_requests=memory_requests,
             gpu_requests=gpu_requests,
             num_workers=num_workers,
             max_batch_rows=max_batch_rows,
             force_rebuild=force_rebuild,
-            build_external_access_integration=sql_identifier.SqlIdentifier(build_external_access_integration),
+            build_external_access_integrations=(
+                None
+                if build_external_access_integrations is None
+                else [sql_identifier.SqlIdentifier(eai) for eai in build_external_access_integrations]
+            ),
             statement_params=statement_params,
         )

@@ -710,7 +838,7 @@ class ModelVersion(lineage_node.LineageNode):
     )
     def list_services(
         self,
-    ) -> List[str]:
+    ) -> pd.DataFrame:
        """List all the service names using this model version.

        Returns:
@@ -722,12 +850,18 @@ class ModelVersion(lineage_node.LineageNode):
             subproject=_TELEMETRY_SUBPROJECT,
         )

-        return self._model_ops.list_inference_services(
-            database_name=None,
-            schema_name=None,
-            model_name=self._model_name,
-            version_name=self._version_name,
-            statement_params=statement_params,
+        return pd.DataFrame(
+            self._model_ops.list_inference_services(
+                database_name=None,
+                schema_name=None,
+                model_name=self._model_name,
+                version_name=self._version_name,
+                statement_params=statement_params,
+            ),
+            columns=[
+                self._model_ops.INFERENCE_SERVICE_NAME_COL_NAME,
+                self._model_ops.INFERENCE_SERVICE_ENDPOINT_COL_NAME,
+            ],
         )

     @telemetry.send_api_usage_telemetry(