snowflake-ml-python 1.8.2__py3-none-any.whl → 1.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +7 -1
- snowflake/cortex/_classify_text.py +3 -3
- snowflake/cortex/_complete.py +23 -24
- snowflake/cortex/_embed_text_1024.py +4 -4
- snowflake/cortex/_embed_text_768.py +4 -4
- snowflake/cortex/_finetune.py +8 -8
- snowflake/cortex/_util.py +8 -12
- snowflake/ml/_internal/env.py +4 -3
- snowflake/ml/_internal/env_utils.py +63 -34
- snowflake/ml/_internal/file_utils.py +10 -21
- snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +5 -7
- snowflake/ml/_internal/init_utils.py +2 -3
- snowflake/ml/_internal/lineage/lineage_utils.py +6 -6
- snowflake/ml/_internal/platform_capabilities.py +18 -16
- snowflake/ml/_internal/telemetry.py +39 -52
- snowflake/ml/_internal/type_utils.py +3 -3
- snowflake/ml/_internal/utils/db_utils.py +2 -2
- snowflake/ml/_internal/utils/identifier.py +10 -10
- snowflake/ml/_internal/utils/import_utils.py +2 -2
- snowflake/ml/_internal/utils/parallelize.py +7 -7
- snowflake/ml/_internal/utils/pkg_version_utils.py +11 -11
- snowflake/ml/_internal/utils/query_result_checker.py +4 -4
- snowflake/ml/_internal/utils/snowflake_env.py +28 -6
- snowflake/ml/_internal/utils/snowpark_dataframe_utils.py +2 -2
- snowflake/ml/_internal/utils/sql_identifier.py +3 -3
- snowflake/ml/_internal/utils/table_manager.py +9 -9
- snowflake/ml/data/_internal/arrow_ingestor.py +7 -7
- snowflake/ml/data/data_connector.py +15 -36
- snowflake/ml/data/data_ingestor.py +4 -15
- snowflake/ml/data/data_source.py +2 -2
- snowflake/ml/data/ingestor_utils.py +3 -3
- snowflake/ml/data/torch_utils.py +5 -5
- snowflake/ml/dataset/dataset.py +11 -11
- snowflake/ml/dataset/dataset_metadata.py +8 -8
- snowflake/ml/dataset/dataset_reader.py +7 -7
- snowflake/ml/feature_store/__init__.py +1 -1
- snowflake/ml/feature_store/access_manager.py +7 -7
- snowflake/ml/feature_store/entity.py +6 -6
- snowflake/ml/feature_store/examples/airline_features/entities.py +1 -3
- snowflake/ml/feature_store/examples/airline_features/features/plane_features.py +1 -3
- snowflake/ml/feature_store/examples/airline_features/features/weather_features.py +1 -3
- snowflake/ml/feature_store/examples/citibike_trip_features/entities.py +1 -3
- snowflake/ml/feature_store/examples/citibike_trip_features/features/station_feature.py +1 -3
- snowflake/ml/feature_store/examples/citibike_trip_features/features/trip_feature.py +1 -3
- snowflake/ml/feature_store/examples/example_helper.py +16 -16
- snowflake/ml/feature_store/examples/new_york_taxi_features/entities.py +1 -3
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/location_features.py +1 -3
- snowflake/ml/feature_store/examples/new_york_taxi_features/features/trip_features.py +1 -3
- snowflake/ml/feature_store/examples/wine_quality_features/entities.py +1 -3
- snowflake/ml/feature_store/examples/wine_quality_features/features/managed_wine_features.py +1 -3
- snowflake/ml/feature_store/examples/wine_quality_features/features/static_wine_features.py +1 -3
- snowflake/ml/feature_store/feature_store.py +52 -64
- snowflake/ml/feature_store/feature_view.py +24 -24
- snowflake/ml/fileset/embedded_stage_fs.py +5 -5
- snowflake/ml/fileset/fileset.py +5 -5
- snowflake/ml/fileset/sfcfs.py +13 -13
- snowflake/ml/fileset/stage_fs.py +15 -15
- snowflake/ml/jobs/_utils/constants.py +1 -1
- snowflake/ml/jobs/_utils/interop_utils.py +10 -10
- snowflake/ml/jobs/_utils/payload_utils.py +45 -46
- snowflake/ml/jobs/_utils/scripts/get_instance_ip.py +4 -4
- snowflake/ml/jobs/_utils/scripts/mljob_launcher.py +8 -5
- snowflake/ml/jobs/_utils/scripts/signal_workers.py +8 -8
- snowflake/ml/jobs/_utils/spec_utils.py +18 -29
- snowflake/ml/jobs/_utils/types.py +2 -2
- snowflake/ml/jobs/decorators.py +10 -5
- snowflake/ml/jobs/job.py +87 -30
- snowflake/ml/jobs/manager.py +86 -56
- snowflake/ml/lineage/lineage_node.py +5 -5
- snowflake/ml/model/_client/model/model_impl.py +3 -3
- snowflake/ml/model/_client/model/model_version_impl.py +103 -35
- snowflake/ml/model/_client/ops/metadata_ops.py +7 -7
- snowflake/ml/model/_client/ops/model_ops.py +41 -41
- snowflake/ml/model/_client/ops/service_ops.py +217 -32
- snowflake/ml/model/_client/service/model_deployment_spec.py +359 -65
- snowflake/ml/model/_client/service/model_deployment_spec_schema.py +69 -24
- snowflake/ml/model/_client/sql/model.py +8 -8
- snowflake/ml/model/_client/sql/model_version.py +26 -26
- snowflake/ml/model/_client/sql/service.py +17 -26
- snowflake/ml/model/_client/sql/stage.py +2 -2
- snowflake/ml/model/_client/sql/tag.py +6 -6
- snowflake/ml/model/_model_composer/model_composer.py +58 -32
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -16
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +14 -13
- snowflake/ml/model/_model_composer/model_method/model_method.py +3 -3
- snowflake/ml/model/_packager/model_env/model_env.py +28 -25
- snowflake/ml/model/_packager/model_handler.py +4 -4
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +47 -5
- snowflake/ml/model/_packager/model_handlers/catboost.py +5 -5
- snowflake/ml/model/_packager/model_handlers/custom.py +9 -5
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +7 -21
- snowflake/ml/model/_packager/model_handlers/keras.py +4 -4
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +4 -14
- snowflake/ml/model/_packager/model_handlers/mlflow.py +3 -3
- snowflake/ml/model/_packager/model_handlers/pytorch.py +5 -6
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +5 -5
- snowflake/ml/model/_packager/model_handlers/sklearn.py +104 -46
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +3 -3
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +11 -8
- snowflake/ml/model/_packager/model_handlers/torchscript.py +6 -6
- snowflake/ml/model/_packager/model_handlers/xgboost.py +21 -22
- snowflake/ml/model/_packager/model_meta/model_blob_meta.py +2 -2
- snowflake/ml/model/_packager/model_meta/model_meta.py +39 -38
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +14 -11
- snowflake/ml/model/_packager/model_meta_migrator/base_migrator.py +3 -3
- snowflake/ml/model/_packager/model_meta_migrator/migrator_plans.py +3 -3
- snowflake/ml/model/_packager/model_meta_migrator/migrator_v1.py +4 -4
- snowflake/ml/model/_packager/model_packager.py +11 -9
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +32 -1
- snowflake/ml/model/_packager/model_runtime/model_runtime.py +4 -2
- snowflake/ml/model/_signatures/core.py +16 -24
- snowflake/ml/model/_signatures/dmatrix_handler.py +17 -4
- snowflake/ml/model/_signatures/utils.py +6 -6
- snowflake/ml/model/custom_model.py +24 -11
- snowflake/ml/model/model_signature.py +12 -23
- snowflake/ml/model/models/huggingface_pipeline.py +7 -4
- snowflake/ml/model/type_hints.py +3 -3
- snowflake/ml/modeling/_internal/estimator_utils.py +7 -7
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +6 -6
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +7 -7
- snowflake/ml/modeling/_internal/model_specifications.py +8 -10
- snowflake/ml/modeling/_internal/model_trainer.py +5 -5
- snowflake/ml/modeling/_internal/model_trainer_builder.py +6 -6
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +30 -30
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +13 -13
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +31 -31
- snowflake/ml/modeling/_internal/snowpark_implementations/xgboost_external_memory_trainer.py +19 -19
- snowflake/ml/modeling/_internal/transformer_protocols.py +17 -17
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +9 -1
- snowflake/ml/modeling/cluster/affinity_propagation.py +9 -1
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +9 -1
- snowflake/ml/modeling/cluster/birch.py +9 -1
- snowflake/ml/modeling/cluster/bisecting_k_means.py +9 -1
- snowflake/ml/modeling/cluster/dbscan.py +9 -1
- snowflake/ml/modeling/cluster/feature_agglomeration.py +9 -1
- snowflake/ml/modeling/cluster/k_means.py +9 -1
- snowflake/ml/modeling/cluster/mean_shift.py +9 -1
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +9 -1
- snowflake/ml/modeling/cluster/optics.py +9 -1
- snowflake/ml/modeling/cluster/spectral_biclustering.py +9 -1
- snowflake/ml/modeling/cluster/spectral_clustering.py +9 -1
- snowflake/ml/modeling/cluster/spectral_coclustering.py +9 -1
- snowflake/ml/modeling/compose/column_transformer.py +9 -1
- snowflake/ml/modeling/compose/transformed_target_regressor.py +9 -1
- snowflake/ml/modeling/covariance/elliptic_envelope.py +9 -1
- snowflake/ml/modeling/covariance/empirical_covariance.py +9 -1
- snowflake/ml/modeling/covariance/graphical_lasso.py +9 -1
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +9 -1
- snowflake/ml/modeling/covariance/ledoit_wolf.py +9 -1
- snowflake/ml/modeling/covariance/min_cov_det.py +9 -1
- snowflake/ml/modeling/covariance/oas.py +9 -1
- snowflake/ml/modeling/covariance/shrunk_covariance.py +9 -1
- snowflake/ml/modeling/decomposition/dictionary_learning.py +9 -1
- snowflake/ml/modeling/decomposition/factor_analysis.py +9 -1
- snowflake/ml/modeling/decomposition/fast_ica.py +9 -1
- snowflake/ml/modeling/decomposition/incremental_pca.py +9 -1
- snowflake/ml/modeling/decomposition/kernel_pca.py +9 -1
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +9 -1
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +9 -1
- snowflake/ml/modeling/decomposition/pca.py +9 -1
- snowflake/ml/modeling/decomposition/sparse_pca.py +9 -1
- snowflake/ml/modeling/decomposition/truncated_svd.py +9 -1
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +9 -1
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +9 -1
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/bagging_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/bagging_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/isolation_forest.py +9 -1
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/stacking_regressor.py +9 -1
- snowflake/ml/modeling/ensemble/voting_classifier.py +9 -1
- snowflake/ml/modeling/ensemble/voting_regressor.py +9 -1
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +9 -1
- snowflake/ml/modeling/feature_selection/select_fdr.py +9 -1
- snowflake/ml/modeling/feature_selection/select_fpr.py +9 -1
- snowflake/ml/modeling/feature_selection/select_fwe.py +9 -1
- snowflake/ml/modeling/feature_selection/select_k_best.py +9 -1
- snowflake/ml/modeling/feature_selection/select_percentile.py +9 -1
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +9 -1
- snowflake/ml/modeling/feature_selection/variance_threshold.py +9 -1
- snowflake/ml/modeling/framework/_utils.py +10 -10
- snowflake/ml/modeling/framework/base.py +32 -32
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +9 -1
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +9 -1
- snowflake/ml/modeling/impute/__init__.py +1 -1
- snowflake/ml/modeling/impute/iterative_imputer.py +9 -1
- snowflake/ml/modeling/impute/knn_imputer.py +9 -1
- snowflake/ml/modeling/impute/missing_indicator.py +9 -1
- snowflake/ml/modeling/impute/simple_imputer.py +5 -5
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +9 -1
- snowflake/ml/modeling/kernel_approximation/nystroem.py +9 -1
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +9 -1
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +9 -1
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +9 -1
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +9 -1
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +9 -1
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/ard_regression.py +9 -1
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +9 -1
- snowflake/ml/modeling/linear_model/elastic_net.py +9 -1
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +9 -1
- snowflake/ml/modeling/linear_model/gamma_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/huber_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/lars.py +9 -1
- snowflake/ml/modeling/linear_model/lars_cv.py +9 -1
- snowflake/ml/modeling/linear_model/lasso.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_cv.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_lars.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +9 -1
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +9 -1
- snowflake/ml/modeling/linear_model/linear_regression.py +9 -1
- snowflake/ml/modeling/linear_model/logistic_regression.py +9 -1
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +9 -1
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +9 -1
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +9 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +9 -1
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/perceptron.py +9 -1
- snowflake/ml/modeling/linear_model/poisson_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/ransac_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/ridge.py +9 -1
- snowflake/ml/modeling/linear_model/ridge_classifier.py +9 -1
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +9 -1
- snowflake/ml/modeling/linear_model/ridge_cv.py +9 -1
- snowflake/ml/modeling/linear_model/sgd_classifier.py +9 -1
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +9 -1
- snowflake/ml/modeling/linear_model/sgd_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +9 -1
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +9 -1
- snowflake/ml/modeling/manifold/isomap.py +9 -1
- snowflake/ml/modeling/manifold/mds.py +9 -1
- snowflake/ml/modeling/manifold/spectral_embedding.py +9 -1
- snowflake/ml/modeling/manifold/tsne.py +9 -1
- snowflake/ml/modeling/metrics/__init__.py +1 -1
- snowflake/ml/modeling/metrics/classification.py +39 -39
- snowflake/ml/modeling/metrics/metrics_utils.py +12 -12
- snowflake/ml/modeling/metrics/ranking.py +7 -7
- snowflake/ml/modeling/metrics/regression.py +13 -13
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +9 -1
- snowflake/ml/modeling/mixture/gaussian_mixture.py +9 -1
- snowflake/ml/modeling/model_selection/__init__.py +1 -1
- snowflake/ml/modeling/model_selection/grid_search_cv.py +7 -7
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +7 -7
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +9 -1
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +9 -1
- snowflake/ml/modeling/multiclass/output_code_classifier.py +9 -1
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/complement_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +9 -1
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +9 -1
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +9 -1
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +9 -1
- snowflake/ml/modeling/neighbors/kernel_density.py +9 -1
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +9 -1
- snowflake/ml/modeling/neighbors/nearest_centroid.py +9 -1
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +9 -1
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +9 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +9 -1
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +9 -1
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +9 -1
- snowflake/ml/modeling/neural_network/mlp_classifier.py +9 -1
- snowflake/ml/modeling/neural_network/mlp_regressor.py +9 -1
- snowflake/ml/modeling/pipeline/__init__.py +1 -1
- snowflake/ml/modeling/pipeline/pipeline.py +18 -18
- snowflake/ml/modeling/preprocessing/__init__.py +1 -1
- snowflake/ml/modeling/preprocessing/k_bins_discretizer.py +13 -13
- snowflake/ml/modeling/preprocessing/max_abs_scaler.py +4 -4
- snowflake/ml/modeling/preprocessing/min_max_scaler.py +8 -8
- snowflake/ml/modeling/preprocessing/normalizer.py +0 -1
- snowflake/ml/modeling/preprocessing/one_hot_encoder.py +28 -28
- snowflake/ml/modeling/preprocessing/ordinal_encoder.py +9 -9
- snowflake/ml/modeling/preprocessing/polynomial_features.py +9 -1
- snowflake/ml/modeling/preprocessing/robust_scaler.py +7 -7
- snowflake/ml/modeling/preprocessing/standard_scaler.py +5 -5
- snowflake/ml/modeling/semi_supervised/label_propagation.py +9 -1
- snowflake/ml/modeling/semi_supervised/label_spreading.py +9 -1
- snowflake/ml/modeling/svm/linear_svc.py +9 -1
- snowflake/ml/modeling/svm/linear_svr.py +9 -1
- snowflake/ml/modeling/svm/nu_svc.py +9 -1
- snowflake/ml/modeling/svm/nu_svr.py +9 -1
- snowflake/ml/modeling/svm/svc.py +9 -1
- snowflake/ml/modeling/svm/svr.py +9 -1
- snowflake/ml/modeling/tree/decision_tree_classifier.py +9 -1
- snowflake/ml/modeling/tree/decision_tree_regressor.py +9 -1
- snowflake/ml/modeling/tree/extra_tree_classifier.py +9 -1
- snowflake/ml/modeling/tree/extra_tree_regressor.py +9 -1
- snowflake/ml/modeling/xgboost/xgb_classifier.py +9 -1
- snowflake/ml/modeling/xgboost/xgb_regressor.py +9 -1
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +9 -1
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +9 -1
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +26 -26
- snowflake/ml/monitoring/_manager/model_monitor_manager.py +5 -5
- snowflake/ml/monitoring/entities/model_monitor_config.py +6 -6
- snowflake/ml/monitoring/explain_visualize.py +286 -0
- snowflake/ml/registry/_manager/model_manager.py +55 -32
- snowflake/ml/registry/registry.py +39 -31
- snowflake/ml/utils/authentication.py +2 -2
- snowflake/ml/utils/connection_params.py +5 -5
- snowflake/ml/utils/sparse.py +5 -4
- snowflake/ml/utils/sql_client.py +1 -2
- snowflake/ml/version.py +2 -1
- {snowflake_ml_python-1.8.2.dist-info → snowflake_ml_python-1.8.4.dist-info}/METADATA +55 -14
- snowflake_ml_python-1.8.4.dist-info/RECORD +419 -0
- {snowflake_ml_python-1.8.2.dist-info → snowflake_ml_python-1.8.4.dist-info}/WHEEL +1 -1
- snowflake/ml/model/_packager/model_meta/_packaging_requirements.py +0 -1
- snowflake/ml/modeling/_internal/constants.py +0 -2
- snowflake_ml_python-1.8.2.dist-info/RECORD +0 -420
- {snowflake_ml_python-1.8.2.dist-info → snowflake_ml_python-1.8.4.dist-info}/licenses/LICENSE.txt +0 -0
- {snowflake_ml_python-1.8.2.dist-info → snowflake_ml_python-1.8.4.dist-info}/top_level.txt +0 -0
@@ -8,25 +8,13 @@ import re
|
|
8
8
|
import warnings
|
9
9
|
from dataclasses import dataclass
|
10
10
|
from enum import Enum
|
11
|
-
from typing import
|
12
|
-
Any,
|
13
|
-
Callable,
|
14
|
-
Dict,
|
15
|
-
List,
|
16
|
-
Literal,
|
17
|
-
Optional,
|
18
|
-
Tuple,
|
19
|
-
TypeVar,
|
20
|
-
Union,
|
21
|
-
cast,
|
22
|
-
overload,
|
23
|
-
)
|
11
|
+
from typing import Any, Callable, Literal, Optional, TypeVar, Union, cast, overload
|
24
12
|
|
25
13
|
import packaging.version as pkg_version
|
26
|
-
import snowflake.ml.version as snowml_version
|
27
14
|
from pytimeparse.timeparse import timeparse
|
28
15
|
from typing_extensions import Concatenate, ParamSpec
|
29
16
|
|
17
|
+
import snowflake.ml.version as snowml_version
|
30
18
|
from snowflake.ml import dataset
|
31
19
|
from snowflake.ml._internal import telemetry
|
32
20
|
from snowflake.ml._internal.exceptions import (
|
@@ -164,7 +152,7 @@ class _FeatureStoreConfig:
|
|
164
152
|
|
165
153
|
|
166
154
|
def switch_warehouse(
|
167
|
-
f: Callable[Concatenate[FeatureStore, _Args], _RT]
|
155
|
+
f: Callable[Concatenate[FeatureStore, _Args], _RT],
|
168
156
|
) -> Callable[Concatenate[FeatureStore, _Args], _RT]:
|
169
157
|
@functools.wraps(f)
|
170
158
|
def wrapper(self: FeatureStore, /, *args: _Args.args, **kargs: _Args.kwargs) -> _RT:
|
@@ -189,7 +177,7 @@ def dispatch_decorator() -> Callable[
|
|
189
177
|
Callable[Concatenate[FeatureStore, _Args], _RT],
|
190
178
|
]:
|
191
179
|
def decorator(
|
192
|
-
f: Callable[Concatenate[FeatureStore, _Args], _RT]
|
180
|
+
f: Callable[Concatenate[FeatureStore, _Args], _RT],
|
193
181
|
) -> Callable[Concatenate[FeatureStore, _Args], _RT]:
|
194
182
|
@telemetry.send_api_usage_telemetry(project=_PROJECT)
|
195
183
|
@switch_warehouse
|
@@ -500,7 +488,7 @@ class FeatureStore:
|
|
500
488
|
Example::
|
501
489
|
|
502
490
|
>>> fs = FeatureStore(...)
|
503
|
-
>>> # draft_fv is a local object that hasn't
|
491
|
+
>>> # draft_fv is a local object that hasn't materialized to Snowflake backend yet.
|
504
492
|
>>> feature_df = session.sql("select f_1, f_2 from source_table")
|
505
493
|
>>> draft_fv = FeatureView("my_fv", [entities], feature_df)
|
506
494
|
>>> print(draft_fv.status)
|
@@ -837,7 +825,7 @@ class FeatureStore:
|
|
837
825
|
entity_name = SqlIdentifier(entity_name)
|
838
826
|
return self._optimized_find_feature_views(entity_name, feature_view_name)
|
839
827
|
else:
|
840
|
-
output_values:
|
828
|
+
output_values: list[list[Any]] = []
|
841
829
|
for row, _ in self._get_fv_backend_representations(feature_view_name, prefix_match=True):
|
842
830
|
self._extract_feature_view_info(row, output_values)
|
843
831
|
return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
|
@@ -861,7 +849,7 @@ class FeatureStore:
|
|
861
849
|
Example::
|
862
850
|
|
863
851
|
>>> fs = FeatureStore(...)
|
864
|
-
>>> # draft_fv is a local object that hasn't
|
852
|
+
>>> # draft_fv is a local object that hasn't materialized to Snowflake backend yet.
|
865
853
|
>>> draft_fv = FeatureView(
|
866
854
|
... name='foo',
|
867
855
|
... entities=[e1],
|
@@ -1353,10 +1341,10 @@ class FeatureStore:
|
|
1353
1341
|
def retrieve_feature_values(
|
1354
1342
|
self,
|
1355
1343
|
spine_df: DataFrame,
|
1356
|
-
features: Union[
|
1344
|
+
features: Union[list[Union[FeatureView, FeatureViewSlice]], list[str]],
|
1357
1345
|
*,
|
1358
1346
|
spine_timestamp_col: Optional[str] = None,
|
1359
|
-
exclude_columns: Optional[
|
1347
|
+
exclude_columns: Optional[list[str]] = None,
|
1360
1348
|
include_feature_view_timestamp_col: bool = False,
|
1361
1349
|
) -> DataFrame:
|
1362
1350
|
"""
|
@@ -1401,11 +1389,11 @@ class FeatureStore:
|
|
1401
1389
|
if len(features) == 0:
|
1402
1390
|
raise ValueError("features cannot be empty")
|
1403
1391
|
if isinstance(features[0], str):
|
1404
|
-
features = self._load_serialized_feature_views(cast(
|
1392
|
+
features = self._load_serialized_feature_views(cast(list[str], features))
|
1405
1393
|
|
1406
1394
|
df, _ = self._join_features(
|
1407
1395
|
spine_df,
|
1408
|
-
cast(
|
1396
|
+
cast(list[Union[FeatureView, FeatureViewSlice]], features),
|
1409
1397
|
spine_timestamp_col,
|
1410
1398
|
include_feature_view_timestamp_col,
|
1411
1399
|
)
|
@@ -1419,12 +1407,12 @@ class FeatureStore:
|
|
1419
1407
|
def generate_training_set(
|
1420
1408
|
self,
|
1421
1409
|
spine_df: DataFrame,
|
1422
|
-
features:
|
1410
|
+
features: list[Union[FeatureView, FeatureViewSlice]],
|
1423
1411
|
*,
|
1424
1412
|
save_as: Optional[str] = None,
|
1425
1413
|
spine_timestamp_col: Optional[str] = None,
|
1426
|
-
spine_label_cols: Optional[
|
1427
|
-
exclude_columns: Optional[
|
1414
|
+
spine_label_cols: Optional[list[str]] = None,
|
1415
|
+
exclude_columns: Optional[list[str]] = None,
|
1428
1416
|
include_feature_view_timestamp_col: bool = False,
|
1429
1417
|
) -> DataFrame:
|
1430
1418
|
"""
|
@@ -1515,12 +1503,12 @@ class FeatureStore:
|
|
1515
1503
|
self,
|
1516
1504
|
name: str,
|
1517
1505
|
spine_df: DataFrame,
|
1518
|
-
features:
|
1506
|
+
features: list[Union[FeatureView, FeatureViewSlice]],
|
1519
1507
|
*,
|
1520
1508
|
version: Optional[str] = None,
|
1521
1509
|
spine_timestamp_col: Optional[str] = None,
|
1522
|
-
spine_label_cols: Optional[
|
1523
|
-
exclude_columns: Optional[
|
1510
|
+
spine_label_cols: Optional[list[str]] = None,
|
1511
|
+
exclude_columns: Optional[list[str]] = None,
|
1524
1512
|
include_feature_view_timestamp_col: bool = False,
|
1525
1513
|
desc: str = "",
|
1526
1514
|
output_type: Literal["dataset"] = "dataset",
|
@@ -1532,13 +1520,13 @@ class FeatureStore:
|
|
1532
1520
|
self,
|
1533
1521
|
name: str,
|
1534
1522
|
spine_df: DataFrame,
|
1535
|
-
features:
|
1523
|
+
features: list[Union[FeatureView, FeatureViewSlice]],
|
1536
1524
|
*,
|
1537
1525
|
output_type: Literal["table"],
|
1538
1526
|
version: Optional[str] = None,
|
1539
1527
|
spine_timestamp_col: Optional[str] = None,
|
1540
|
-
spine_label_cols: Optional[
|
1541
|
-
exclude_columns: Optional[
|
1528
|
+
spine_label_cols: Optional[list[str]] = None,
|
1529
|
+
exclude_columns: Optional[list[str]] = None,
|
1542
1530
|
include_feature_view_timestamp_col: bool = False,
|
1543
1531
|
desc: str = "",
|
1544
1532
|
) -> DataFrame:
|
@@ -1549,12 +1537,12 @@ class FeatureStore:
|
|
1549
1537
|
self,
|
1550
1538
|
name: str,
|
1551
1539
|
spine_df: DataFrame,
|
1552
|
-
features:
|
1540
|
+
features: list[Union[FeatureView, FeatureViewSlice]],
|
1553
1541
|
*,
|
1554
1542
|
version: Optional[str] = None,
|
1555
1543
|
spine_timestamp_col: Optional[str] = None,
|
1556
|
-
spine_label_cols: Optional[
|
1557
|
-
exclude_columns: Optional[
|
1544
|
+
spine_label_cols: Optional[list[str]] = None,
|
1545
|
+
exclude_columns: Optional[list[str]] = None,
|
1558
1546
|
include_feature_view_timestamp_col: bool = False,
|
1559
1547
|
desc: str = "",
|
1560
1548
|
output_type: Literal["dataset", "table"] = "dataset",
|
@@ -1687,7 +1675,7 @@ class FeatureStore:
|
|
1687
1675
|
) from e
|
1688
1676
|
|
1689
1677
|
@dispatch_decorator()
|
1690
|
-
def load_feature_views_from_dataset(self, ds: dataset.Dataset) ->
|
1678
|
+
def load_feature_views_from_dataset(self, ds: dataset.Dataset) -> list[Union[FeatureView, FeatureViewSlice]]:
|
1691
1679
|
"""
|
1692
1680
|
Retrieve FeatureViews used during Dataset construction.
|
1693
1681
|
|
@@ -1798,7 +1786,7 @@ class FeatureStore:
|
|
1798
1786
|
)
|
1799
1787
|
return existing_fv
|
1800
1788
|
|
1801
|
-
def _recompose_join_keys(self, join_key: str) ->
|
1789
|
+
def _recompose_join_keys(self, join_key: str) -> list[str]:
|
1802
1790
|
# ALLOWED_VALUES in TAG will follow format ["key_1,key2,..."]
|
1803
1791
|
# since keys are already resolved following the SQL identifier rule on the write path,
|
1804
1792
|
# we simply parse the keys back and wrap them with quotes to preserve cases
|
@@ -1906,10 +1894,10 @@ class FeatureStore:
|
|
1906
1894
|
def _join_features(
|
1907
1895
|
self,
|
1908
1896
|
spine_df: DataFrame,
|
1909
|
-
features:
|
1897
|
+
features: list[Union[FeatureView, FeatureViewSlice]],
|
1910
1898
|
spine_timestamp_col: Optional[SqlIdentifier],
|
1911
1899
|
include_feature_view_timestamp_col: bool,
|
1912
|
-
) ->
|
1900
|
+
) -> tuple[DataFrame, list[SqlIdentifier]]:
|
1913
1901
|
for f in features:
|
1914
1902
|
f = f.feature_view_ref if isinstance(f, FeatureViewSlice) else f
|
1915
1903
|
if f.status == FeatureViewStatus.DRAFT:
|
@@ -2070,7 +2058,7 @@ class FeatureStore:
|
|
2070
2058
|
f_df: DataFrame,
|
2071
2059
|
f_table_name: str,
|
2072
2060
|
f_ts_col: SqlIdentifier,
|
2073
|
-
join_keys:
|
2061
|
+
join_keys: list[SqlIdentifier],
|
2074
2062
|
) -> str:
|
2075
2063
|
s_df = self._session.sql(s_query)
|
2076
2064
|
s_only_cols = [col for col in to_sql_identifiers(s_df.columns) if col not in [*join_keys, s_ts_col]]
|
@@ -2078,7 +2066,7 @@ class FeatureStore:
|
|
2078
2066
|
join_keys_str = ", ".join(join_keys)
|
2079
2067
|
temp_prefix = "_FS_TEMP_"
|
2080
2068
|
|
2081
|
-
def join_cols(cols:
|
2069
|
+
def join_cols(cols: list[SqlIdentifier], end_comma: bool, rename: bool, prefix: str = "") -> str:
|
2082
2070
|
if not cols:
|
2083
2071
|
return ""
|
2084
2072
|
cols = [f"{prefix}{col}" for col in cols] # type: ignore[misc]
|
@@ -2174,7 +2162,7 @@ class FeatureStore:
|
|
2174
2162
|
# TODO: SHOW DYNAMIC TABLES is very slow while other show objects are fast, investigate with DT in SNOW-902804.
|
2175
2163
|
def _get_fv_backend_representations(
|
2176
2164
|
self, object_name: Optional[SqlIdentifier], prefix_match: bool = False
|
2177
|
-
) ->
|
2165
|
+
) -> list[tuple[Row, _FeatureStoreObjTypes]]:
|
2178
2166
|
dynamic_table_results = [
|
2179
2167
|
(d, _FeatureStoreObjTypes.MANAGED_FEATURE_VIEW)
|
2180
2168
|
for d in self._find_object("DYNAMIC TABLES", object_name, prefix_match)
|
@@ -2232,18 +2220,18 @@ class FeatureStore:
|
|
2232
2220
|
filters = [lambda d: d["entityName"].startswith(feature_view_name.resolved())] if feature_view_name else None
|
2233
2221
|
res = self._lookup_tagged_objects(self._get_entity_name(entity_name), filters)
|
2234
2222
|
|
2235
|
-
output_values:
|
2223
|
+
output_values: list[list[Any]] = []
|
2236
2224
|
for r in res:
|
2237
2225
|
row = fv_maps[SqlIdentifier(r["entityName"], case_sensitive=True)]
|
2238
2226
|
self._extract_feature_view_info(row, output_values)
|
2239
2227
|
|
2240
2228
|
return self._session.create_dataframe(output_values, schema=_LIST_FEATURE_VIEW_SCHEMA)
|
2241
2229
|
|
2242
|
-
def _extract_feature_view_info(self, row: Row, output_values:
|
2230
|
+
def _extract_feature_view_info(self, row: Row, output_values: list[list[Any]]) -> None:
|
2243
2231
|
name, version = row["name"].split(_FEATURE_VIEW_NAME_DELIMITER)
|
2244
2232
|
fv_metadata, _ = self._lookup_feature_view_metadata(row, FeatureView._get_physical_name(name, version))
|
2245
2233
|
|
2246
|
-
values:
|
2234
|
+
values: list[Any] = []
|
2247
2235
|
values.append(name)
|
2248
2236
|
values.append(version)
|
2249
2237
|
values.append(row["database_name"])
|
@@ -2259,7 +2247,7 @@ class FeatureStore:
|
|
2259
2247
|
values.append(json.dumps(self._extract_cluster_by_columns(row["cluster_by"])) if "cluster_by" in row else None)
|
2260
2248
|
output_values.append(values)
|
2261
2249
|
|
2262
|
-
def _lookup_feature_view_metadata(self, row: Row, fv_name: str) ->
|
2250
|
+
def _lookup_feature_view_metadata(self, row: Row, fv_name: str) -> tuple[_FeatureViewMetadata, str]:
|
2263
2251
|
if len(row["text"]) == 0:
|
2264
2252
|
# NOTE: if this is a shared feature view, then text column will be empty due to privacy constraints.
|
2265
2253
|
# So instead of looking at original query text, we will obtain metadata by querying the tag value.
|
@@ -2288,7 +2276,7 @@ class FeatureStore:
|
|
2288
2276
|
query = m.group("query")
|
2289
2277
|
return (fv_metadata, query)
|
2290
2278
|
|
2291
|
-
def _compose_feature_view(self, row: Row, obj_type: _FeatureStoreObjTypes, entity_list:
|
2279
|
+
def _compose_feature_view(self, row: Row, obj_type: _FeatureStoreObjTypes, entity_list: list[Row]) -> FeatureView:
|
2292
2280
|
def find_and_compose_entity(name: str) -> Entity:
|
2293
2281
|
name = SqlIdentifier(name).resolved()
|
2294
2282
|
for e in entity_list:
|
@@ -2374,7 +2362,7 @@ class FeatureStore:
|
|
2374
2362
|
)
|
2375
2363
|
return fv
|
2376
2364
|
|
2377
|
-
def _fetch_column_descs(self, obj_type: str, obj_name: SqlIdentifier) ->
|
2365
|
+
def _fetch_column_descs(self, obj_type: str, obj_name: SqlIdentifier) -> dict[str, str]:
|
2378
2366
|
res = self._session.sql(f"DESC {obj_type} {self._get_fully_qualified_name(obj_name)}").collect(
|
2379
2367
|
statement_params=self._telemetry_stmp
|
2380
2368
|
)
|
@@ -2390,7 +2378,7 @@ class FeatureStore:
|
|
2390
2378
|
object_type: str,
|
2391
2379
|
object_name: Optional[SqlIdentifier],
|
2392
2380
|
prefix_match: bool = False,
|
2393
|
-
) ->
|
2381
|
+
) -> list[Row]:
|
2394
2382
|
"""Try to find an object by given type and name pattern.
|
2395
2383
|
|
2396
2384
|
Args:
|
@@ -2443,9 +2431,9 @@ class FeatureStore:
|
|
2443
2431
|
return result
|
2444
2432
|
|
2445
2433
|
def _load_serialized_feature_views(
|
2446
|
-
self, serialized_feature_views:
|
2447
|
-
) ->
|
2448
|
-
results:
|
2434
|
+
self, serialized_feature_views: list[str]
|
2435
|
+
) -> list[Union[FeatureView, FeatureViewSlice]]:
|
2436
|
+
results: list[Union[FeatureView, FeatureViewSlice]] = []
|
2449
2437
|
for obj in serialized_feature_views:
|
2450
2438
|
try:
|
2451
2439
|
obj_type = json.loads(obj)[_FEATURE_OBJ_TYPE]
|
@@ -2461,14 +2449,14 @@ class FeatureStore:
|
|
2461
2449
|
return results
|
2462
2450
|
|
2463
2451
|
def _load_compact_feature_views(
|
2464
|
-
self, compact_feature_views:
|
2465
|
-
) ->
|
2466
|
-
results:
|
2452
|
+
self, compact_feature_views: list[str]
|
2453
|
+
) -> list[Union[FeatureView, FeatureViewSlice]]:
|
2454
|
+
results: list[Union[FeatureView, FeatureViewSlice]] = []
|
2467
2455
|
for obj in compact_feature_views:
|
2468
2456
|
results.append(FeatureView._load_from_compact_repr(self._session, obj))
|
2469
2457
|
return results
|
2470
2458
|
|
2471
|
-
def _exclude_columns(self, df: DataFrame, exclude_columns:
|
2459
|
+
def _exclude_columns(self, df: DataFrame, exclude_columns: list[str]) -> DataFrame:
|
2472
2460
|
exclude_columns = to_sql_identifiers(exclude_columns) # type: ignore[assignment]
|
2473
2461
|
df_cols = to_sql_identifiers(df.columns)
|
2474
2462
|
for col in exclude_columns:
|
@@ -2501,8 +2489,8 @@ class FeatureStore:
|
|
2501
2489
|
)
|
2502
2490
|
|
2503
2491
|
def _filter_results(
|
2504
|
-
self, results:
|
2505
|
-
) ->
|
2492
|
+
self, results: list[dict[str, str]], filter_fns: Optional[list[Callable[[dict[str, str]], bool]]] = None
|
2493
|
+
) -> list[dict[str, str]]:
|
2506
2494
|
if filter_fns is None:
|
2507
2495
|
return results
|
2508
2496
|
|
@@ -2513,8 +2501,8 @@ class FeatureStore:
|
|
2513
2501
|
return filtered_results
|
2514
2502
|
|
2515
2503
|
def _lookup_tags(
|
2516
|
-
self, domain: str, obj_name: str, filter_fns: Optional[
|
2517
|
-
) ->
|
2504
|
+
self, domain: str, obj_name: str, filter_fns: Optional[list[Callable[[dict[str, str]], bool]]] = None
|
2505
|
+
) -> list[dict[str, str]]:
|
2518
2506
|
"""
|
2519
2507
|
Lookup tag values for a given object, optionally apply filters on the results.
|
2520
2508
|
|
@@ -2552,8 +2540,8 @@ class FeatureStore:
|
|
2552
2540
|
) from e
|
2553
2541
|
|
2554
2542
|
def _lookup_tagged_objects(
|
2555
|
-
self, tag_name: str, filter_fns: Optional[
|
2556
|
-
) ->
|
2543
|
+
self, tag_name: str, filter_fns: Optional[list[Callable[[dict[str, str]], bool]]] = None
|
2544
|
+
) -> list[dict[str, str]]:
|
2557
2545
|
"""
|
2558
2546
|
Lookup objects based on specified tag name, optionally apply filters on the results.
|
2559
2547
|
|
@@ -2589,7 +2577,7 @@ class FeatureStore:
|
|
2589
2577
|
original_exception=RuntimeError(f"Failed to lookup tagged objects for {tag_name}: {e}"),
|
2590
2578
|
) from e
|
2591
2579
|
|
2592
|
-
def _collapse_object_versions(self) ->
|
2580
|
+
def _collapse_object_versions(self) -> list[pkg_version.Version]:
|
2593
2581
|
try:
|
2594
2582
|
res = self._lookup_tagged_objects(_FEATURE_STORE_OBJECT_TAG)
|
2595
2583
|
except Exception:
|
@@ -2636,7 +2624,7 @@ class FeatureStore:
|
|
2636
2624
|
return feature_view
|
2637
2625
|
|
2638
2626
|
@staticmethod
|
2639
|
-
def _extract_cluster_by_columns(cluster_by_clause: str) ->
|
2627
|
+
def _extract_cluster_by_columns(cluster_by_clause: str) -> list[str]:
|
2640
2628
|
# Use regex to extract elements inside the parentheses.
|
2641
2629
|
match = re.search(r"\((.*?)\)", cluster_by_clause)
|
2642
2630
|
if match:
|
@@ -6,7 +6,7 @@ import warnings
|
|
6
6
|
from collections import OrderedDict
|
7
7
|
from dataclasses import asdict, dataclass
|
8
8
|
from enum import Enum
|
9
|
-
from typing import Any,
|
9
|
+
from typing import Any, Optional, Union
|
10
10
|
|
11
11
|
from snowflake.ml._internal.exceptions import (
|
12
12
|
error_codes,
|
@@ -49,7 +49,7 @@ _RESULT_SCAN_QUERY_PATTERN = re.compile(
|
|
49
49
|
class _FeatureViewMetadata:
|
50
50
|
"""Represent metadata tracked on top of FV backend object"""
|
51
51
|
|
52
|
-
entities:
|
52
|
+
entities: list[str]
|
53
53
|
timestamp_col: str
|
54
54
|
|
55
55
|
def to_json(self) -> str:
|
@@ -73,7 +73,7 @@ class _CompactRepresentation:
|
|
73
73
|
sch: str
|
74
74
|
name: str
|
75
75
|
version: str
|
76
|
-
feature_indices: Optional[
|
76
|
+
feature_indices: Optional[list[int]] = None
|
77
77
|
|
78
78
|
def to_json(self) -> str:
|
79
79
|
return json.dumps(asdict(self))
|
@@ -110,7 +110,7 @@ class FeatureViewStatus(Enum):
|
|
110
110
|
@dataclass(frozen=True)
|
111
111
|
class FeatureViewSlice:
|
112
112
|
feature_view_ref: FeatureView
|
113
|
-
names:
|
113
|
+
names: list[SqlIdentifier]
|
114
114
|
|
115
115
|
def __repr__(self) -> str:
|
116
116
|
states = (f"{k}={v}" for k, v in vars(self).items())
|
@@ -148,7 +148,7 @@ class FeatureViewSlice:
|
|
148
148
|
feature_indices=self._feature_names_to_indices(),
|
149
149
|
)
|
150
150
|
|
151
|
-
def _feature_names_to_indices(self) ->
|
151
|
+
def _feature_names_to_indices(self) -> list[int]:
|
152
152
|
name_to_indices_map = {name: idx for idx, name in enumerate(self.feature_view_ref.feature_names)}
|
153
153
|
return [name_to_indices_map[n] for n in self.names]
|
154
154
|
|
@@ -161,7 +161,7 @@ class FeatureView(lineage_node.LineageNode):
|
|
161
161
|
def __init__(
|
162
162
|
self,
|
163
163
|
name: str,
|
164
|
-
entities:
|
164
|
+
entities: list[Entity],
|
165
165
|
feature_df: DataFrame,
|
166
166
|
*,
|
167
167
|
timestamp_col: Optional[str] = None,
|
@@ -170,7 +170,7 @@ class FeatureView(lineage_node.LineageNode):
|
|
170
170
|
warehouse: Optional[str] = None,
|
171
171
|
initialize: str = "ON_CREATE",
|
172
172
|
refresh_mode: str = "AUTO",
|
173
|
-
cluster_by: Optional[
|
173
|
+
cluster_by: Optional[list[str]] = None,
|
174
174
|
**_kwargs: Any,
|
175
175
|
) -> None:
|
176
176
|
"""
|
@@ -198,7 +198,7 @@ class FeatureView(lineage_node.LineageNode):
|
|
198
198
|
after you register the feature view. It supports ON_CREATE (default) or ON_SCHEDULE. ON_CREATE refreshes
|
199
199
|
the feature view synchronously at creation. ON_SCHEDULE refreshes the feature view at the next scheduled
|
200
200
|
refresh. It is only effective when refresh_freq is not None.
|
201
|
-
refresh_mode: The refresh mode of managed feature view. The value can be 'AUTO', 'FULL' or '
|
201
|
+
refresh_mode: The refresh mode of managed feature view. The value can be 'AUTO', 'FULL' or 'INCREMENTAL'.
|
202
202
|
For managed feature view, the default value is 'AUTO'. For static feature view it has no effect.
|
203
203
|
Check https://docs.snowflake.com/en/sql-reference/sql/create-dynamic-table for for details.
|
204
204
|
cluster_by: Columns to cluster the feature view by.
|
@@ -209,7 +209,7 @@ class FeatureView(lineage_node.LineageNode):
|
|
209
209
|
Example::
|
210
210
|
|
211
211
|
>>> fs = FeatureStore(...)
|
212
|
-
>>> # draft_fv is a local object that hasn't
|
212
|
+
>>> # draft_fv is a local object that hasn't materialized to Snowflake backend yet.
|
213
213
|
>>> feature_df = session.sql("select f_1, f_2 from source_table")
|
214
214
|
>>> draft_fv = FeatureView(
|
215
215
|
... name="my_fv",
|
@@ -232,7 +232,7 @@ class FeatureView(lineage_node.LineageNode):
|
|
232
232
|
"""
|
233
233
|
|
234
234
|
self._name: SqlIdentifier = SqlIdentifier(name)
|
235
|
-
self._entities:
|
235
|
+
self._entities: list[Entity] = entities
|
236
236
|
self._feature_df: DataFrame = feature_df
|
237
237
|
self._timestamp_col: Optional[SqlIdentifier] = (
|
238
238
|
SqlIdentifier(timestamp_col) if timestamp_col is not None else None
|
@@ -254,7 +254,7 @@ class FeatureView(lineage_node.LineageNode):
|
|
254
254
|
self._refresh_mode: Optional[str] = refresh_mode
|
255
255
|
self._refresh_mode_reason: Optional[str] = None
|
256
256
|
self._owner: Optional[str] = None
|
257
|
-
self._cluster_by:
|
257
|
+
self._cluster_by: list[SqlIdentifier] = (
|
258
258
|
[SqlIdentifier(col) for col in cluster_by] if cluster_by is not None else self._get_default_cluster_by()
|
259
259
|
)
|
260
260
|
|
@@ -264,7 +264,7 @@ class FeatureView(lineage_node.LineageNode):
|
|
264
264
|
|
265
265
|
self._validate()
|
266
266
|
|
267
|
-
def slice(self, names:
|
267
|
+
def slice(self, names: list[str]) -> FeatureViewSlice:
|
268
268
|
"""
|
269
269
|
Select a subset of features within the FeatureView.
|
270
270
|
|
@@ -343,7 +343,7 @@ class FeatureView(lineage_node.LineageNode):
|
|
343
343
|
raise RuntimeError(f"FeatureView {self.name} has not been registered.")
|
344
344
|
return f"{self._database}.{self._schema}.{FeatureView._get_physical_name(self.name, self.version)}"
|
345
345
|
|
346
|
-
def attach_feature_desc(self, descs:
|
346
|
+
def attach_feature_desc(self, descs: dict[str, str]) -> FeatureView:
|
347
347
|
"""
|
348
348
|
Associate feature level descriptions to the FeatureView.
|
349
349
|
|
@@ -396,7 +396,7 @@ class FeatureView(lineage_node.LineageNode):
|
|
396
396
|
return self._name
|
397
397
|
|
398
398
|
@property
|
399
|
-
def entities(self) ->
|
399
|
+
def entities(self) -> list[Entity]:
|
400
400
|
return self._entities
|
401
401
|
|
402
402
|
@property
|
@@ -408,7 +408,7 @@ class FeatureView(lineage_node.LineageNode):
|
|
408
408
|
return self._timestamp_col
|
409
409
|
|
410
410
|
@property
|
411
|
-
def cluster_by(self) -> Optional[
|
411
|
+
def cluster_by(self) -> Optional[list[SqlIdentifier]]:
|
412
412
|
return self._cluster_by
|
413
413
|
|
414
414
|
@property
|
@@ -463,11 +463,11 @@ class FeatureView(lineage_node.LineageNode):
|
|
463
463
|
return self._status
|
464
464
|
|
465
465
|
@property
|
466
|
-
def feature_names(self) ->
|
466
|
+
def feature_names(self) -> list[SqlIdentifier]:
|
467
467
|
return list(self._feature_desc.keys()) if self._feature_desc is not None else []
|
468
468
|
|
469
469
|
@property
|
470
|
-
def feature_descs(self) -> Optional[
|
470
|
+
def feature_descs(self) -> Optional[dict[SqlIdentifier, str]]:
|
471
471
|
return self._feature_desc
|
472
472
|
|
473
473
|
def list_columns(self) -> DataFrame:
|
@@ -687,7 +687,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
687
687
|
if self._initialize not in ["ON_CREATE", "ON_SCHEDULE"]:
|
688
688
|
raise ValueError("'initialize' only supports ON_CREATE or ON_SCHEDULE.")
|
689
689
|
|
690
|
-
def _get_column_names(self) -> Optional[
|
690
|
+
def _get_column_names(self) -> Optional[list[SqlIdentifier]]:
|
691
691
|
try:
|
692
692
|
return to_sql_identifiers(self._infer_schema_df.columns)
|
693
693
|
except SnowparkSQLException as e:
|
@@ -699,7 +699,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
699
699
|
)
|
700
700
|
return None
|
701
701
|
|
702
|
-
def _get_feature_names(self) -> Optional[
|
702
|
+
def _get_feature_names(self) -> Optional[list[SqlIdentifier]]:
|
703
703
|
join_keys = [k for e in self._entities for k in e.join_keys]
|
704
704
|
ts_col = [self._timestamp_col] if self._timestamp_col is not None else []
|
705
705
|
feature_names = self._get_column_names()
|
@@ -733,7 +733,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
733
733
|
and self._owner == other._owner
|
734
734
|
)
|
735
735
|
|
736
|
-
def _to_dict(self) ->
|
736
|
+
def _to_dict(self) -> dict[str, str]:
|
737
737
|
fv_dict = self.__dict__.copy()
|
738
738
|
if "_feature_df" in fv_dict:
|
739
739
|
fv_dict.pop("_feature_df")
|
@@ -898,13 +898,13 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
898
898
|
@staticmethod
|
899
899
|
def _construct_feature_view(
|
900
900
|
name: str,
|
901
|
-
entities:
|
901
|
+
entities: list[Entity],
|
902
902
|
feature_df: DataFrame,
|
903
903
|
timestamp_col: Optional[str],
|
904
904
|
desc: str,
|
905
905
|
version: str,
|
906
906
|
status: FeatureViewStatus,
|
907
|
-
feature_descs:
|
907
|
+
feature_descs: dict[str, str],
|
908
908
|
refresh_freq: Optional[str],
|
909
909
|
database: str,
|
910
910
|
schema: str,
|
@@ -915,7 +915,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
915
915
|
owner: Optional[str],
|
916
916
|
infer_schema_df: Optional[DataFrame],
|
917
917
|
session: Session,
|
918
|
-
cluster_by: Optional[
|
918
|
+
cluster_by: Optional[list[str]] = None,
|
919
919
|
) -> FeatureView:
|
920
920
|
fv = FeatureView(
|
921
921
|
name=name,
|
@@ -944,7 +944,7 @@ Got {len(self._feature_df.queries['queries'])}: {self._feature_df.queries['queri
|
|
944
944
|
return fv
|
945
945
|
|
946
946
|
#
|
947
|
-
def _get_default_cluster_by(self) ->
|
947
|
+
def _get_default_cluster_by(self) -> list[SqlIdentifier]:
|
948
948
|
"""
|
949
949
|
Get default columns to cluster the feature view by.
|
950
950
|
Default cluster_by columns are join keys from entities and timestamp_col if it exists
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import re
|
2
2
|
from collections import defaultdict
|
3
|
-
from typing import Any,
|
3
|
+
from typing import Any, Optional
|
4
4
|
|
5
5
|
from snowflake import snowpark
|
6
6
|
from snowflake.connector import connection
|
@@ -75,8 +75,8 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
|
|
75
75
|
return stage_path
|
76
76
|
|
77
77
|
def _fetch_presigned_urls(
|
78
|
-
self, files:
|
79
|
-
) ->
|
78
|
+
self, files: list[str], url_lifetime: float = stage_fs._PRESIGNED_URL_LIFETIME_SEC
|
79
|
+
) -> list[tuple[str, str]]:
|
80
80
|
"""Fetch presigned urls for the given files."""
|
81
81
|
# SnowURL requires full snow://<domain>/<entity>/versions/<version> as the stage path arg to get_presigned_Url
|
82
82
|
versions_dict = defaultdict(list)
|
@@ -85,7 +85,7 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
|
|
85
85
|
assert match is not None and match.group("relpath") is not None
|
86
86
|
versions_dict[match.group("version")].append(match.group("relpath"))
|
87
87
|
try:
|
88
|
-
async_jobs:
|
88
|
+
async_jobs: list[snowpark.AsyncJob] = []
|
89
89
|
for version, version_files in versions_dict.items():
|
90
90
|
for file in version_files:
|
91
91
|
stage_loc = f"{self.stage_name}/versions/{version}"
|
@@ -100,7 +100,7 @@ class SFEmbeddedStageFileSystem(stage_fs.SFStageFileSystem):
|
|
100
100
|
),
|
101
101
|
)
|
102
102
|
async_jobs.append(query_result)
|
103
|
-
presigned_urls:
|
103
|
+
presigned_urls: list[tuple[str, str]] = [
|
104
104
|
(r["NAME"], r["URL"]) for job in async_jobs for r in stage_fs._resolve_async_job(job)
|
105
105
|
]
|
106
106
|
return presigned_urls
|
snowflake/ml/fileset/fileset.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
import functools
|
2
2
|
import inspect
|
3
|
-
from typing import Any, Callable,
|
3
|
+
from typing import Any, Callable, Optional
|
4
4
|
|
5
5
|
from typing_extensions import deprecated
|
6
6
|
|
@@ -121,7 +121,7 @@ class FileSet:
|
|
121
121
|
cache_type="bytes",
|
122
122
|
block_size=2 * TARGET_FILE_SIZE,
|
123
123
|
)
|
124
|
-
self._files:
|
124
|
+
self._files: list[str] = []
|
125
125
|
self._is_deleted = False
|
126
126
|
|
127
127
|
_get_fileset_query_id_or_raise(self.files(), self._fileset_absolute_path())
|
@@ -273,7 +273,7 @@ class FileSet:
|
|
273
273
|
"""Get the name of the FileSet."""
|
274
274
|
return self._name
|
275
275
|
|
276
|
-
def _list_files(self) ->
|
276
|
+
def _list_files(self) -> list[str]:
|
277
277
|
"""Private helper function that lists all files in this fileset and caches the results for subsequent use."""
|
278
278
|
if self._files:
|
279
279
|
return self._files
|
@@ -303,7 +303,7 @@ class FileSet:
|
|
303
303
|
)
|
304
304
|
@snowpark._internal.utils.private_preview(version="0.2.0")
|
305
305
|
@_raise_if_deleted
|
306
|
-
def files(self) ->
|
306
|
+
def files(self) -> list[str]:
|
307
307
|
"""Get the list of stage file paths in the current FileSet.
|
308
308
|
|
309
309
|
The stage file paths follows the sfc protocol.
|
@@ -473,7 +473,7 @@ class FileSet:
|
|
473
473
|
return
|
474
474
|
|
475
475
|
|
476
|
-
def _get_fileset_query_id_or_raise(files:
|
476
|
+
def _get_fileset_query_id_or_raise(files: list[str], fileset_absolute_path: str) -> Optional[str]:
|
477
477
|
"""Obtain the query ID used to generate the FileSet stage files.
|
478
478
|
|
479
479
|
If the input stage files are not generated by the same query, an error will be raised.
|