snowflake-ml-python 1.3.0__py3-none-any.whl → 1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/ml/_internal/file_utils.py +3 -3
- snowflake/ml/_internal/human_readable_id/adjectives.txt +128 -0
- snowflake/ml/_internal/human_readable_id/animals.txt +128 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator.py +40 -0
- snowflake/ml/_internal/human_readable_id/hrid_generator_base.py +135 -0
- snowflake/ml/_internal/telemetry.py +11 -2
- snowflake/ml/_internal/utils/formatting.py +1 -1
- snowflake/ml/feature_store/feature_store.py +15 -106
- snowflake/ml/fileset/sfcfs.py +4 -3
- snowflake/ml/fileset/stage_fs.py +18 -0
- snowflake/ml/model/_api.py +9 -9
- snowflake/ml/model/_client/model/model_version_impl.py +20 -15
- snowflake/ml/model/_deploy_client/image_builds/docker_context.py +3 -9
- snowflake/ml/model/_deploy_client/image_builds/server_image_builder.py +3 -5
- snowflake/ml/model/_deploy_client/snowservice/deploy.py +7 -6
- snowflake/ml/model/_model_composer/model_composer.py +10 -8
- snowflake/ml/model/_model_composer/model_method/function_generator.py +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +2 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +2 -2
- snowflake/ml/model/_model_composer/model_runtime/_runtime_requirements.py +1 -1
- snowflake/ml/model/_packager/model_handlers/_base.py +2 -2
- snowflake/ml/model/_packager/model_handlers/_utils.py +5 -5
- snowflake/ml/model/_packager/model_handlers/custom.py +7 -7
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +2 -2
- snowflake/ml/model/_packager/model_handlers/llm.py +1 -1
- snowflake/ml/model/_packager/model_handlers/mlflow.py +1 -1
- snowflake/ml/model/_packager/model_handlers/pytorch.py +13 -10
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +214 -0
- snowflake/ml/model/_packager/model_handlers/sklearn.py +6 -6
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +15 -3
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +8 -8
- snowflake/ml/model/_packager/model_handlers/torchscript.py +7 -7
- snowflake/ml/model/_packager/model_handlers/xgboost.py +8 -8
- snowflake/ml/model/_packager/model_meta/_core_requirements.py +1 -1
- snowflake/ml/model/_packager/model_packager.py +8 -6
- snowflake/ml/model/custom_model.py +3 -1
- snowflake/ml/model/type_hints.py +13 -0
- snowflake/ml/modeling/_internal/estimator_utils.py +61 -1
- snowflake/ml/modeling/_internal/local_implementations/pandas_handlers.py +4 -43
- snowflake/ml/modeling/_internal/local_implementations/pandas_trainer.py +4 -4
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +21 -17
- snowflake/ml/modeling/_internal/model_specifications.py +3 -1
- snowflake/ml/modeling/_internal/model_trainer.py +2 -2
- snowflake/ml/modeling/_internal/snowpark_implementations/distributed_hpo_trainer.py +547 -1
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py +67 -114
- snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py +9 -9
- snowflake/ml/modeling/_internal/transformer_protocols.py +2 -3
- snowflake/ml/modeling/calibration/calibrated_classifier_cv.py +33 -61
- snowflake/ml/modeling/cluster/affinity_propagation.py +33 -61
- snowflake/ml/modeling/cluster/agglomerative_clustering.py +33 -61
- snowflake/ml/modeling/cluster/birch.py +33 -61
- snowflake/ml/modeling/cluster/bisecting_k_means.py +33 -61
- snowflake/ml/modeling/cluster/dbscan.py +33 -61
- snowflake/ml/modeling/cluster/feature_agglomeration.py +33 -61
- snowflake/ml/modeling/cluster/k_means.py +33 -61
- snowflake/ml/modeling/cluster/mean_shift.py +33 -61
- snowflake/ml/modeling/cluster/mini_batch_k_means.py +33 -61
- snowflake/ml/modeling/cluster/optics.py +33 -61
- snowflake/ml/modeling/cluster/spectral_biclustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_clustering.py +33 -61
- snowflake/ml/modeling/cluster/spectral_coclustering.py +33 -61
- snowflake/ml/modeling/compose/column_transformer.py +33 -61
- snowflake/ml/modeling/compose/transformed_target_regressor.py +33 -61
- snowflake/ml/modeling/covariance/elliptic_envelope.py +33 -61
- snowflake/ml/modeling/covariance/empirical_covariance.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso.py +33 -61
- snowflake/ml/modeling/covariance/graphical_lasso_cv.py +33 -61
- snowflake/ml/modeling/covariance/ledoit_wolf.py +33 -61
- snowflake/ml/modeling/covariance/min_cov_det.py +33 -61
- snowflake/ml/modeling/covariance/oas.py +33 -61
- snowflake/ml/modeling/covariance/shrunk_covariance.py +33 -61
- snowflake/ml/modeling/decomposition/dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/factor_analysis.py +33 -61
- snowflake/ml/modeling/decomposition/fast_ica.py +33 -61
- snowflake/ml/modeling/decomposition/incremental_pca.py +33 -61
- snowflake/ml/modeling/decomposition/kernel_pca.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_dictionary_learning.py +33 -61
- snowflake/ml/modeling/decomposition/mini_batch_sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/pca.py +33 -61
- snowflake/ml/modeling/decomposition/sparse_pca.py +33 -61
- snowflake/ml/modeling/decomposition/truncated_svd.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/linear_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/discriminant_analysis/quadratic_discriminant_analysis.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/ada_boost_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/bagging_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/extra_trees_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/hist_gradient_boosting_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/isolation_forest.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/random_forest_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/stacking_regressor.py +33 -61
- snowflake/ml/modeling/ensemble/voting_classifier.py +33 -61
- snowflake/ml/modeling/ensemble/voting_regressor.py +33 -61
- snowflake/ml/modeling/feature_selection/generic_univariate_select.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fdr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fpr.py +33 -61
- snowflake/ml/modeling/feature_selection/select_fwe.py +33 -61
- snowflake/ml/modeling/feature_selection/select_k_best.py +33 -61
- snowflake/ml/modeling/feature_selection/select_percentile.py +33 -61
- snowflake/ml/modeling/feature_selection/sequential_feature_selector.py +33 -61
- snowflake/ml/modeling/feature_selection/variance_threshold.py +33 -61
- snowflake/ml/modeling/framework/base.py +55 -5
- snowflake/ml/modeling/gaussian_process/gaussian_process_classifier.py +33 -61
- snowflake/ml/modeling/gaussian_process/gaussian_process_regressor.py +33 -61
- snowflake/ml/modeling/impute/iterative_imputer.py +33 -61
- snowflake/ml/modeling/impute/knn_imputer.py +33 -61
- snowflake/ml/modeling/impute/missing_indicator.py +33 -61
- snowflake/ml/modeling/impute/simple_imputer.py +4 -15
- snowflake/ml/modeling/kernel_approximation/additive_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/nystroem.py +33 -61
- snowflake/ml/modeling/kernel_approximation/polynomial_count_sketch.py +33 -61
- snowflake/ml/modeling/kernel_approximation/rbf_sampler.py +33 -61
- snowflake/ml/modeling/kernel_approximation/skewed_chi2_sampler.py +33 -61
- snowflake/ml/modeling/kernel_ridge/kernel_ridge.py +33 -61
- snowflake/ml/modeling/lightgbm/lgbm_classifier.py +36 -63
- snowflake/ml/modeling/lightgbm/lgbm_regressor.py +36 -63
- snowflake/ml/modeling/linear_model/ard_regression.py +33 -61
- snowflake/ml/modeling/linear_model/bayesian_ridge.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/gamma_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/huber_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/lars.py +33 -61
- snowflake/ml/modeling/linear_model/lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_cv.py +33 -61
- snowflake/ml/modeling/linear_model/lasso_lars_ic.py +33 -61
- snowflake/ml/modeling/linear_model/linear_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression.py +33 -61
- snowflake/ml/modeling/linear_model/logistic_regression_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_elastic_net_cv.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso.py +33 -61
- snowflake/ml/modeling/linear_model/multi_task_lasso_cv.py +33 -61
- snowflake/ml/modeling/linear_model/orthogonal_matching_pursuit.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/passive_aggressive_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/perceptron.py +33 -61
- snowflake/ml/modeling/linear_model/poisson_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ransac_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/ridge.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_classifier_cv.py +33 -61
- snowflake/ml/modeling/linear_model/ridge_cv.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_classifier.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_one_class_svm.py +33 -61
- snowflake/ml/modeling/linear_model/sgd_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/theil_sen_regressor.py +33 -61
- snowflake/ml/modeling/linear_model/tweedie_regressor.py +33 -61
- snowflake/ml/modeling/manifold/isomap.py +33 -61
- snowflake/ml/modeling/manifold/mds.py +33 -61
- snowflake/ml/modeling/manifold/spectral_embedding.py +33 -61
- snowflake/ml/modeling/manifold/tsne.py +33 -61
- snowflake/ml/modeling/mixture/bayesian_gaussian_mixture.py +33 -61
- snowflake/ml/modeling/mixture/gaussian_mixture.py +33 -61
- snowflake/ml/modeling/model_selection/grid_search_cv.py +39 -57
- snowflake/ml/modeling/model_selection/randomized_search_cv.py +26 -57
- snowflake/ml/modeling/multiclass/one_vs_one_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/one_vs_rest_classifier.py +33 -61
- snowflake/ml/modeling/multiclass/output_code_classifier.py +33 -61
- snowflake/ml/modeling/naive_bayes/bernoulli_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/categorical_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/complement_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/gaussian_nb.py +33 -61
- snowflake/ml/modeling/naive_bayes/multinomial_nb.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/k_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neighbors/kernel_density.py +33 -61
- snowflake/ml/modeling/neighbors/local_outlier_factor.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_centroid.py +33 -61
- snowflake/ml/modeling/neighbors/nearest_neighbors.py +33 -61
- snowflake/ml/modeling/neighbors/neighborhood_components_analysis.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_classifier.py +33 -61
- snowflake/ml/modeling/neighbors/radius_neighbors_regressor.py +33 -61
- snowflake/ml/modeling/neural_network/bernoulli_rbm.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_classifier.py +33 -61
- snowflake/ml/modeling/neural_network/mlp_regressor.py +33 -61
- snowflake/ml/modeling/preprocessing/polynomial_features.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_propagation.py +33 -61
- snowflake/ml/modeling/semi_supervised/label_spreading.py +33 -61
- snowflake/ml/modeling/svm/linear_svc.py +33 -61
- snowflake/ml/modeling/svm/linear_svr.py +33 -61
- snowflake/ml/modeling/svm/nu_svc.py +33 -61
- snowflake/ml/modeling/svm/nu_svr.py +33 -61
- snowflake/ml/modeling/svm/svc.py +33 -61
- snowflake/ml/modeling/svm/svr.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/decision_tree_regressor.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_classifier.py +33 -61
- snowflake/ml/modeling/tree/extra_tree_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgb_regressor.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +33 -61
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +33 -61
- snowflake/ml/registry/_manager/model_manager.py +6 -2
- snowflake/ml/registry/model_registry.py +100 -27
- snowflake/ml/registry/registry.py +6 -2
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/METADATA +43 -7
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/RECORD +211 -206
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/WHEEL +0 -0
- {snowflake_ml_python-1.3.0.dist-info → snowflake_ml_python-1.4.0.dist-info}/top_level.txt +0 -0
@@ -157,14 +157,14 @@ def zip_python_package(zipfile_path: str, package_name: str, ignore_generated_py
|
|
157
157
|
arcname = base_arcname / path_info.name
|
158
158
|
if not _able_ascii_encode(str(arcname)):
|
159
159
|
raise ValueError(f"File name {arcname} cannot be encoded using ASCII. Please rename.")
|
160
|
-
zf.writestr(str(arcname), path_info.read_bytes())
|
160
|
+
zf.writestr(str(arcname), path_info.read_bytes())
|
161
161
|
elif path_info.is_dir():
|
162
162
|
arcname = base_arcname / path_info.name
|
163
163
|
zf.writestr(str(arcname) + "/", "")
|
164
|
-
for sub_path_info in path_info.iterdir():
|
164
|
+
for sub_path_info in path_info.iterdir():
|
165
165
|
_add_to_zip(zf, sub_path_info, arcname)
|
166
166
|
|
167
|
-
for sub_path_info in importlib_resources.files(package_name).iterdir():
|
167
|
+
for sub_path_info in importlib_resources.files(package_name).iterdir():
|
168
168
|
_add_to_zip(zf, sub_path_info, base_arcname)
|
169
169
|
|
170
170
|
|
@@ -0,0 +1,128 @@
|
|
1
|
+
afraid
|
2
|
+
ancient
|
3
|
+
angry
|
4
|
+
average
|
5
|
+
bad
|
6
|
+
big
|
7
|
+
bitter
|
8
|
+
black
|
9
|
+
blue
|
10
|
+
brave
|
11
|
+
breezy
|
12
|
+
bright
|
13
|
+
brown
|
14
|
+
calm
|
15
|
+
chatty
|
16
|
+
chilly
|
17
|
+
clever
|
18
|
+
cold
|
19
|
+
cowardly
|
20
|
+
cuddly
|
21
|
+
curly
|
22
|
+
curvy
|
23
|
+
dangerous
|
24
|
+
dry
|
25
|
+
dull
|
26
|
+
empty
|
27
|
+
evil
|
28
|
+
fast
|
29
|
+
fat
|
30
|
+
fluffy
|
31
|
+
foolish
|
32
|
+
fresh
|
33
|
+
friendly
|
34
|
+
funny
|
35
|
+
gentle
|
36
|
+
giant
|
37
|
+
good
|
38
|
+
great
|
39
|
+
green
|
40
|
+
grumpy
|
41
|
+
happy
|
42
|
+
hard
|
43
|
+
heavy
|
44
|
+
helpless
|
45
|
+
honest
|
46
|
+
horrible
|
47
|
+
hot
|
48
|
+
hungry
|
49
|
+
itchy
|
50
|
+
jolly
|
51
|
+
kind
|
52
|
+
lazy
|
53
|
+
light
|
54
|
+
little
|
55
|
+
loud
|
56
|
+
lovely
|
57
|
+
lucky
|
58
|
+
massive
|
59
|
+
mean
|
60
|
+
mighty
|
61
|
+
modern
|
62
|
+
moody
|
63
|
+
nasty
|
64
|
+
neat
|
65
|
+
nervous
|
66
|
+
new
|
67
|
+
nice
|
68
|
+
odd
|
69
|
+
old
|
70
|
+
orange
|
71
|
+
ordinary
|
72
|
+
perfect
|
73
|
+
pink
|
74
|
+
plastic
|
75
|
+
polite
|
76
|
+
popular
|
77
|
+
pretty
|
78
|
+
proud
|
79
|
+
purple
|
80
|
+
quick
|
81
|
+
quiet
|
82
|
+
rare
|
83
|
+
red
|
84
|
+
rotten
|
85
|
+
rude
|
86
|
+
selfish
|
87
|
+
serious
|
88
|
+
shaggy
|
89
|
+
sharp
|
90
|
+
short
|
91
|
+
shy
|
92
|
+
silent
|
93
|
+
silly
|
94
|
+
slimy
|
95
|
+
slippery
|
96
|
+
smart
|
97
|
+
smooth
|
98
|
+
soft
|
99
|
+
sour
|
100
|
+
spicy
|
101
|
+
splendid
|
102
|
+
spotty
|
103
|
+
stale
|
104
|
+
strange
|
105
|
+
strong
|
106
|
+
stupid
|
107
|
+
sweet
|
108
|
+
swift
|
109
|
+
tall
|
110
|
+
tame
|
111
|
+
tasty
|
112
|
+
tender
|
113
|
+
terrible
|
114
|
+
thin
|
115
|
+
tidy
|
116
|
+
tiny
|
117
|
+
tough
|
118
|
+
tricky
|
119
|
+
ugly
|
120
|
+
warm
|
121
|
+
weak
|
122
|
+
wet
|
123
|
+
wicked
|
124
|
+
wise
|
125
|
+
witty
|
126
|
+
wonderful
|
127
|
+
yellow
|
128
|
+
young
|
@@ -0,0 +1,128 @@
|
|
1
|
+
anaconda
|
2
|
+
ant
|
3
|
+
ape
|
4
|
+
baboon
|
5
|
+
badger
|
6
|
+
bat
|
7
|
+
bear
|
8
|
+
bird
|
9
|
+
bobcat
|
10
|
+
bulldog
|
11
|
+
bullfrog
|
12
|
+
camel
|
13
|
+
canary
|
14
|
+
capybara
|
15
|
+
cat
|
16
|
+
catfish
|
17
|
+
cheetah
|
18
|
+
chicken
|
19
|
+
chipmunk
|
20
|
+
cobra
|
21
|
+
cougar
|
22
|
+
cow
|
23
|
+
crab
|
24
|
+
deer
|
25
|
+
dingo
|
26
|
+
dodo
|
27
|
+
dog
|
28
|
+
dolphin
|
29
|
+
donkey
|
30
|
+
dragon
|
31
|
+
dragonfly
|
32
|
+
duck
|
33
|
+
eagle
|
34
|
+
earwig
|
35
|
+
eel
|
36
|
+
egret
|
37
|
+
elephant
|
38
|
+
emu
|
39
|
+
falcon
|
40
|
+
fireant
|
41
|
+
firefox
|
42
|
+
fish
|
43
|
+
fly
|
44
|
+
fox
|
45
|
+
frog
|
46
|
+
gazelle
|
47
|
+
gecko
|
48
|
+
gibbon
|
49
|
+
giraffe
|
50
|
+
goat
|
51
|
+
goose
|
52
|
+
gorilla
|
53
|
+
grasshopper
|
54
|
+
horse
|
55
|
+
hound
|
56
|
+
husky
|
57
|
+
impala
|
58
|
+
insect
|
59
|
+
jackal
|
60
|
+
jaguar
|
61
|
+
jellyfish
|
62
|
+
kangaroo
|
63
|
+
kiwi
|
64
|
+
ladybug
|
65
|
+
leech
|
66
|
+
leopard
|
67
|
+
llama
|
68
|
+
liger
|
69
|
+
lion
|
70
|
+
lionfish
|
71
|
+
lizard
|
72
|
+
lobster
|
73
|
+
mayfly
|
74
|
+
mamba
|
75
|
+
mole
|
76
|
+
monkey
|
77
|
+
moose
|
78
|
+
moth
|
79
|
+
mouse
|
80
|
+
mule
|
81
|
+
newt
|
82
|
+
octopus
|
83
|
+
otter
|
84
|
+
owl
|
85
|
+
panda
|
86
|
+
panther
|
87
|
+
parrot
|
88
|
+
penguin
|
89
|
+
pig
|
90
|
+
puma
|
91
|
+
pug
|
92
|
+
python
|
93
|
+
quail
|
94
|
+
rabbit
|
95
|
+
ram
|
96
|
+
rat
|
97
|
+
ray
|
98
|
+
rattlesnake
|
99
|
+
robin
|
100
|
+
salmon
|
101
|
+
seahorse
|
102
|
+
seal
|
103
|
+
shark
|
104
|
+
sheep
|
105
|
+
shrimp
|
106
|
+
skunk
|
107
|
+
sloth
|
108
|
+
snail
|
109
|
+
snake
|
110
|
+
squid
|
111
|
+
starfish
|
112
|
+
stingray
|
113
|
+
swan
|
114
|
+
termite
|
115
|
+
tiger
|
116
|
+
treefrog
|
117
|
+
turkey
|
118
|
+
turtle
|
119
|
+
vampirebat
|
120
|
+
walrus
|
121
|
+
warthog
|
122
|
+
wasp
|
123
|
+
wolverine
|
124
|
+
wombat
|
125
|
+
worm
|
126
|
+
yak
|
127
|
+
yeti
|
128
|
+
zebra
|
@@ -0,0 +1,40 @@
|
|
1
|
+
"""Implement a generator for human readable ID (HRID).
|
2
|
+
|
3
|
+
The original idea for this comes from Asana where it is documented on their
|
4
|
+
blog:
|
5
|
+
http://blog.asana.com/2011/09/6-sad-squid-snuggle-softly/
|
6
|
+
|
7
|
+
Other partial implementations of this idea can be found here:
|
8
|
+
Node.js: https://github.com/linus/greg
|
9
|
+
Java: https://github.com/PerWiklander/IdentifierSentence
|
10
|
+
|
11
|
+
In this module you will find:
|
12
|
+
|
13
|
+
HRID16: An implementation of HRIDBase for 16 bit integers.
|
14
|
+
|
15
|
+
The word lists used here come from:
|
16
|
+
https://git.coolaj86.com/coolaj86/human-readable-ids.js
|
17
|
+
"""
|
18
|
+
|
19
|
+
import random
|
20
|
+
|
21
|
+
import importlib_resources
|
22
|
+
|
23
|
+
from snowflake.ml._internal import human_readable_id
|
24
|
+
from snowflake.ml._internal.human_readable_id import hrid_generator_base
|
25
|
+
|
26
|
+
|
27
|
+
class HRID16(hrid_generator_base.HRIDBase):
    """Human readable ID generator whose IDs are random 16-bit integers.

    An HRID is made of three parts, in order: an adjective, an animal and a
    number. The adjective and animal vocabularies are read from the
    ``adjectives.txt`` and ``animals.txt`` resources of the
    ``human_readable_id`` package when this class body is executed; the
    number vocabulary is the strings "1" through "4".
    """

    # Order of the parts inside a generated HRID.
    __hrid_structure__ = ("adjective", "animal", "number")

    # Vocabulary for each part; the text files are whitespace-split into words.
    __hrid_words__ = {
        "number": tuple(map(str, range(1, 5))),
        "adjective": tuple(
            importlib_resources.files(human_readable_id).joinpath("adjectives.txt").read_text("utf-8").split()
        ),
        "animal": tuple(
            importlib_resources.files(human_readable_id).joinpath("animals.txt").read_text("utf-8").split()
        ),
    }

    def __id_generator__(self) -> int:
        """Return a new random ID drawn from 16 random bits."""
        raw_bits = random.getrandbits(16)
        return int(raw_bits)
|
@@ -0,0 +1,135 @@
|
|
1
|
+
"""Implement a generator for human readable ID (HRID).
|
2
|
+
|
3
|
+
The original idea for this comes from Asana where it is documented on their
|
4
|
+
blog:
|
5
|
+
http://blog.asana.com/2011/09/6-sad-squid-snuggle-softly/
|
6
|
+
|
7
|
+
Other partial implementations of this idea can be found here:
|
8
|
+
Node.js: https://github.com/linus/greg
|
9
|
+
Java: https://github.com/PerWiklander/IdentifierSentence
|
10
|
+
|
11
|
+
In this module you will find:
|
12
|
+
|
13
|
+
HRIDBase: The base class for all human readable id.
|
14
|
+
"""
|
15
|
+
|
16
|
+
import math
|
17
|
+
from abc import ABC, abstractmethod
|
18
|
+
from typing import Dict, List, Tuple
|
19
|
+
|
20
|
+
|
21
|
+
class HRIDBase(ABC):
    """The base class for all human readable IDs (HRIDs).

    This provides all of the necessary helper functionality to turn IDs into
    HRIDs and HRIDs into IDs. An ID is typically a random int, while an HRID is
    the corresponding short string made of one word per part, joined by
    ``__separator__``.

    Each part of ``__hrid_structure__`` consumes ``log2(number of words)`` bits
    of the ID, with the first part taking the most significant bits. Bits of an
    ID above the total bit width are ignored, so ``hrid_to_id(id_to_hrid(x))``
    returns ``x`` only for ``0 <= x < 2 ** __total_bits__``.
    """

    @abstractmethod
    def __id_generator__(self) -> int:
        """The generator to use to generate new IDs. The implementer needs to provide this."""
        pass

    __hrid_structure__: Tuple[str, ...]
    """The HRID structure to be generated. The implementer needs to provide this."""

    __hrid_words__: Dict[str, Tuple[str, ...]]
    """The mapping between the HRID parts and the words to use. The implementer needs to provide this."""

    __separator__ = "_"

    def __init__(self) -> None:
        """Compute the word count and bit width of every HRID part.

        Raises:
            ValueError: Raised when a part's word list length is not a power of 2.
        """
        self._part_n_words: Dict[str, int] = {}
        self._part_bits: Dict[str, int] = {}
        for part in self.__hrid_structure__:
            n_words = len(self.__hrid_words__[part])
            self._part_n_words[part] = n_words
            # A positive power of 2 has exactly one bit set, so n & (n - 1) is 0.
            if n_words <= 0 or n_words & (n_words - 1):
                raise ValueError(f"{part} part has {n_words} words, which is not a power of 2")
            # Exact integer log2; avoids the float rounding of math.log(n, 2).
            self._part_bits[part] = n_words.bit_length() - 1
        self.__total_bits__ = sum(self._part_bits.values())

    def hrid_to_id(self, hrid: str) -> int:
        """Take the HRID and convert it to the ID.

        Args:
            hrid: The HRID to convert into an ID

        Returns:
            The ID represented by the HRID
        """
        result = 0
        for part, idx in zip(self.__hrid_structure__, self._hrid_to_idxs(hrid)):
            # Each index is smaller than 2 ** bits, so OR-ing it in cannot carry.
            result = (result << self._part_bits[part]) | idx
        return result

    def id_to_hrid(self, id: int) -> str:
        """Take the ID and convert it to a HRID.

        Args:
            id: The ID to convert into a HRID. (The name shadows the builtin; it is
                kept because it is part of the public signature.)

        Returns:
            The HRID represented by the ID
        """
        idxs = self._id_to_idxs(id)
        return self.__separator__.join(
            str(self.__hrid_words__[part][idx]) for part, idx in zip(self.__hrid_structure__, idxs)
        )

    def generate(self) -> Tuple[int, str]:
        """Generate an ID and the corresponding HRID.

        Returns:
            A tuple containing the id and the HRID
        """
        new_id = self.__id_generator__()
        return (new_id, self.id_to_hrid(new_id))

    def _id_to_idxs(self, id: int) -> List[int]:
        """Take the ID and convert it to indices into the HRID words.

        Args:
            id: The ID to convert into indices

        Returns:
            A list of indices into the HRID words
        """
        shift = self.__total_bits__
        idxs = []
        for part in self.__hrid_structure__:
            # Walk from the most significant part down to the least significant one.
            shift -= self._part_bits[part]
            idxs.append((id >> shift) & (self._part_n_words[part] - 1))
        return idxs

    def _hrid_to_idxs(self, hrid: str) -> List[int]:
        """Take the HRID and convert it to indices into the HRID words.

        Args:
            hrid: The HRID to convert into indices

        Raises:
            ValueError: Raised when the input does not meet the structure, or when
                one of its words is not in the word list of the matching part.

        Returns:
            A list of indices into the HRID words
        """
        split_hrid = hrid.split(self.__separator__)
        if len(split_hrid) != len(self.__hrid_structure__):
            raise ValueError(
                f"The hrid must have {len(self.__hrid_structure__)} parts "
                f"and be of the form {self.__hrid_structure__}"
            )
        idxs = []
        for part, word in zip(self.__hrid_structure__, split_hrid):
            try:
                idxs.append(self.__hrid_words__[part].index(word))
            except ValueError:
                # Replace tuple.index's generic message with one naming the bad word.
                raise ValueError(f"{word!r} is not a valid {part} word for this hrid") from None
        return idxs
|
@@ -32,6 +32,13 @@ from snowflake.snowpark._internal import utils
|
|
32
32
|
_log_counter = 0
|
33
33
|
_FLUSH_SIZE = 10
|
34
34
|
|
35
|
+
# Prepopulate allowed connection types for type checking later since getattr is slow on large modules
|
36
|
+
_CONNECTION_TYPES = {
|
37
|
+
conn_type: getattr(connector, conn_type)
|
38
|
+
for conn_type in ["SnowflakeConnection", "StoredProcConnection"]
|
39
|
+
if hasattr(connector, conn_type)
|
40
|
+
}
|
41
|
+
|
35
42
|
_Args = ParamSpec("_Args")
|
36
43
|
_ReturnValue = TypeVar("_ReturnValue")
|
37
44
|
|
@@ -321,8 +328,10 @@ def send_api_usage_telemetry(
|
|
321
328
|
if conn_attr_name:
|
322
329
|
# raise AttributeError if conn attribute does not exist in `self`
|
323
330
|
conn = operator.attrgetter(conn_attr_name)(args[0])
|
324
|
-
if not isinstance(conn, connector.SnowflakeConnection):
|
325
|
-
raise TypeError(
|
331
|
+
if not isinstance(conn, _CONNECTION_TYPES.get(type(conn).__name__, connector.SnowflakeConnection)):
|
332
|
+
raise TypeError(
|
333
|
+
f"Expected a conn object of type {' or '.join(_CONNECTION_TYPES.keys())} but got {type(conn)}"
|
334
|
+
)
|
326
335
|
# get an active session
|
327
336
|
else:
|
328
337
|
try:
|
@@ -1,7 +1,7 @@
|
|
1
1
|
"""String formatting utilities for general use in the SnowML Repository.
|
2
2
|
|
3
3
|
This file contains a collection of utilities that help with formatting strings. Functionality is not limited to tests
|
4
|
-
only. Anything that is
|
4
|
+
only. Anything that is reusable across different modules and related to string formatting should go here.
|
5
5
|
"""
|
6
6
|
|
7
7
|
import re
|
@@ -233,13 +233,16 @@ class FeatureStore:
|
|
233
233
|
self._default_warehouse = warehouse
|
234
234
|
|
235
235
|
@dispatch_decorator(prpr_version="1.0.8")
|
236
|
-
def register_entity(self, entity: Entity) ->
|
236
|
+
def register_entity(self, entity: Entity) -> Entity:
|
237
237
|
"""
|
238
238
|
Register Entity in the FeatureStore.
|
239
239
|
|
240
240
|
Args:
|
241
241
|
entity: Entity object to register.
|
242
242
|
|
243
|
+
Returns:
|
244
|
+
A registered entity object.
|
245
|
+
|
243
246
|
Raises:
|
244
247
|
SnowflakeMLException: [ValueError] Entity with same name is already registered.
|
245
248
|
SnowflakeMLException: [RuntimeError] Failed to find resources.
|
@@ -269,15 +272,18 @@ class FeatureStore:
|
|
269
272
|
error_code=error_codes.INTERNAL_SNOWPARK_ERROR,
|
270
273
|
original_exception=RuntimeError(f"Failed to register entity `{entity.name}`: {e}."),
|
271
274
|
) from e
|
275
|
+
|
272
276
|
logger.info(f"Registered Entity {entity}.")
|
273
277
|
|
278
|
+
return self.get_entity(entity.name)
|
279
|
+
|
274
280
|
# TODO: add support to update column desc once SNOW-894249 is fixed
|
275
281
|
@dispatch_decorator(prpr_version="1.0.8")
|
276
282
|
def register_feature_view(
|
277
283
|
self,
|
278
284
|
feature_view: FeatureView,
|
279
285
|
version: str,
|
280
|
-
block: bool =
|
286
|
+
block: bool = True,
|
281
287
|
override: bool = False,
|
282
288
|
) -> FeatureView:
|
283
289
|
"""
|
@@ -297,7 +303,7 @@ class FeatureStore:
|
|
297
303
|
version: version of the registered FeatureView.
|
298
304
|
NOTE: Version only accepts letters, numbers and underscore. Also version will be capitalized.
|
299
305
|
block: Specify whether the FeatureView backend materialization should be blocking or not. If blocking then
|
300
|
-
the API will wait until the initial FeatureView data is generated.
|
306
|
+
the API will wait until the initial FeatureView data is generated. Default to true.
|
301
307
|
override: Override the existing FeatureView with same version. This is the same as dropping the FeatureView
|
302
308
|
first then recreate. NOTE: there will be backfill cost associated if the FeatureView is being
|
303
309
|
continuously maintained.
|
@@ -525,104 +531,6 @@ class FeatureStore:
|
|
525
531
|
|
526
532
|
return self._compose_feature_view(results[0], self.list_entities().collect())
|
527
533
|
|
528
|
-
@dispatch_decorator(prpr_version="1.0.8")
|
529
|
-
def merge_features(
|
530
|
-
self,
|
531
|
-
features: List[Union[FeatureView, FeatureViewSlice]],
|
532
|
-
name: str,
|
533
|
-
desc: str = "",
|
534
|
-
) -> FeatureView:
|
535
|
-
"""
|
536
|
-
Merge multiple registered FeatureView or FeatureViewSlice to form a new FeatureView.
|
537
|
-
This is typically used to add new features to existing FeatureViews since registered FeatureView is immutable.
|
538
|
-
The FeatureViews or FeatureViewSlices to merge should have same Entity and timestamp column setup.
|
539
|
-
|
540
|
-
Args:
|
541
|
-
features: List of FeatureViews or FeatureViewSlices to merge
|
542
|
-
name: name of the new constructed FeatureView
|
543
|
-
desc: description of the new constructed FeatureView
|
544
|
-
|
545
|
-
Returns:
|
546
|
-
a new FeatureView with features merged.
|
547
|
-
|
548
|
-
Raises:
|
549
|
-
SnowflakeMLException: [ValueError] Features length is not valid or if Entitis and timestamp_col is
|
550
|
-
inconsistent.
|
551
|
-
SnowflakeMLException: [ValueError] FeatureView has not been registered.
|
552
|
-
SnowflakeMLException: [ValueError] FeatureView merge failed.
|
553
|
-
"""
|
554
|
-
name = SqlIdentifier(name)
|
555
|
-
|
556
|
-
if len(features) < 2:
|
557
|
-
raise snowml_exceptions.SnowflakeMLException(
|
558
|
-
error_code=error_codes.INVALID_ARGUMENT,
|
559
|
-
original_exception=ValueError("features should have at least two entries"),
|
560
|
-
)
|
561
|
-
|
562
|
-
left = features[0]
|
563
|
-
left_columns = None
|
564
|
-
if isinstance(left, FeatureViewSlice):
|
565
|
-
left_columns = ", ".join(left.names)
|
566
|
-
left = left.feature_view_ref
|
567
|
-
|
568
|
-
if left.status == FeatureViewStatus.DRAFT:
|
569
|
-
raise snowml_exceptions.SnowflakeMLException(
|
570
|
-
error_code=error_codes.NOT_FOUND,
|
571
|
-
original_exception=ValueError(f"FeatureView {left.name} has not been registered."),
|
572
|
-
)
|
573
|
-
|
574
|
-
join_keys = [k for e in left.entities for k in e.join_keys]
|
575
|
-
|
576
|
-
ts_col_expr = "" if left.timestamp_col is None else f" , {left.timestamp_col}"
|
577
|
-
left_columns = "*" if left_columns is None else f"{', '.join(join_keys)}, {left_columns}{ts_col_expr}"
|
578
|
-
left_df = self._session.sql(f"SELECT {left_columns} FROM {left.fully_qualified_name()}")
|
579
|
-
|
580
|
-
for right in features[1:]:
|
581
|
-
right_columns = None
|
582
|
-
if isinstance(right, FeatureViewSlice):
|
583
|
-
right_columns = ", ".join(right.names)
|
584
|
-
right = right.feature_view_ref
|
585
|
-
|
586
|
-
if left.entities != right.entities:
|
587
|
-
raise snowml_exceptions.SnowflakeMLException(
|
588
|
-
error_code=error_codes.INVALID_ARGUMENT,
|
589
|
-
original_exception=ValueError(
|
590
|
-
f"Cannot merge FeatureView {left.name} and {right.name} with different Entities: "
|
591
|
-
f"{left.entities} vs {right.entities}" # noqa: E501
|
592
|
-
),
|
593
|
-
)
|
594
|
-
if left.timestamp_col != right.timestamp_col:
|
595
|
-
raise snowml_exceptions.SnowflakeMLException(
|
596
|
-
error_code=error_codes.INVALID_ARGUMENT,
|
597
|
-
original_exception=ValueError(
|
598
|
-
f"Cannot merge FeatureView {left.name} and {right.name} with different timestamp_col: "
|
599
|
-
f"{left.timestamp_col} vs {right.timestamp_col}" # noqa: E501
|
600
|
-
),
|
601
|
-
)
|
602
|
-
if right.status == FeatureViewStatus.DRAFT:
|
603
|
-
raise snowml_exceptions.SnowflakeMLException(
|
604
|
-
error_code=error_codes.NOT_FOUND,
|
605
|
-
original_exception=ValueError(f"FeatureView {right.name} has not been registered."),
|
606
|
-
)
|
607
|
-
|
608
|
-
right_columns = "*" if right_columns is None else f"{', '.join(join_keys)}, {right_columns}"
|
609
|
-
exclude_ts_expr = (
|
610
|
-
"" if right.timestamp_col is None or right_columns != "*" else f"EXCLUDE {right.timestamp_col}"
|
611
|
-
)
|
612
|
-
right_df = self._session.sql(
|
613
|
-
f"SELECT {right_columns} {exclude_ts_expr} FROM {right.fully_qualified_name()}"
|
614
|
-
)
|
615
|
-
|
616
|
-
left_df = left_df.join(right=right_df, on=join_keys)
|
617
|
-
|
618
|
-
return FeatureView(
|
619
|
-
name=name,
|
620
|
-
entities=left.entities,
|
621
|
-
feature_df=left_df,
|
622
|
-
timestamp_col=left.timestamp_col,
|
623
|
-
desc=desc,
|
624
|
-
)
|
625
|
-
|
626
534
|
@dispatch_decorator(prpr_version="1.0.8")
|
627
535
|
def resume_feature_view(self, feature_view: FeatureView) -> FeatureView:
|
628
536
|
"""
|
@@ -1056,10 +964,7 @@ class FeatureStore:
|
|
1056
964
|
WAREHOUSE = {warehouse}
|
1057
965
|
AS {feature_view.query}
|
1058
966
|
"""
|
1059
|
-
self._session.sql(query).collect(statement_params=self._telemetry_stmp)
|
1060
|
-
self._session.sql(f"ALTER DYNAMIC TABLE {fully_qualified_name} REFRESH").collect(
|
1061
|
-
block=block, statement_params=self._telemetry_stmp
|
1062
|
-
)
|
967
|
+
self._session.sql(query).collect(block=block, statement_params=self._telemetry_stmp)
|
1063
968
|
|
1064
969
|
if schedule_task:
|
1065
970
|
try:
|
@@ -1092,6 +997,10 @@ class FeatureStore:
|
|
1092
997
|
),
|
1093
998
|
) from e
|
1094
999
|
|
1000
|
+
if block:
|
1001
|
+
self._check_dynamic_table_refresh_mode(feature_view_name)
|
1002
|
+
|
1003
|
+
def _check_dynamic_table_refresh_mode(self, feature_view_name: SqlIdentifier) -> None:
|
1095
1004
|
found_dts = self._find_object("DYNAMIC TABLES", feature_view_name)
|
1096
1005
|
if len(found_dts) != 1:
|
1097
1006
|
raise snowml_exceptions.SnowflakeMLException(
|
@@ -1161,7 +1070,7 @@ class FeatureStore:
|
|
1161
1070
|
def _validate_entity_exists(self, name: SqlIdentifier) -> bool:
|
1162
1071
|
full_entity_tag_name = self._get_entity_name(name)
|
1163
1072
|
found_rows = self._find_object("TAGS", full_entity_tag_name)
|
1164
|
-
return len(found_rows)
|
1073
|
+
return len(found_rows) == 1
|
1165
1074
|
|
1166
1075
|
def _join_features(
|
1167
1076
|
self,
|
snowflake/ml/fileset/sfcfs.py
CHANGED
@@ -90,11 +90,12 @@ class SFFileSystem(fsspec.AbstractFileSystem):
|
|
90
90
|
) from e
|
91
91
|
|
92
92
|
if sf_connection:
|
93
|
-
self.
|
93
|
+
self._session = snowpark.Session.builder.config("connection", sf_connection).create()
|
94
94
|
elif snowpark_session:
|
95
|
-
self.
|
95
|
+
self._session = snowpark_session
|
96
96
|
else:
|
97
97
|
raise ValueError("Either sf_connection or snowpark_session has to be non-empty!")
|
98
|
+
self._conn = self._session._conn._conn # Telemetry wrappers expect connection under `conn_attr_name="_conn"``
|
98
99
|
self._kwargs = kwargs
|
99
100
|
self._stage_fs_set: Dict[Tuple[str, str, str], stage_fs.SFStageFileSystem] = {}
|
100
101
|
|
@@ -168,7 +169,7 @@ class SFFileSystem(fsspec.AbstractFileSystem):
|
|
168
169
|
stage_fs_key = (sf_file_path.database, sf_file_path.schema, sf_file_path.stage)
|
169
170
|
if stage_fs_key not in self._stage_fs_set:
|
170
171
|
cnt_stage_fs = stage_fs.SFStageFileSystem(
|
171
|
-
|
172
|
+
snowpark_session=self._session,
|
172
173
|
db=sf_file_path.database,
|
173
174
|
schema=sf_file_path.schema,
|
174
175
|
stage=sf_file_path.stage,
|