snowflake-ml-python 1.7.2__py3-none-any.whl → 1.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- snowflake/cortex/__init__.py +16 -8
- snowflake/cortex/_classify_text.py +12 -1
- snowflake/cortex/_complete.py +82 -13
- snowflake/cortex/_embed_text_1024.py +9 -2
- snowflake/cortex/_embed_text_768.py +9 -2
- snowflake/cortex/_extract_answer.py +9 -2
- snowflake/cortex/_sentiment.py +9 -2
- snowflake/cortex/_summarize.py +9 -2
- snowflake/cortex/_translate.py +9 -2
- snowflake/ml/_internal/env_utils.py +7 -52
- snowflake/ml/_internal/utils/identifier.py +4 -2
- snowflake/ml/data/__init__.py +3 -0
- snowflake/ml/data/_internal/arrow_ingestor.py +4 -4
- snowflake/ml/data/data_connector.py +53 -11
- snowflake/ml/data/data_ingestor.py +2 -1
- snowflake/ml/data/torch_utils.py +18 -5
- snowflake/ml/feature_store/examples/example_helper.py +2 -1
- snowflake/ml/fileset/fileset.py +18 -18
- snowflake/ml/model/_client/model/model_version_impl.py +5 -3
- snowflake/ml/model/_client/ops/model_ops.py +2 -6
- snowflake/ml/model/_client/sql/model_version.py +11 -0
- snowflake/ml/model/_model_composer/model_composer.py +8 -3
- snowflake/ml/model/_model_composer/model_manifest/model_manifest.py +20 -1
- snowflake/ml/model/_model_composer/model_manifest/model_manifest_schema.py +1 -0
- snowflake/ml/model/_model_composer/model_method/constants.py +1 -0
- snowflake/ml/model/_model_composer/model_method/function_generator.py +2 -0
- snowflake/ml/model/_model_composer/model_method/infer_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_partitioned.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/infer_table_function.py_template +1 -1
- snowflake/ml/model/_model_composer/model_method/model_method.py +9 -1
- snowflake/ml/model/_model_composer/model_user_file/model_user_file.py +27 -0
- snowflake/ml/model/_packager/model_handlers/_utils.py +27 -2
- snowflake/ml/model/_packager/model_handlers/catboost.py +3 -3
- snowflake/ml/model/_packager/model_handlers/huggingface_pipeline.py +5 -1
- snowflake/ml/model/_packager/model_handlers/lightgbm.py +5 -3
- snowflake/ml/model/_packager/model_handlers/sentence_transformers.py +55 -20
- snowflake/ml/model/_packager/model_handlers/sklearn.py +9 -10
- snowflake/ml/model/_packager/model_handlers/snowmlmodel.py +66 -28
- snowflake/ml/model/_packager/model_handlers/tensorflow.py +70 -17
- snowflake/ml/model/_packager/model_handlers/xgboost.py +3 -3
- snowflake/ml/model/_packager/model_meta/model_meta.py +3 -0
- snowflake/ml/model/_packager/model_meta/model_meta_schema.py +6 -1
- snowflake/ml/model/_packager/model_runtime/_snowml_inference_alternative_requirements.py +2 -2
- snowflake/ml/model/_packager/model_task/model_task_utils.py +3 -2
- snowflake/ml/model/_signatures/pandas_handler.py +1 -1
- snowflake/ml/model/_signatures/snowpark_handler.py +8 -2
- snowflake/ml/model/type_hints.py +1 -0
- snowflake/ml/modeling/_internal/model_trainer_builder.py +0 -8
- snowflake/ml/modeling/_internal/model_transformer_builder.py +0 -13
- snowflake/ml/modeling/pipeline/pipeline.py +6 -176
- snowflake/ml/modeling/xgboost/xgb_classifier.py +161 -88
- snowflake/ml/modeling/xgboost/xgb_regressor.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_classifier.py +160 -85
- snowflake/ml/modeling/xgboost/xgbrf_regressor.py +160 -85
- snowflake/ml/monitoring/_client/model_monitor_sql_client.py +4 -4
- snowflake/ml/registry/_manager/model_manager.py +70 -33
- snowflake/ml/registry/registry.py +41 -22
- snowflake/ml/version.py +1 -1
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/METADATA +38 -9
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/RECORD +63 -67
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/WHEEL +1 -1
- snowflake/ml/_internal/utils/retryable_http.py +0 -39
- snowflake/ml/fileset/parquet_parser.py +0 -170
- snowflake/ml/fileset/tf_dataset.py +0 -88
- snowflake/ml/fileset/torch_datapipe.py +0 -57
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_handlers.py +0 -151
- snowflake/ml/modeling/_internal/ml_runtime_implementations/ml_runtime_trainer.py +0 -66
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/LICENSE.txt +0 -0
- {snowflake_ml_python-1.7.2.dist-info → snowflake_ml_python-1.7.3.dist-info}/top_level.txt +0 -0
snowflake/cortex/__init__.py
CHANGED
@@ -1,24 +1,32 @@
|
|
1
|
-
from snowflake.cortex._classify_text import ClassifyText
|
2
|
-
from snowflake.cortex._complete import Complete, CompleteOptions
|
3
|
-
from snowflake.cortex._embed_text_768 import EmbedText768
|
4
|
-
from snowflake.cortex._embed_text_1024 import EmbedText1024
|
5
|
-
from snowflake.cortex._extract_answer import ExtractAnswer
|
1
|
+
from snowflake.cortex._classify_text import ClassifyText, classify_text
|
2
|
+
from snowflake.cortex._complete import Complete, CompleteOptions, complete
|
3
|
+
from snowflake.cortex._embed_text_768 import EmbedText768, embed_text_768
|
4
|
+
from snowflake.cortex._embed_text_1024 import EmbedText1024, embed_text_1024
|
5
|
+
from snowflake.cortex._extract_answer import ExtractAnswer, extract_answer
|
6
6
|
from snowflake.cortex._finetune import Finetune, FinetuneJob, FinetuneStatus
|
7
|
-
from snowflake.cortex._sentiment import Sentiment
|
8
|
-
from snowflake.cortex._summarize import Summarize
|
9
|
-
from snowflake.cortex._translate import Translate
|
7
|
+
from snowflake.cortex._sentiment import Sentiment, sentiment
|
8
|
+
from snowflake.cortex._summarize import Summarize, summarize
|
9
|
+
from snowflake.cortex._translate import Translate, translate
|
10
10
|
|
11
11
|
__all__ = [
|
12
12
|
"ClassifyText",
|
13
|
+
"classify_text",
|
13
14
|
"Complete",
|
15
|
+
"complete",
|
14
16
|
"CompleteOptions",
|
15
17
|
"EmbedText768",
|
18
|
+
"embed_text_768",
|
16
19
|
"EmbedText1024",
|
20
|
+
"embed_text_1024",
|
17
21
|
"ExtractAnswer",
|
22
|
+
"extract_answer",
|
18
23
|
"Finetune",
|
19
24
|
"FinetuneJob",
|
20
25
|
"FinetuneStatus",
|
21
26
|
"Sentiment",
|
27
|
+
"sentiment",
|
22
28
|
"Summarize",
|
29
|
+
"summarize",
|
23
30
|
"Translate",
|
31
|
+
"translate",
|
24
32
|
]
|
@@ -1,5 +1,7 @@
|
|
1
1
|
from typing import List, Optional, Union, cast
|
2
2
|
|
3
|
+
from typing_extensions import deprecated
|
4
|
+
|
3
5
|
from snowflake import snowpark
|
4
6
|
from snowflake.cortex._util import CORTEX_FUNCTIONS_TELEMETRY_PROJECT, call_sql_function
|
5
7
|
from snowflake.ml._internal import telemetry
|
@@ -8,7 +10,7 @@ from snowflake.ml._internal import telemetry
|
|
8
10
|
@telemetry.send_api_usage_telemetry(
|
9
11
|
project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT,
|
10
12
|
)
|
11
|
-
def
|
13
|
+
def classify_text(
|
12
14
|
str_input: Union[str, snowpark.Column],
|
13
15
|
categories: Union[List[str], snowpark.Column],
|
14
16
|
session: Optional[snowpark.Session] = None,
|
@@ -34,3 +36,12 @@ def _classify_text_impl(
|
|
34
36
|
session: Optional[snowpark.Session] = None,
|
35
37
|
) -> Union[str, snowpark.Column]:
|
36
38
|
return cast(Union[str, snowpark.Column], call_sql_function(function, session, str_input, categories))
|
39
|
+
|
40
|
+
|
41
|
+
ClassifyText = deprecated(
|
42
|
+
"ClassifyText() is deprecated and will be removed in a future release. Please use classify_text() instead."
|
43
|
+
)(
|
44
|
+
telemetry.send_api_usage_telemetry(
|
45
|
+
project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT,
|
46
|
+
)(classify_text)
|
47
|
+
)
|
snowflake/cortex/_complete.py
CHANGED
@@ -6,7 +6,7 @@ from typing import Any, Callable, Dict, Iterator, List, Optional, TypedDict, Uni
|
|
6
6
|
from urllib.parse import urlunparse
|
7
7
|
|
8
8
|
import requests
|
9
|
-
from typing_extensions import NotRequired
|
9
|
+
from typing_extensions import NotRequired, deprecated
|
10
10
|
|
11
11
|
from snowflake import snowpark
|
12
12
|
from snowflake.cortex._sse_client import SSEClient
|
@@ -127,8 +127,26 @@ def _xp_dict_to_response(raw_resp: Dict[str, Any]) -> requests.Response:
|
|
127
127
|
response.status_code = int(raw_resp["status"])
|
128
128
|
response.headers = raw_resp["headers"]
|
129
129
|
|
130
|
+
request_id = None
|
131
|
+
for key, value in raw_resp["headers"].items():
|
132
|
+
# Note: there is some whitespace in the headers making it not possible
|
133
|
+
# to directly index the header reliably.
|
134
|
+
if key.strip().lower() == "x-snowflake-request-id":
|
135
|
+
request_id = value
|
136
|
+
break
|
137
|
+
|
130
138
|
data = raw_resp["content"]
|
131
|
-
|
139
|
+
try:
|
140
|
+
data = json.loads(data)
|
141
|
+
except json.JSONDecodeError:
|
142
|
+
raise ValueError(f"Request failed (request id: {request_id})")
|
143
|
+
|
144
|
+
if response.status_code < 200 or response.status_code >= 300:
|
145
|
+
if "message" not in data:
|
146
|
+
raise ValueError(f"Request failed (request id: {request_id})")
|
147
|
+
message = data["message"]
|
148
|
+
raise ValueError(f"Request failed: {message} (request id: {request_id})")
|
149
|
+
|
132
150
|
# Convert the dictionary to a string format that resembles the SSE event format
|
133
151
|
# For example, if the dict is {'event': 'message', 'data': 'your data'}, it should be formatted like this:
|
134
152
|
sse_format_data = ""
|
@@ -144,6 +162,7 @@ def _xp_dict_to_response(raw_resp: Dict[str, Any]) -> requests.Response:
|
|
144
162
|
|
145
163
|
@retry
|
146
164
|
def _call_complete_xp(
|
165
|
+
snow_api_xp_request_handler: Optional[Callable[..., Dict[str, Any]]],
|
147
166
|
model: str,
|
148
167
|
prompt: Union[str, List[ConversationMessage]],
|
149
168
|
options: Optional[CompleteOptions] = None,
|
@@ -151,9 +170,8 @@ def _call_complete_xp(
|
|
151
170
|
) -> requests.Response:
|
152
171
|
headers = _make_common_request_headers()
|
153
172
|
body = _make_request_body(model, prompt, options)
|
154
|
-
|
155
|
-
|
156
|
-
raw_resp = _snowflake.send_snow_api_request("POST", _REST_COMPLETE_URL, {}, headers, body, {}, deadline)
|
173
|
+
assert snow_api_xp_request_handler is not None
|
174
|
+
raw_resp = snow_api_xp_request_handler("POST", _REST_COMPLETE_URL, {}, headers, body, {}, deadline)
|
157
175
|
return _xp_dict_to_response(raw_resp)
|
158
176
|
|
159
177
|
|
@@ -218,17 +236,26 @@ def _complete_call_sql_function_snowpark(
|
|
218
236
|
|
219
237
|
|
220
238
|
def _complete_non_streaming_immediate(
|
239
|
+
snow_api_xp_request_handler: Optional[Callable[..., Dict[str, Any]]],
|
221
240
|
model: str,
|
222
241
|
prompt: Union[str, List[ConversationMessage]],
|
223
242
|
options: Optional[CompleteOptions],
|
224
243
|
session: Optional[snowpark.Session] = None,
|
225
244
|
deadline: Optional[float] = None,
|
226
245
|
) -> str:
|
227
|
-
response = _complete_rest(
|
246
|
+
response = _complete_rest(
|
247
|
+
snow_api_xp_request_handler=snow_api_xp_request_handler,
|
248
|
+
model=model,
|
249
|
+
prompt=prompt,
|
250
|
+
options=options,
|
251
|
+
session=session,
|
252
|
+
deadline=deadline,
|
253
|
+
)
|
228
254
|
return "".join(response)
|
229
255
|
|
230
256
|
|
231
257
|
def _complete_non_streaming_impl(
|
258
|
+
snow_api_xp_request_handler: Optional[Callable[..., Dict[str, Any]]],
|
232
259
|
function: str,
|
233
260
|
model: Union[str, snowpark.Column],
|
234
261
|
prompt: Union[str, List[ConversationMessage], snowpark.Column],
|
@@ -246,19 +273,31 @@ def _complete_non_streaming_impl(
|
|
246
273
|
if isinstance(options, snowpark.Column):
|
247
274
|
raise ValueError("'options' cannot be a snowpark.Column when 'prompt' is a string.")
|
248
275
|
return _complete_non_streaming_immediate(
|
249
|
-
|
276
|
+
snow_api_xp_request_handler=snow_api_xp_request_handler,
|
277
|
+
model=model,
|
278
|
+
prompt=prompt,
|
279
|
+
options=options,
|
280
|
+
session=session,
|
281
|
+
deadline=deadline,
|
250
282
|
)
|
251
283
|
|
252
284
|
|
253
285
|
def _complete_rest(
|
286
|
+
snow_api_xp_request_handler: Optional[Callable[..., Dict[str, Any]]],
|
254
287
|
model: str,
|
255
288
|
prompt: Union[str, List[ConversationMessage]],
|
256
289
|
options: Optional[CompleteOptions] = None,
|
257
290
|
session: Optional[snowpark.Session] = None,
|
258
291
|
deadline: Optional[float] = None,
|
259
292
|
) -> Iterator[str]:
|
260
|
-
if
|
261
|
-
response = _call_complete_xp(
|
293
|
+
if snow_api_xp_request_handler is not None:
|
294
|
+
response = _call_complete_xp(
|
295
|
+
snow_api_xp_request_handler=snow_api_xp_request_handler,
|
296
|
+
model=model,
|
297
|
+
prompt=prompt,
|
298
|
+
options=options,
|
299
|
+
deadline=deadline,
|
300
|
+
)
|
262
301
|
else:
|
263
302
|
response = _call_complete_rest(model=model, prompt=prompt, options=options, session=session, deadline=deadline)
|
264
303
|
assert response.status_code >= 200 and response.status_code < 300
|
@@ -268,10 +307,11 @@ def _complete_rest(
|
|
268
307
|
def _complete_impl(
|
269
308
|
model: Union[str, snowpark.Column],
|
270
309
|
prompt: Union[str, List[ConversationMessage], snowpark.Column],
|
310
|
+
snow_api_xp_request_handler: Optional[Callable[..., Dict[str, Any]]] = None,
|
311
|
+
function: str = "snowflake.cortex.complete",
|
271
312
|
options: Optional[CompleteOptions] = None,
|
272
313
|
session: Optional[snowpark.Session] = None,
|
273
314
|
stream: bool = False,
|
274
|
-
function: str = "snowflake.cortex.complete",
|
275
315
|
timeout: Optional[float] = None,
|
276
316
|
deadline: Optional[float] = None,
|
277
317
|
) -> Union[str, Iterator[str], snowpark.Column]:
|
@@ -284,14 +324,29 @@ def _complete_impl(
|
|
284
324
|
raise ValueError("in REST mode, 'model' must be a string")
|
285
325
|
if not isinstance(prompt, str) and not isinstance(prompt, List):
|
286
326
|
raise ValueError("in REST mode, 'prompt' must be a string or a list of ConversationMessage")
|
287
|
-
return _complete_rest(
|
288
|
-
|
327
|
+
return _complete_rest(
|
328
|
+
snow_api_xp_request_handler=snow_api_xp_request_handler,
|
329
|
+
model=model,
|
330
|
+
prompt=prompt,
|
331
|
+
options=options,
|
332
|
+
session=session,
|
333
|
+
deadline=deadline,
|
334
|
+
)
|
335
|
+
return _complete_non_streaming_impl(
|
336
|
+
snow_api_xp_request_handler=snow_api_xp_request_handler,
|
337
|
+
function=function,
|
338
|
+
model=model,
|
339
|
+
prompt=prompt,
|
340
|
+
options=options,
|
341
|
+
session=session,
|
342
|
+
deadline=deadline,
|
343
|
+
)
|
289
344
|
|
290
345
|
|
291
346
|
@telemetry.send_api_usage_telemetry(
|
292
347
|
project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT,
|
293
348
|
)
|
294
|
-
def
|
349
|
+
def complete(
|
295
350
|
model: Union[str, snowpark.Column],
|
296
351
|
prompt: Union[str, List[ConversationMessage], snowpark.Column],
|
297
352
|
*,
|
@@ -319,10 +374,19 @@ def Complete(
|
|
319
374
|
Returns:
|
320
375
|
A column of string responses.
|
321
376
|
"""
|
377
|
+
|
378
|
+
# Set the XP snow api function, if available.
|
379
|
+
snow_api_xp_request_handler = None
|
380
|
+
if is_in_stored_procedure(): # type: ignore[no-untyped-call]
|
381
|
+
import _snowflake
|
382
|
+
|
383
|
+
snow_api_xp_request_handler = _snowflake.send_snow_api_request
|
384
|
+
|
322
385
|
try:
|
323
386
|
return _complete_impl(
|
324
387
|
model,
|
325
388
|
prompt,
|
389
|
+
snow_api_xp_request_handler=snow_api_xp_request_handler,
|
326
390
|
options=options,
|
327
391
|
session=session,
|
328
392
|
stream=stream,
|
@@ -331,3 +395,8 @@ def Complete(
|
|
331
395
|
)
|
332
396
|
except ValueError as err:
|
333
397
|
raise err
|
398
|
+
|
399
|
+
|
400
|
+
Complete = deprecated("Complete() is deprecated and will be removed in a future release. Use complete() instead")(
|
401
|
+
telemetry.send_api_usage_telemetry(project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT)(complete)
|
402
|
+
)
|
@@ -1,5 +1,7 @@
|
|
1
1
|
from typing import List, Optional, Union, cast
|
2
2
|
|
3
|
+
from typing_extensions import deprecated
|
4
|
+
|
3
5
|
from snowflake import snowpark
|
4
6
|
from snowflake.cortex._util import CORTEX_FUNCTIONS_TELEMETRY_PROJECT, call_sql_function
|
5
7
|
from snowflake.ml._internal import telemetry
|
@@ -8,12 +10,12 @@ from snowflake.ml._internal import telemetry
|
|
8
10
|
@telemetry.send_api_usage_telemetry(
|
9
11
|
project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT,
|
10
12
|
)
|
11
|
-
def
|
13
|
+
def embed_text_1024(
|
12
14
|
model: Union[str, snowpark.Column],
|
13
15
|
text: Union[str, snowpark.Column],
|
14
16
|
session: Optional[snowpark.Session] = None,
|
15
17
|
) -> Union[List[float], snowpark.Column]:
|
16
|
-
"""
|
18
|
+
"""Calls into the LLM inference service to embed the text.
|
17
19
|
|
18
20
|
Args:
|
19
21
|
model: A Column of strings representing the model to use for embedding. The value
|
@@ -35,3 +37,8 @@ def _embed_text_1024_impl(
|
|
35
37
|
session: Optional[snowpark.Session] = None,
|
36
38
|
) -> Union[List[float], snowpark.Column]:
|
37
39
|
return cast(Union[List[float], snowpark.Column], call_sql_function(function, session, model, text))
|
40
|
+
|
41
|
+
|
42
|
+
EmbedText1024 = deprecated(
|
43
|
+
"EmbedText1024() is deprecated and will be removed in a future release. Use embed_text_1024() instead"
|
44
|
+
)(telemetry.send_api_usage_telemetry(project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT)(embed_text_1024))
|
@@ -1,5 +1,7 @@
|
|
1
1
|
from typing import List, Optional, Union, cast
|
2
2
|
|
3
|
+
from typing_extensions import deprecated
|
4
|
+
|
3
5
|
from snowflake import snowpark
|
4
6
|
from snowflake.cortex._util import CORTEX_FUNCTIONS_TELEMETRY_PROJECT, call_sql_function
|
5
7
|
from snowflake.ml._internal import telemetry
|
@@ -8,12 +10,12 @@ from snowflake.ml._internal import telemetry
|
|
8
10
|
@telemetry.send_api_usage_telemetry(
|
9
11
|
project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT,
|
10
12
|
)
|
11
|
-
def
|
13
|
+
def embed_text_768(
|
12
14
|
model: Union[str, snowpark.Column],
|
13
15
|
text: Union[str, snowpark.Column],
|
14
16
|
session: Optional[snowpark.Session] = None,
|
15
17
|
) -> Union[List[float], snowpark.Column]:
|
16
|
-
"""
|
18
|
+
"""Calls into the LLM inference service to embed the text.
|
17
19
|
|
18
20
|
Args:
|
19
21
|
model: A Column of strings representing the model to use for embedding. The value
|
@@ -35,3 +37,8 @@ def _embed_text_768_impl(
|
|
35
37
|
session: Optional[snowpark.Session] = None,
|
36
38
|
) -> Union[List[float], snowpark.Column]:
|
37
39
|
return cast(Union[List[float], snowpark.Column], call_sql_function(function, session, model, text))
|
40
|
+
|
41
|
+
|
42
|
+
EmbedText768 = deprecated(
|
43
|
+
"EmbedText768() is deprecated and will be removed in a future release. Use embed_text_768() instead"
|
44
|
+
)(telemetry.send_api_usage_telemetry(project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT)(embed_text_768))
|
@@ -1,5 +1,7 @@
|
|
1
1
|
from typing import Optional, Union, cast
|
2
2
|
|
3
|
+
from typing_extensions import deprecated
|
4
|
+
|
3
5
|
from snowflake import snowpark
|
4
6
|
from snowflake.cortex._util import CORTEX_FUNCTIONS_TELEMETRY_PROJECT, call_sql_function
|
5
7
|
from snowflake.ml._internal import telemetry
|
@@ -8,12 +10,12 @@ from snowflake.ml._internal import telemetry
|
|
8
10
|
@telemetry.send_api_usage_telemetry(
|
9
11
|
project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT,
|
10
12
|
)
|
11
|
-
def
|
13
|
+
def extract_answer(
|
12
14
|
from_text: Union[str, snowpark.Column],
|
13
15
|
question: Union[str, snowpark.Column],
|
14
16
|
session: Optional[snowpark.Session] = None,
|
15
17
|
) -> Union[str, snowpark.Column]:
|
16
|
-
"""
|
18
|
+
"""Calls into the LLM inference service to extract an answer from within specified text.
|
17
19
|
|
18
20
|
Args:
|
19
21
|
from_text: A Column of strings representing input text.
|
@@ -34,3 +36,8 @@ def _extract_answer_impl(
|
|
34
36
|
session: Optional[snowpark.Session] = None,
|
35
37
|
) -> Union[str, snowpark.Column]:
|
36
38
|
return cast(Union[str, snowpark.Column], call_sql_function(function, session, from_text, question))
|
39
|
+
|
40
|
+
|
41
|
+
ExtractAnswer = deprecated(
|
42
|
+
"ExtractAnswer() is deprecated and will be removed in a future release. Use extract_answer() instead"
|
43
|
+
)(telemetry.send_api_usage_telemetry(project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT)(extract_answer))
|
snowflake/cortex/_sentiment.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
from typing import Optional, Union, cast
|
2
2
|
|
3
|
+
from typing_extensions import deprecated
|
4
|
+
|
3
5
|
from snowflake import snowpark
|
4
6
|
from snowflake.cortex._util import CORTEX_FUNCTIONS_TELEMETRY_PROJECT, call_sql_function
|
5
7
|
from snowflake.ml._internal import telemetry
|
@@ -8,10 +10,10 @@ from snowflake.ml._internal import telemetry
|
|
8
10
|
@telemetry.send_api_usage_telemetry(
|
9
11
|
project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT,
|
10
12
|
)
|
11
|
-
def
|
13
|
+
def sentiment(
|
12
14
|
text: Union[str, snowpark.Column], session: Optional[snowpark.Session] = None
|
13
15
|
) -> Union[float, snowpark.Column]:
|
14
|
-
"""
|
16
|
+
"""Calls into the LLM inference service to perform sentiment analysis on the input text.
|
15
17
|
|
16
18
|
Args:
|
17
19
|
text: A Column of text strings to send to the LLM.
|
@@ -31,3 +33,8 @@ def _sentiment_impl(
|
|
31
33
|
if isinstance(output, snowpark.Column):
|
32
34
|
return output
|
33
35
|
return float(cast(str, output))
|
36
|
+
|
37
|
+
|
38
|
+
Sentiment = deprecated("Sentiment() is deprecated and will be removed in a future release. Use sentiment() instead")(
|
39
|
+
telemetry.send_api_usage_telemetry(project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT)(sentiment)
|
40
|
+
)
|
snowflake/cortex/_summarize.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
from typing import Optional, Union, cast
|
2
2
|
|
3
|
+
from typing_extensions import deprecated
|
4
|
+
|
3
5
|
from snowflake import snowpark
|
4
6
|
from snowflake.cortex._util import CORTEX_FUNCTIONS_TELEMETRY_PROJECT, call_sql_function
|
5
7
|
from snowflake.ml._internal import telemetry
|
@@ -8,11 +10,11 @@ from snowflake.ml._internal import telemetry
|
|
8
10
|
@telemetry.send_api_usage_telemetry(
|
9
11
|
project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT,
|
10
12
|
)
|
11
|
-
def
|
13
|
+
def summarize(
|
12
14
|
text: Union[str, snowpark.Column],
|
13
15
|
session: Optional[snowpark.Session] = None,
|
14
16
|
) -> Union[str, snowpark.Column]:
|
15
|
-
"""
|
17
|
+
"""Calls into the LLM inference service to summarize the input text.
|
16
18
|
|
17
19
|
Args:
|
18
20
|
text: A Column of strings to summarize.
|
@@ -31,3 +33,8 @@ def _summarize_impl(
|
|
31
33
|
session: Optional[snowpark.Session] = None,
|
32
34
|
) -> Union[str, snowpark.Column]:
|
33
35
|
return cast(Union[str, snowpark.Column], call_sql_function(function, session, text))
|
36
|
+
|
37
|
+
|
38
|
+
Summarize = deprecated("Summarize() is deprecated and will be removed in a future release. Use summarize() instead")(
|
39
|
+
telemetry.send_api_usage_telemetry(project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT)(summarize)
|
40
|
+
)
|
snowflake/cortex/_translate.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
from typing import Optional, Union, cast
|
2
2
|
|
3
|
+
from typing_extensions import deprecated
|
4
|
+
|
3
5
|
from snowflake import snowpark
|
4
6
|
from snowflake.cortex._util import CORTEX_FUNCTIONS_TELEMETRY_PROJECT, call_sql_function
|
5
7
|
from snowflake.ml._internal import telemetry
|
@@ -8,13 +10,13 @@ from snowflake.ml._internal import telemetry
|
|
8
10
|
@telemetry.send_api_usage_telemetry(
|
9
11
|
project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT,
|
10
12
|
)
|
11
|
-
def
|
13
|
+
def translate(
|
12
14
|
text: Union[str, snowpark.Column],
|
13
15
|
from_language: Union[str, snowpark.Column],
|
14
16
|
to_language: Union[str, snowpark.Column],
|
15
17
|
session: Optional[snowpark.Session] = None,
|
16
18
|
) -> Union[str, snowpark.Column]:
|
17
|
-
"""
|
19
|
+
"""Calls into the LLM inference service to perform translation.
|
18
20
|
|
19
21
|
Args:
|
20
22
|
text: A Column of strings to translate.
|
@@ -37,3 +39,8 @@ def _translate_impl(
|
|
37
39
|
session: Optional[snowpark.Session] = None,
|
38
40
|
) -> Union[str, snowpark.Column]:
|
39
41
|
return cast(Union[str, snowpark.Column], call_sql_function(function, session, text, from_language, to_language))
|
42
|
+
|
43
|
+
|
44
|
+
Translate = deprecated("Translate() is deprecated and will be removed in a future release. Use translate() instead")(
|
45
|
+
telemetry.send_api_usage_telemetry(project=CORTEX_FUNCTIONS_TELEMETRY_PROJECT)(translate)
|
46
|
+
)
|
@@ -15,7 +15,6 @@ import snowflake.connector
|
|
15
15
|
from snowflake.ml._internal import env as snowml_env
|
16
16
|
from snowflake.ml._internal.utils import query_result_checker
|
17
17
|
from snowflake.snowpark import context, exceptions, session
|
18
|
-
from snowflake.snowpark._internal import utils as snowpark_utils
|
19
18
|
|
20
19
|
|
21
20
|
class CONDA_OS(Enum):
|
@@ -344,55 +343,6 @@ def relax_requirement_version(req: requirements.Requirement) -> requirements.Req
|
|
344
343
|
return new_req
|
345
344
|
|
346
345
|
|
347
|
-
def get_matched_package_versions_in_snowflake_conda_channel(
|
348
|
-
req: requirements.Requirement,
|
349
|
-
python_version: str = snowml_env.PYTHON_VERSION,
|
350
|
-
conda_os: CONDA_OS = CONDA_OS.LINUX_64,
|
351
|
-
) -> List[version.Version]:
|
352
|
-
"""Search the snowflake anaconda channel for packages that matches the specifier. Note that this will be the
|
353
|
-
source of truth for checking whether a package indeed exists in Snowflake conda channel.
|
354
|
-
|
355
|
-
Given that a package comes in different architectures, we only check for the Linux x86_64 architecture and assume
|
356
|
-
the package exists in other architectures. If such an assumption does not hold true for a certain package, the
|
357
|
-
caller should specify the architecture to search for.
|
358
|
-
|
359
|
-
Args:
|
360
|
-
req: Requirement specifier.
|
361
|
-
python_version: A string of python version where model is run.
|
362
|
-
conda_os: Specified platform to search availability of the package.
|
363
|
-
|
364
|
-
Returns:
|
365
|
-
List of package versions that meet the requirement specifier.
|
366
|
-
"""
|
367
|
-
# Move the retryable_http import here as when UDF import this file, it won't have the "requests" dependency.
|
368
|
-
from snowflake.ml._internal.utils import retryable_http
|
369
|
-
|
370
|
-
assert not snowpark_utils.is_in_stored_procedure() # type: ignore[no-untyped-call]
|
371
|
-
|
372
|
-
url = f"{SNOWFLAKE_CONDA_CHANNEL_URL}/{conda_os.value}/repodata.json"
|
373
|
-
|
374
|
-
if req.name not in _SNOWFLAKE_CONDA_PACKAGE_CACHE:
|
375
|
-
try:
|
376
|
-
http_client = retryable_http.get_http_client()
|
377
|
-
parsed_python_version = version.Version(python_version)
|
378
|
-
python_version_build_str = f"py{parsed_python_version.major}{parsed_python_version.minor}"
|
379
|
-
repodata = http_client.get(url).json()
|
380
|
-
assert isinstance(repodata, dict)
|
381
|
-
packages_info = repodata["packages"]
|
382
|
-
assert isinstance(packages_info, dict)
|
383
|
-
version_list = [
|
384
|
-
version.parse(package_info["version"])
|
385
|
-
for package_info in packages_info.values()
|
386
|
-
if package_info["name"] == req.name and python_version_build_str in package_info["build"]
|
387
|
-
]
|
388
|
-
_SNOWFLAKE_CONDA_PACKAGE_CACHE[req.name] = version_list
|
389
|
-
except Exception:
|
390
|
-
pass
|
391
|
-
|
392
|
-
matched_versions = list(req.specifier.filter(set(_SNOWFLAKE_CONDA_PACKAGE_CACHE.get(req.name, []))))
|
393
|
-
return matched_versions
|
394
|
-
|
395
|
-
|
396
346
|
def get_matched_package_versions_in_information_schema_with_active_session(
|
397
347
|
reqs: List[requirements.Requirement], python_version: str
|
398
348
|
) -> Dict[str, List[version.Version]]:
|
@@ -404,7 +354,10 @@ def get_matched_package_versions_in_information_schema_with_active_session(
|
|
404
354
|
|
405
355
|
|
406
356
|
def get_matched_package_versions_in_information_schema(
|
407
|
-
session: session.Session,
|
357
|
+
session: session.Session,
|
358
|
+
reqs: List[requirements.Requirement],
|
359
|
+
python_version: str,
|
360
|
+
statement_params: Optional[Dict[str, Any]] = None,
|
408
361
|
) -> Dict[str, List[version.Version]]:
|
409
362
|
"""Look up the information_schema table to check if a package with the specified specifier exists in the Snowflake
|
410
363
|
Conda channel. Note that this is not the source of truth due to the potential delay caused by a package that might
|
@@ -414,6 +367,7 @@ def get_matched_package_versions_in_information_schema(
|
|
414
367
|
session: Snowflake connection session.
|
415
368
|
reqs: List of requirement specifiers.
|
416
369
|
python_version: A string of python version where model is run.
|
370
|
+
statement_params: Optional statement parameters.
|
417
371
|
|
418
372
|
Returns:
|
419
373
|
A Dict, whose key is the package name, and value is a list of versions match the requirements.
|
@@ -451,8 +405,9 @@ def get_matched_package_versions_in_information_schema(
|
|
451
405
|
query_result_checker.SqlResultValidator(
|
452
406
|
session=session,
|
453
407
|
query=sql,
|
408
|
+
statement_params=statement_params,
|
454
409
|
)
|
455
|
-
.has_column("VERSION")
|
410
|
+
.has_column("VERSION", allow_empty=True)
|
456
411
|
.has_dimensions(expected_rows=None, expected_cols=2)
|
457
412
|
.validate()
|
458
413
|
)
|
@@ -158,8 +158,10 @@ def parse_schema_level_object_identifier(
|
|
158
158
|
res = _SF_SCHEMA_LEVEL_OBJECT_RE.fullmatch(object_name)
|
159
159
|
if not res:
|
160
160
|
raise ValueError(
|
161
|
-
"Invalid
|
162
|
-
|
161
|
+
f"Invalid object name `{object_name}` cannot be parsed as a SQL identifier. "
|
162
|
+
"Alphanumeric characters and underscores are permitted. "
|
163
|
+
"See https://docs.snowflake.com/en/sql-reference/identifiers-syntax for "
|
164
|
+
"more information."
|
163
165
|
)
|
164
166
|
return (
|
165
167
|
res.group("db"),
|
snowflake/ml/data/__init__.py
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
+
from pkgutil import extend_path
|
2
|
+
|
1
3
|
from .data_connector import DataConnector
|
2
4
|
from .data_ingestor import DataIngestor, DataIngestorType
|
3
5
|
from .data_source import DataFrameInfo, DatasetInfo, DataSource
|
4
6
|
|
5
7
|
__all__ = ["DataConnector", "DataSource", "DataFrameInfo", "DatasetInfo", "DataIngestor", "DataIngestorType"]
|
8
|
+
__path__ = extend_path(__path__, __name__)
|
@@ -2,7 +2,7 @@ import collections
|
|
2
2
|
import logging
|
3
3
|
import os
|
4
4
|
import time
|
5
|
-
from typing import Any, Deque, Dict, Iterator, List, Optional, Union
|
5
|
+
from typing import Any, Deque, Dict, Iterator, List, Optional, Sequence, Union
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import numpy.typing as npt
|
@@ -47,7 +47,7 @@ class ArrowIngestor(data_ingestor.DataIngestor):
|
|
47
47
|
def __init__(
|
48
48
|
self,
|
49
49
|
session: snowpark.Session,
|
50
|
-
data_sources:
|
50
|
+
data_sources: Sequence[data_source.DataSource],
|
51
51
|
format: Optional[str] = None,
|
52
52
|
**kwargs: Any,
|
53
53
|
) -> None:
|
@@ -60,14 +60,14 @@ class ArrowIngestor(data_ingestor.DataIngestor):
|
|
60
60
|
kwargs: Miscellaneous arguments passed to underlying PyArrow Dataset initializer.
|
61
61
|
"""
|
62
62
|
self._session = session
|
63
|
-
self._data_sources = data_sources
|
63
|
+
self._data_sources = list(data_sources)
|
64
64
|
self._format = format
|
65
65
|
self._kwargs = kwargs
|
66
66
|
|
67
67
|
self._schema: Optional[pa.Schema] = None
|
68
68
|
|
69
69
|
@classmethod
|
70
|
-
def from_sources(cls, session: snowpark.Session, sources:
|
70
|
+
def from_sources(cls, session: snowpark.Session, sources: Sequence[data_source.DataSource]) -> "ArrowIngestor":
|
71
71
|
return cls(session, sources)
|
72
72
|
|
73
73
|
@property
|