snowpark-connect 0.30.1__py3-none-any.whl → 0.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +200 -102
- snowflake/snowpark_connect/column_qualifier.py +47 -0
- snowflake/snowpark_connect/config.py +51 -16
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
- snowflake/snowpark_connect/error/error_codes.py +50 -0
- snowflake/snowpark_connect/error/error_utils.py +142 -22
- snowflake/snowpark_connect/error/exceptions.py +13 -4
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +9 -3
- snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
- snowflake/snowpark_connect/execute_plan/utils.py +5 -1
- snowflake/snowpark_connect/expression/function_defaults.py +9 -2
- snowflake/snowpark_connect/expression/literal.py +7 -1
- snowflake/snowpark_connect/expression/map_cast.py +17 -5
- snowflake/snowpark_connect/expression/map_expression.py +53 -8
- snowflake/snowpark_connect/expression/map_extension.py +37 -11
- snowflake/snowpark_connect/expression/map_sql_expression.py +102 -32
- snowflake/snowpark_connect/expression/map_udf.py +10 -2
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +38 -14
- snowflake/snowpark_connect/expression/map_unresolved_function.py +1476 -292
- snowflake/snowpark_connect/expression/map_unresolved_star.py +14 -8
- snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
- snowflake/snowpark_connect/expression/map_window_function.py +18 -3
- snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +38 -13
- snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
- snowflake/snowpark_connect/relation/io_utils.py +6 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +8 -5
- snowflake/snowpark_connect/relation/map_catalog.py +5 -1
- snowflake/snowpark_connect/relation/map_column_ops.py +92 -59
- snowflake/snowpark_connect/relation/map_extension.py +38 -17
- snowflake/snowpark_connect/relation/map_join.py +26 -12
- snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
- snowflake/snowpark_connect/relation/map_relation.py +33 -7
- snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
- snowflake/snowpark_connect/relation/map_sql.py +124 -25
- snowflake/snowpark_connect/relation/map_stats.py +5 -1
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/map_udtf.py +14 -4
- snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
- snowflake/snowpark_connect/relation/read/map_read.py +15 -3
- snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
- snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
- snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
- snowflake/snowpark_connect/relation/read/map_read_table.py +21 -8
- snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
- snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
- snowflake/snowpark_connect/relation/stage_locator.py +5 -1
- snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
- snowflake/snowpark_connect/relation/write/map_write.py +160 -48
- snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
- snowflake/snowpark_connect/resources_initializer.py +5 -1
- snowflake/snowpark_connect/server.py +73 -21
- snowflake/snowpark_connect/type_mapping.py +90 -20
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/context.py +42 -1
- snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
- snowflake/snowpark_connect/utils/env_utils.py +5 -1
- snowflake/snowpark_connect/utils/identifiers.py +11 -3
- snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
- snowflake/snowpark_connect/utils/profiling.py +25 -8
- snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
- snowflake/snowpark_connect/utils/session.py +24 -4
- snowflake/snowpark_connect/utils/telemetry.py +6 -0
- snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
- snowflake/snowpark_connect/utils/udf_cache.py +5 -3
- snowflake/snowpark_connect/utils/udf_helper.py +20 -6
- snowflake/snowpark_connect/utils/udf_utils.py +4 -4
- snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
- snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/METADATA +7 -3
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/RECORD +85 -85
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.30.1.data → snowpark_connect-0.32.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.32.0.dist-info}/top_level.txt +0 -0
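
Nearly every hunk below applies the same refactor: instead of raising an exception directly, the code now constructs the exception, tags it with attach_custom_error_code and an ErrorCodes member, and then raises it. The following sketch shows that pattern in isolation. The call signature and the ErrorCodes member names are taken from the hunks themselves; the bodies of the two stand-ins (and the attribute used to carry the code) are assumptions made only so the example is self-contained and runnable — they are not the package's actual implementation.

from enum import Enum


class ErrorCodes(Enum):
    # Stand-in for snowflake.snowpark_connect.error.error_codes.ErrorCodes.
    # Only a few of the members that appear in the hunks are listed here.
    INVALID_INPUT = "INVALID_INPUT"
    INSUFFICIENT_INPUT = "INSUFFICIENT_INPUT"
    UNSUPPORTED_OPERATION = "UNSUPPORTED_OPERATION"
    INTERNAL_ERROR = "INTERNAL_ERROR"


def attach_custom_error_code(exception: BaseException, code: ErrorCodes) -> None:
    # Stand-in for snowflake.snowpark_connect.error.error_utils.attach_custom_error_code.
    # The hunks call it purely for its side effect and then raise the same object,
    # so it presumably annotates the exception in place; the attribute name used
    # here is an assumption for illustration only.
    exception.custom_error_code = code


def read_source(table: str | None, query: str | None) -> str:
    # Before 0.32.0 the equivalent check raised the ValueError directly;
    # in 0.32.0 the exception is built, tagged, and then raised.
    if table is None and query is None:
        exception = ValueError("table or query is not specified")
        attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
        raise exception
    return table or query


if __name__ == "__main__":
    try:
        read_source(None, None)
    except ValueError as err:
        # The attached code travels with the exception to the caller.
        print(err, getattr(err, "custom_error_code", None))
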
snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py

@@ -38,6 +38,8 @@ from snowflake.snowpark.types import (
    TimeType,
    _NumericType,
)
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
from snowflake.snowpark_connect.relation.read.utils import (
    DATA_SOURCE_SQL_COMMENT,
    Connection,

@@ -147,9 +149,11 @@ class JdbcDataFrameReader(DataFrameReader):
                or upper_bound is not None
                or num_partitions is not None
            ):
-               raise ValueError(
+               exception = ValueError(
                    "when column is not specified, lower_bound, upper_bound, num_partitions are expected to be None"
                )
+               attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+               raise exception
            if table is not None:
                partitioned_queries = []
                table_query = f"SELECT * FROM {table}"

@@ -160,24 +164,32 @@ class JdbcDataFrameReader(DataFrameReader):
            elif query is not None:
                partitioned_queries = [query]
            else:
-               raise ValueError("table or query is not specified")
+               exception = ValueError("table or query is not specified")
+               attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+               raise exception
        else:
            if lower_bound is None or upper_bound is None or num_partitions is None:
-               raise ValueError(
+               exception = ValueError(
                    "when column is specified, lower_bound, upper_bound, num_partitions must be specified"
                )
+               attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+               raise exception

            column_type = None
            for field in struct_schema.fields:
                if field.name.lower() == column.lower():
                    column_type = field.datatype
            if column_type is None:
-               raise ValueError("Column does not exist")
+               exception = ValueError("Column does not exist")
+               attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+               raise exception

            if not isinstance(column_type, _NumericType) and not isinstance(
                column_type, DateType
            ):
-               raise ValueError(f"unsupported type {column_type}")
+               exception = ValueError(f"unsupported type {column_type}")
+               attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+               raise exception
            spark_column_name = f'"{column}"'
            partitioned_queries = self._generate_partition(
                table,

@@ -240,7 +252,11 @@ class JdbcDataFrameReader(DataFrameReader):
                )
                query_thread_executor.shutdown(wait=False)
                upload_thread_executor.shutdown(wait=False)
-               raise future.result()
+               exception = future.result()
+               attach_custom_error_code(
+                   exception, ErrorCodes.INTERNAL_ERROR
+               )
+               raise exception
            else:
                path = future.result()
                if not path:

@@ -266,7 +282,11 @@ class JdbcDataFrameReader(DataFrameReader):
                    )
                    query_thread_executor.shutdown(wait=False)
                    upload_thread_executor.shutdown(wait=False)
-                   raise f.result()
+                   exception = f.result()
+                   attach_custom_error_code(
+                       exception, ErrorCodes.INTERNAL_ERROR
+                   )
+                   raise exception
        finally:
            close_connection(conn)

@@ -283,7 +303,9 @@ class JdbcDataFrameReader(DataFrameReader):
        elif query is not None:
            sql = f"SELECT * FROM ({query}) WHERE 1=0"
        else:
-           raise ValueError("table or query is not specified")
+           exception = ValueError("table or query is not specified")
+           attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+           raise exception

        cursor = conn.cursor()
        cursor.execute(sql)

@@ -301,7 +323,11 @@ class JdbcDataFrameReader(DataFrameReader):
            dt = parser.parse(value)
            return int(dt.replace(tzinfo=pytz.UTC).timestamp())
        else:
-           raise TypeError(f"unsupported column type for partition: {column_type}")
+           exception = TypeError(
+               f"unsupported column type for partition: {column_type}"
+           )
+           attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+           raise exception

    # this function is only used in data source API for SQL server
    def _to_external_value(self, value: Union[int, str, float], column_type: DataType):

@@ -311,7 +337,11 @@ class JdbcDataFrameReader(DataFrameReader):
            # TODO: SNOW-1909315: support timezone
            return datetime.datetime.fromtimestamp(value, tz=pytz.UTC)
        else:
-           raise TypeError(f"unsupported column type for partition: {column_type}")
+           exception = TypeError(
+               f"unsupported column type for partition: {column_type}"
+           )
+           attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+           raise exception

    def _to_snowpark_type(self, schema: Tuple[tuple]) -> StructType:
        fields = []

@@ -339,7 +369,9 @@ class JdbcDataFrameReader(DataFrameReader):
                case jaydebeapi.BINARY:
                    field = StructField(name, BinaryType(), is_nullable)
                case _:
-                   raise ValueError(f"unsupported type: {dbapi_type}")
+                   exception = ValueError(f"unsupported type: {dbapi_type}")
+                   attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+                   raise exception

            fields.append(field)
        return StructType(fields)

@@ -359,7 +391,9 @@ class JdbcDataFrameReader(DataFrameReader):
        processed_lower_bound = self._to_internal_value(lower_bound, column_type)
        processed_upper_bound = self._to_internal_value(upper_bound, column_type)
        if processed_lower_bound > processed_upper_bound:
-           raise ValueError("lower_bound cannot be greater than upper_bound")
+           exception = ValueError("lower_bound cannot be greater than upper_bound")
+           attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+           raise exception

        if processed_lower_bound == processed_upper_bound or num_partitions <= 1:
            return [select_query]

@@ -665,4 +699,6 @@ def get_jdbc_dialect(url: str) -> JdbcDialect:
    for jdbc_dialect in jdbc_dialects:
        if jdbc_dialect.can_handle(url):
            return jdbc_dialect
-   raise ValueError(f"Unsupported JDBC datasource: {url}")
+   exception = ValueError(f"Unsupported JDBC datasource: {url}")
+   attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+   raise exception
snowflake/snowpark_connect/relation/read/map_read.py

@@ -15,6 +15,8 @@ from snowflake import snowpark
from snowflake.snowpark.types import StructType
from snowflake.snowpark_connect.config import global_config
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
from snowflake.snowpark_connect.relation.io_utils import (
    convert_file_prefix_path,
    get_compression_for_source_and_options,

@@ -159,12 +161,20 @@ def map_read(
                    options[DBTABLE_OPTION], session, rel.common.plan_id
                )
            case other:
-               raise SnowparkConnectNotImplementedError(
+               exception = SnowparkConnectNotImplementedError(
                    f"UNSUPPORTED FORMAT {other} WITH NO PATH"
                )
+               attach_custom_error_code(
+                   exception, ErrorCodes.UNSUPPORTED_OPERATION
+               )
+               raise exception
        case other:
            # TODO: Empty data source
-           raise SnowparkConnectNotImplementedError(f"Unsupported read type: {other}")
+           exception = SnowparkConnectNotImplementedError(
+               f"Unsupported read type: {other}"
+           )
+           attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+           raise exception

    return df_cache_map_put_if_absent(
        (get_session_id(), rel.common.plan_id),

@@ -274,9 +284,11 @@ def _read_file(

            return map_read_text(rel, schema, session, paths)
        case _:
-           raise SnowparkConnectNotImplementedError(
+           exception = SnowparkConnectNotImplementedError(
                f"Unsupported format: {read_format}"
            )
+           attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+           raise exception


def _skip_upload(path: str, read_format: str):
snowflake/snowpark_connect/relation/read/map_read_csv.py

@@ -12,6 +12,8 @@ from snowflake import snowpark
from snowflake.snowpark.dataframe_reader import DataFrameReader
from snowflake.snowpark.types import StringType, StructField, StructType
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
from snowflake.snowpark_connect.relation.read.map_read import CsvReaderConfig
from snowflake.snowpark_connect.relation.read.metadata_utils import (
    add_filename_metadata_to_reader,

@@ -43,9 +45,11 @@ def map_read_csv(

    if rel.read.is_streaming is True:
        # TODO: Structured streaming implementation.
-       raise SnowparkConnectNotImplementedError(
+       exception = SnowparkConnectNotImplementedError(
            "Streaming is not supported for CSV files."
        )
+       attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+       raise exception
    else:
        snowpark_options = options.convert_to_snowpark_args()
        parse_header = snowpark_options.get("PARSE_HEADER", False)

@@ -188,14 +192,18 @@ def read_data(

    if schema is not None:
        if len(schema.fields) != len(non_metadata_fields):
-           raise Exception(f"csv load from {filename} failed.")
+           exception = Exception(f"csv load from {filename} failed.")
+           attach_custom_error_code(exception, ErrorCodes.INVALID_CAST)
+           raise exception
        if raw_options.get("enforceSchema", "True").lower() == "false":
            for i in range(len(schema.fields)):
                if (
                    schema.fields[i].name != non_metadata_fields[i].name
                    and f'"{schema.fields[i].name}"' != non_metadata_fields[i].name
                ):
-                   raise Exception("CSV header does not conform to the schema")
+                   exception = Exception("CSV header does not conform to the schema")
+                   attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+                   raise exception
        return df

    headers = get_header_names(
snowflake/snowpark_connect/relation/read/map_read_jdbc.py

@@ -9,6 +9,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
from snowflake import snowpark
from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
from snowflake.snowpark_connect.relation.read.jdbc_read_dbapi import JdbcDataFrameReader
from snowflake.snowpark_connect.relation.read.utils import (
    Connection,

@@ -28,7 +30,9 @@ def create_connection(jdbc_options: dict[str, str]) -> Connection:
        return jaydebeapi.connect(driver, url, jdbc_options)
    except Exception as e:
        jpype.detachThreadFromJVM()
-       raise Exception(f"Error connecting JDBC datasource: {e}")
+       exception = Exception(f"Error connecting JDBC datasource: {e}")
+       attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+       raise exception


def close_connection(conn: Connection) -> None:

@@ -70,17 +74,23 @@ def map_read_jdbc(
        dbtable = None

    if not dbtable and not query:
-       raise ValueError("Include dbtable or query is required option")
+       exception = ValueError("Include dbtable or query is required option")
+       attach_custom_error_code(exception, ErrorCodes.INSUFFICIENT_INPUT)
+       raise exception

    if query is not None and dbtable is not None:
-       raise ValueError(
+       exception = ValueError(
            "Not allowed to specify dbtable and query options at the same time"
        )
+       attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+       raise exception

    if query is not None and partition_column is not None:
-       raise ValueError(
+       exception = ValueError(
            "Not allowed to specify partitionColumn and query options at the same time"
        )
+       attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+       raise exception

    try:
        df = JdbcDataFrameReader(session, jdbc_options).jdbc_read_dbapi(

@@ -105,4 +115,6 @@ def map_read_jdbc(
            snowpark_column_types=[f.datatype for f in df.schema.fields],
        )
    except Exception as e:
-       raise Exception(f"Error accessing JDBC datasource for read: {e}")
+       exception = Exception(f"Error accessing JDBC datasource for read: {e}")
+       attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+       raise exception
snowflake/snowpark_connect/relation/read/map_read_json.py

@@ -28,6 +28,8 @@ from snowflake.snowpark.types import (
    TimestampType,
)
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
from snowflake.snowpark_connect.relation.read.map_read import JsonReaderConfig
from snowflake.snowpark_connect.relation.read.metadata_utils import (
    add_filename_metadata_to_reader,

@@ -64,9 +66,11 @@ def map_read_json(

    if rel.read.is_streaming is True:
        # TODO: Structured streaming implementation.
-       raise SnowparkConnectNotImplementedError(
+       exception = SnowparkConnectNotImplementedError(
            "Streaming is not supported for JSON files."
        )
+       attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+       raise exception
    else:
        snowpark_options = options.convert_to_snowpark_args()
        raw_options = rel.read.data_source.options

@@ -363,9 +367,11 @@ def construct_row_by_schema(
                content.get(col_name, None), sf.datatype, snowpark_options
            )
        else:
-           raise SnowparkConnectNotImplementedError(
+           exception = SnowparkConnectNotImplementedError(
                f"JSON construct {str(content)} to StructType failed"
            )
+           attach_custom_error_code(exception, ErrorCodes.TYPE_MISMATCH)
+           raise exception
        return result
    elif isinstance(schema, ArrayType):
        result = []
snowflake/snowpark_connect/relation/read/map_read_parquet.py

@@ -22,6 +22,8 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
from snowflake.snowpark.column import METADATA_FILENAME
from snowflake.snowpark.types import DataType, DoubleType, IntegerType, StringType
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
from snowflake.snowpark_connect.relation.read.metadata_utils import (
    add_filename_metadata_to_reader,
)

@@ -44,9 +46,11 @@ def map_read_parquet(
    """Read a Parquet file into a Snowpark DataFrame."""

    if rel.read.is_streaming is True:
-       raise SnowparkConnectNotImplementedError(
+       exception = SnowparkConnectNotImplementedError(
            "Streaming is not supported for Parquet files."
        )
+       attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+       raise exception

    snowpark_options = options.convert_to_snowpark_args()
    raw_options = rel.read.data_source.options

@@ -155,10 +159,14 @@ def _discover_partition_columns(
            if i not in dir_level_to_column_name:
                dir_level_to_column_name[i] = key
            elif dir_level_to_column_name[i] != key:
-               raise ValueError(
+               exception = ValueError(
                    f"Conflicting partition column names detected: '{dir_level_to_column_name[i]}' and '{key}' "
                    f"at the same directory level"
                )
+               attach_custom_error_code(
+                   exception, ErrorCodes.INVALID_OPERATION
+               )
+               raise exception

            partition_columns_values[key].add(value)

@@ -166,10 +174,12 @@ def _discover_partition_columns(
    for level in sorted(dir_level_to_column_name.keys()):
        col_name = dir_level_to_column_name[level]
        if col_name in seen_columns:
-           raise ValueError(
+           exception = ValueError(
                f"Found partition column '{col_name}' at multiple directory levels. "
                f"A partition column can only appear at a single level."
            )
+           attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+           raise exception
        seen_columns.add(col_name)

    ordered_columns = [
snowflake/snowpark_connect/relation/read/map_read_socket.py

@@ -9,6 +9,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto

from snowflake import snowpark
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
from snowflake.snowpark_connect.utils.telemetry import (
    SnowparkConnectNotImplementedError,
)

@@ -30,7 +32,9 @@ def map_read_socket(
        host = options.get("host", None)
        port = options.get("port", None)
        if not host or not port:
-           raise ValueError("Host and port must be provided in options.")
+           exception = ValueError("Host and port must be provided in options.")
+           attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+           raise exception
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.connect((host, int(port)))

@@ -56,8 +60,12 @@ def map_read_socket(
                    snowpark_column_names=[snowpark_cname],
                )
            except OSError as e:
-               raise Exception(f"Error connecting to {host}:{port} - {e}")
+               exception = Exception(f"Error connecting to {host}:{port} - {e}")
+               attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+               raise exception
    else:
-       raise SnowparkConnectNotImplementedError(
+       exception = SnowparkConnectNotImplementedError(
            "Socket reads are only supported in streaming mode."
        )
+       attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+       raise exception
snowflake/snowpark_connect/relation/read/map_read_table.py

@@ -16,8 +16,11 @@ from snowflake.snowpark_connect.column_name_handler import (
    ColumnNameMap,
    make_column_names_snowpark_compatible,
)
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
from snowflake.snowpark_connect.relation.read.utils import (
    rename_columns_as_snowflake_standard,
)

@@ -56,7 +59,7 @@ def post_process_df(
            spark_column_names=true_names,
            snowpark_column_names=snowpark_column_names,
            snowpark_column_types=[f.datatype for f in df.schema.fields],
-           column_qualifiers=[name_parts for _ in true_names]
+           column_qualifiers=[{ColumnQualifier(tuple(name_parts))} for _ in true_names]
            if source_table_name
            else None,
        )

@@ -64,9 +67,11 @@ def post_process_df(
        # Check if this is a table/view not found error
        # Snowflake error codes: 002003 (42S02) - Object does not exist or not authorized
        if hasattr(e, "sql_error_code") and e.sql_error_code == 2003:
-           raise AnalysisException(
+           exception = AnalysisException(
                f"[TABLE_OR_VIEW_NOT_FOUND] The table or view cannot be found. {source_table_name}"
-           ) from None  # Suppress original exception to reduce message size
+           )
+           attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+           raise exception from None  # Suppress original exception to reduce message size
        # Re-raise if it's not a table not found error
        raise

@@ -90,8 +95,10 @@ def _get_temporary_view(
            spark_column_names=temp_view.column_map.get_spark_columns(),
            snowpark_column_names=snowpark_column_names,
            column_metadata=temp_view.column_map.column_metadata,
-           column_qualifiers=[
-
+           column_qualifiers=[
+               {ColumnQualifier(tuple(split_fully_qualified_spark_name(table_name)))}
+               for _ in range(len(temp_view.column_map.get_spark_columns()))
+           ],
            parent_column_name_map=temp_view.column_map.get_parent_column_name_map(),
        )

@@ -118,9 +125,11 @@ def get_table_from_name(

    # Verify if recursive view read is not attempted
    if table_name in get_processed_views():
-       raise AnalysisException(
+       exception = AnalysisException(
            f"[RECURSIVE_VIEW] Recursive view `{table_name}` detected (cycle: `{table_name}` -> `{table_name}`)"
        )
+       attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+       raise exception

    snowpark_name = ".".join(
        quote_name_without_upper_casing(part)

@@ -159,10 +168,14 @@ def map_read_table(
        and rel.read.data_source.format.lower() == "iceberg"
    ):
        if len(rel.read.data_source.paths) != 1:
-           raise SnowparkConnectNotImplementedError(
+           exception = SnowparkConnectNotImplementedError(
                f"Unexpected paths: {rel.read.data_source.paths}"
            )
+           attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+           raise exception
        table_identifier = rel.read.data_source.paths[0]
    else:
-       raise ValueError("The relation must have a table identifier.")
+       exception = ValueError("The relation must have a table identifier.")
+       attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+       raise exception
    return get_table_from_name(table_identifier, session, rel.common.plan_id)
snowflake/snowpark_connect/relation/read/map_read_text.py

@@ -8,6 +8,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto

from snowflake import snowpark
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
from snowflake.snowpark_connect.relation.read.utils import (
    get_spark_column_names_from_snowpark_columns,
    rename_columns_as_snowflake_standard,

@@ -82,9 +84,11 @@ def map_read_text(
    """
    if rel.read.is_streaming is True:
        # TODO: Structured streaming implementation.
-       raise SnowparkConnectNotImplementedError(
+       exception = SnowparkConnectNotImplementedError(
            "Streaming is not supported for CSV files."
        )
+       attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+       raise exception

    df = read_text(paths[0], schema, session, rel.read.data_source.options)
    if len(paths) > 1:
snowflake/snowpark_connect/relation/read/metadata_utils.py

@@ -16,6 +16,8 @@ from snowflake.snowpark.column import METADATA_FILENAME
from snowflake.snowpark.functions import col
from snowflake.snowpark.types import StructField
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code

# Constant for the metadata filename column name
METADATA_FILENAME_COLUMN = "METADATA$FILENAME"

@@ -129,9 +131,11 @@ def filter_metadata_columns(
    if len(non_metadata_columns) == 0:
        # DataFrame contains only metadata columns (METADATA$FILENAME), no actual data columns remaining.
        # We don't have a way to return an empty dataframe.
-       raise AnalysisException(
+       exception = AnalysisException(
            "[DATAFRAME_MISSING_DATA_COLUMNS] Cannot perform operation on DataFrame that contains no data columns."
        )
+       attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+       raise exception

    filtered_df = result_df.select([col(name) for name in non_metadata_columns])
snowflake/snowpark_connect/relation/stage_locator.py

@@ -11,6 +11,8 @@ from s3fs.core import S3FileSystem
from snowflake import snowpark
from snowflake.snowpark.session import Session
from snowflake.snowpark_connect.config import sessions_config
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
from snowflake.snowpark_connect.relation.io_utils import (
    get_cloud_from_url,
    parse_azure_url,

@@ -42,9 +44,11 @@ def get_paths_from_stage(
                rewrite_paths.append(f"{stage_name}/{path}")
            paths = rewrite_paths
        case "gcp":
-           raise AnalysisException(
+           exception = AnalysisException(
                "You must configure an integration for Google Cloud Storage to perform I/O operations rather than accessing the URL directly. Reference: https://docs.snowflake.com/en/user-guide/data-load-gcs-config"
            )
+           attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+           raise exception
        case _:
            filesystem, parsed_path = url_to_fs(paths[0])
            if isinstance(filesystem, S3FileSystem):  # aws
snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py

@@ -11,6 +11,8 @@ from snowflake import snowpark
from snowflake.snowpark import DataFrameWriter
from snowflake.snowpark.dataframe import DataFrame
from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
from snowflake.snowpark_connect.relation.read import jdbc_read_dbapi
from snowflake.snowpark_connect.relation.read.jdbc_read_dbapi import JdbcDialect
from snowflake.snowpark_connect.relation.read.utils import Connection

@@ -65,9 +67,13 @@ class JdbcDataFrameWriter(DataFrameWriter):
                self._create_table(conn, table, container, jdbc_dialect)
            case "errorifexists":
                if table_exist:
-                   raise ValueError(
+                   exception = ValueError(
                        "table is already exist and write mode is ERROR_IF_EXISTS"
                    )
+                   attach_custom_error_code(
+                       exception, ErrorCodes.INVALID_OPERATION
+                   )
+                   raise exception
                else:
                    self._create_table(conn, table, container, jdbc_dialect)
            case "overwrite":

@@ -82,7 +88,9 @@ class JdbcDataFrameWriter(DataFrameWriter):
                else:
                    self._create_table(conn, table, container, jdbc_dialect)
            case _:
-               raise ValueError(f"Invalid write mode value{write_mode}")
+               exception = ValueError(f"Invalid write mode value{write_mode}")
+               attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+               raise exception

        task_insert_into_data_source_with_retry(
            input_df,

@@ -141,6 +149,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
            cursor.execute(sql)
        except Exception as e:
            logger.error(f"failed to drop table {table} from the data source {e}")
+           attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
            raise e

    def _create_table(

@@ -189,6 +198,7 @@ class JdbcDataFrameWriter(DataFrameWriter):
            cursor.execute(sql)
        except Exception as e:
            logger.error(f"failed to create a table {table} from the data source {e}")
+           attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
            raise e


@@ -218,6 +228,7 @@ def _task_insert_into_data_source(
    except Exception as e:
        logger.debug(f"failed to insert into data source {e}")
        conn.rollback()
+       attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
        raise e
    finally:
        cursor.close()

@@ -274,6 +285,7 @@ def task_insert_into_data_source_with_retry(
        )
    except Exception as e:
        logger.debug(f"failed to insert into data source {e}")
+       attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
        raise e
    finally:
        close_connection(conn)

@@ -339,4 +351,8 @@ def convert_sp_to_sql_type(
        case _:
            return "TIMESTAMP"
    case _:
-       raise TypeError(f"Unsupported data type: {datatype.__class__.__name__}")
+       exception = TypeError(
+           f"Unsupported data type: {datatype.__class__.__name__}"
+       )
+       attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+       raise exception