snowpark-connect 0.31.0__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Note: this release of snowpark-connect has been flagged as potentially problematic.
- snowflake/snowpark_connect/__init__.py +1 -0
- snowflake/snowpark_connect/column_name_handler.py +143 -105
- snowflake/snowpark_connect/column_qualifier.py +43 -0
- snowflake/snowpark_connect/dataframe_container.py +3 -2
- snowflake/snowpark_connect/execute_plan/map_execution_command.py +4 -2
- snowflake/snowpark_connect/expression/hybrid_column_map.py +5 -4
- snowflake/snowpark_connect/expression/map_expression.py +5 -4
- snowflake/snowpark_connect/expression/map_extension.py +12 -6
- snowflake/snowpark_connect/expression/map_sql_expression.py +50 -7
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +62 -25
- snowflake/snowpark_connect/expression/map_unresolved_function.py +924 -127
- snowflake/snowpark_connect/expression/map_unresolved_star.py +9 -7
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +16 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +1281 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +203 -0
- snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +202 -0
- snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +4 -1
- snowflake/snowpark_connect/relation/map_aggregate.py +6 -5
- snowflake/snowpark_connect/relation/map_column_ops.py +9 -3
- snowflake/snowpark_connect/relation/map_extension.py +10 -9
- snowflake/snowpark_connect/relation/map_join.py +219 -144
- snowflake/snowpark_connect/relation/map_row_ops.py +136 -54
- snowflake/snowpark_connect/relation/map_sql.py +134 -16
- snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
- snowflake/snowpark_connect/relation/read/map_read_json.py +87 -2
- snowflake/snowpark_connect/relation/read/map_read_table.py +6 -3
- snowflake/snowpark_connect/relation/utils.py +46 -0
- snowflake/snowpark_connect/relation/write/map_write.py +215 -289
- snowflake/snowpark_connect/resources_initializer.py +25 -13
- snowflake/snowpark_connect/server.py +10 -26
- snowflake/snowpark_connect/type_mapping.py +38 -3
- snowflake/snowpark_connect/typed_column.py +8 -6
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +27 -4
- snowflake/snowpark_connect/version.py +1 -1
- snowflake/snowpark_decoder/dp_session.py +1 -1
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/METADATA +7 -2
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/RECORD +46 -105
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
- snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
- {snowpark_connect-0.31.0.data → snowpark_connect-0.33.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.31.0.data → snowpark_connect-0.33.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.31.0.data → snowpark_connect-0.33.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/expression/map_sql_expression.py

@@ -260,12 +260,47 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
     class_name = str(exp.getClass().getSimpleName())
     match class_name:
         case "AggregateExpression":
-            func_name = as_java_list(exp.children())[0].nodeName()
+            aggregate_func = as_java_list(exp.children())[0]
+            func_name = aggregate_func.nodeName()
             args = [
                 map_logical_plan_expression(e)
-                for e in list(as_java_list(
+                for e in list(as_java_list(aggregate_func.children()))
             ]
-            proto = apply_filter_clause(func_name, args, exp)
+
+            # Special handling for percentile_cont and percentile_disc
+            # These functions have a 'reverse' property that indicates sort order
+            # Pass it as a 3rd argument (sort_order expression) without modifying children
+            if func_name.lower() in ("percentile_cont", "percentiledisc"):
+                # percentile_cont/disc should always have exactly 2 children: unresolved attribute and percentile value
+                if len(args) != 2:
+                    exception = AssertionError(
+                        f"{func_name} expected 2 args but got {len(args)}"
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                    raise exception
+
+                reverse = bool(aggregate_func.reverse())
+
+                direction = (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING
+                    if reverse
+                    else expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
+                )
+
+                sort_order_expr = expressions_proto.Expression(
+                    sort_order=expressions_proto.Expression.SortOrder(
+                        child=args[0],
+                        direction=direction,
+                    )
+                )
+                args.append(sort_order_expr)
+                proto = apply_filter_clause(func_name, [args[0]], exp)
+                # second arg is a literal value and it doesn't make sense to apply filter on it.
+                # also skips filtering on sort_order.
+                proto.unresolved_function.arguments.append(args[1])
+                proto.unresolved_function.arguments.append(sort_order_expr)
+            else:
+                proto = apply_filter_clause(func_name, args, exp)
         case "Alias":
             proto = expressions_proto.Expression(
                 alias=expressions_proto.Expression.Alias(
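The AggregateExpression hunk above threads the Catalyst 'reverse' flag of percentile_cont/percentile_disc through to Spark Connect as an explicit SortOrder expression appended after the percentile literal; in Spark SQL the flag is set by the WITHIN GROUP clause, e.g. PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY price DESC). A minimal sketch of that mapping, using only the public Spark Connect protos (the helper name is hypothetical, not part of this package):

    import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto

    def build_percentile_sort_order(
        child: expressions_proto.Expression, reverse: bool
    ) -> expressions_proto.Expression:
        # reverse=True corresponds to ORDER BY ... DESC in the WITHIN GROUP clause
        direction = (
            expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING
            if reverse
            else expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
        )
        return expressions_proto.Expression(
            sort_order=expressions_proto.Expression.SortOrder(
                child=child, direction=direction
            )
        )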
@@ -383,13 +418,21 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Expression
                 )
             )
         case "Like" | "ILike" | "RLike":
+            arguments = [
+                map_logical_plan_expression(e)
+                for e in list(as_java_list(exp.children()))
+            ]
+            # exp.escapeChar() returns a JPype JChar - convert to string and create a literal
+            if getattr(exp, "escapeChar", None) is not None:
+                escape_char_str = str(exp.escapeChar())
+                escape_literal = expressions_proto.Expression(
+                    literal=expressions_proto.Expression.Literal(string=escape_char_str)
+                )
+                arguments.append(escape_literal)
             proto = expressions_proto.Expression(
                 unresolved_function=expressions_proto.Expression.UnresolvedFunction(
                     function_name=class_name.lower(),
-                    arguments=[
-                        map_logical_plan_expression(e)
-                        for e in list(as_java_list(exp.children()))
-                    ],
+                    arguments=arguments,
                 )
             )
         case "LikeAny" | "NotLikeAny" | "LikeAll" | "NotLikeAll":
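The Like/ILike/RLike hunk forwards the statement's ESCAPE character as a trailing string literal argument. A hedged sketch of that wrapping, mirroring the diff (the table/column in the commented SQL are made up for illustration):

    import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto

    # SQL that carries an escape character:
    #   SELECT * FROM t WHERE name LIKE 'AB!_%' ESCAPE '!'
    escape_char = "!"  # stand-in for str(exp.escapeChar())
    escape_literal = expressions_proto.Expression(
        literal=expressions_proto.Expression.Literal(string=escape_char)
    )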
snowflake/snowpark_connect/expression/map_unresolved_attribute.py

@@ -3,6 +3,7 @@
 #
 
 import re
+from typing import Any
 
 import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 from pyspark.errors.exceptions.connect import AnalysisException
@@ -247,7 +248,7 @@ def map_unresolved_attribute(
                 )
             )
             col = get_col(snowpark_name)
-            qualifiers = column_mapping.
+            qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_col_name)
             typed_col = TypedColumn(col, lambda: typer.type(col))
             typed_col.set_qualifiers(qualifiers)
             # Store matched columns info for later use
@@ -262,7 +263,7 @@ def map_unresolved_attribute(
                 )
             )
             col = get_col(snowpark_name)
-            qualifiers = column_mapping.
+            qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_col_name)
             typed_col = TypedColumn(col, lambda: typer.type(col))
             typed_col.set_qualifiers(qualifiers)
             return (matched_columns[0], typed_col)
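Both hunks above complete a call to get_qualifiers_for_spark_column, which pairs with the new column_qualifier.py module (+43 lines) listed at the top of this diff. The class itself is not shown here; the sketch below only infers the two members the later hunks rely on (qual.parts and qual.is_empty), so treat it as an assumption rather than the package's definition:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class ColumnQualifier:
        # e.g. ("orders",) for an alias, or ("db", "orders") for a qualified table
        parts: tuple[str, ...]

        @property
        def is_empty(self) -> bool:
            return len(self.parts) == 0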
@@ -275,12 +276,33 @@ def map_unresolved_attribute(
     else:
         quoted_attr_name = name_parts[0]
 
-        snowpark_name = column_mapping.get_snowpark_column_name_from_spark_column_name(
-            quoted_attr_name, allow_non_exists=True
-        )
+        # Helper function to try finding a column in current and outer scopes
+        def try_resolve_column(column_name: str) -> tuple[str | None, Any]:
+            # Try current scope
+            snowpark_name = column_mapping.get_snowpark_column_name_from_spark_column_name(
+                column_name, allow_non_exists=True
+            )
+            if snowpark_name is not None:
+                return snowpark_name, column_mapping
+
+            # Try outer scopes
+            for outer_df in get_outer_dataframes():
+                snowpark_name = (
+                    outer_df.column_map.get_snowpark_column_name_from_spark_column_name(
+                        column_name, allow_non_exists=True
+                    )
+                )
+                if snowpark_name is not None:
+                    return snowpark_name, outer_df.column_map
+
+            return None, None
+
+        # Try to resolve the full qualified name first
+        snowpark_name, found_column_map = try_resolve_column(quoted_attr_name)
+
         if snowpark_name is not None:
             col = get_col(snowpark_name)
-            qualifiers =
+            qualifiers = found_column_map.get_qualifiers_for_spark_column(quoted_attr_name)
         else:
             # this means it has to be a struct column with a field name
             snowpark_name: str | None = None
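The new try_resolve_column helper falls back to enclosing query scopes via get_outer_dataframes(), which is what lets a correlated subquery reference a column of the outer relation. An illustrative query (the table and column names are invented for this note):

    # "o.order_id" inside the subquery resolves against the outer scope
    spark.sql(
        """
        SELECT o.order_id
        FROM orders o
        WHERE EXISTS (SELECT 1 FROM payments p WHERE p.order_id = o.order_id)
        """
    )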
@@ -295,28 +317,43 @@ def map_unresolved_attribute(
             # For qualified names like "table.column.field", we need to find the column part
             for i in range(len(name_parts)):
                 candidate_column = name_parts[i]
-                snowpark_name = (
-                    column_mapping.get_snowpark_column_name_from_spark_column_name(
-                        candidate_column, allow_non_exists=True
-                    )
-                )
+                snowpark_name, found_column_map = try_resolve_column(candidate_column)
+
                 if snowpark_name is not None:
                     column_part_index = i
                     break
 
-
-
-
-
-
-
-
-
-
-
+            # Validate qualifier scope: if we found a column but skipped prefix parts,
+            # those prefix parts could be valid qualifiers for the column
+            # We have prefix parts like 'nt1' in 'nt1.k' that were skipped
+            maybe_qualified = column_part_index > 0
+            if (
+                snowpark_name is not None
+                and maybe_qualified
+                and found_column_map is not None
+            ):
+                prefix_parts = name_parts[:column_part_index]
+                found_col_qualifiers = found_column_map.get_qualifiers_for_spark_column(
+                    candidate_column
+                )
 
-                if
-
+                # Check if any qualifier matches the prefix
+                has_matching_qualifier = False
+                for qual in found_col_qualifiers:
+                    if not qual.is_empty and len(qual.parts) >= len(prefix_parts):
+                        if qual.parts[-len(prefix_parts) :] == tuple(prefix_parts):
+                            has_matching_qualifier = True
+                            break
+
+                # If no matching qualifier, it's a scope violation
+                if not has_matching_qualifier:
+                    # The prefix is not a valid qualifier for this column - scope violation!
+                    exception = AnalysisException(
+                        f'[UNRESOLVED_COLUMN] Column "{attr_name}" cannot be resolved. '
+                        f'The table or alias "{".".join(prefix_parts)}" is not in scope or does not exist.'
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                    raise exception
 
             if snowpark_name is None:
                 # Attempt LCA fallback.
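The qualifier-scope check above accepts a skipped prefix such as nt1 in nt1.k only when some qualifier of the resolved column ends with that prefix; otherwise it raises UNRESOLVED_COLUMN. The core test, reduced to a standalone sketch with qualifiers modeled as plain tuples:

    def prefix_in_scope(
        prefix_parts: list[str], qualifiers: set[tuple[str, ...]]
    ) -> bool:
        # True if any qualifier's trailing parts equal the prefix
        return any(
            len(parts) >= len(prefix_parts)
            and parts[-len(prefix_parts):] == tuple(prefix_parts)
            for parts in qualifiers
        )

    assert prefix_in_scope(["nt1"], {("nt1",), ("db", "nt1")})
    assert not prefix_in_scope(["nt1"], {("nt2",)})  # triggers UNRESOLVED_COLUMN above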
@@ -338,7 +375,7 @@ def map_unresolved_attribute(
                 )
                 if snowpark_name is not None:
                     col = get_col(snowpark_name)
-                    qualifiers = column_mapping.
+                    qualifiers = column_mapping.get_qualifiers_for_spark_column(
                         unqualified_name
                     )
                     typed_col = TypedColumn(col, lambda: typer.type(col))
@@ -405,7 +442,7 @@ def map_unresolved_attribute(
         for field_name in path:
             col = col.getItem(field_name)
 
-        qualifiers =
+        qualifiers = set()
 
         typed_col = TypedColumn(col, lambda: typer.type(col))
         typed_col.set_qualifiers(qualifiers)
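In this final hunk, a column reached by drilling into struct fields gets an empty qualifier set: once getItem() is applied, the derived expression is no longer addressable through a table alias. Illustratively (the struct column and field names are hypothetical):

    from pyspark.sql import functions as F

    # s is a struct column; the nested field carries no table-alias qualifier
    nested = F.col("s").getItem("a").getItem("b")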