snowpark-connect 0.31.0__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.

Potentially problematic release: this version of snowpark-connect might be problematic.

Files changed (111)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/column_name_handler.py +143 -105
  3. snowflake/snowpark_connect/column_qualifier.py +43 -0
  4. snowflake/snowpark_connect/dataframe_container.py +3 -2
  5. snowflake/snowpark_connect/execute_plan/map_execution_command.py +4 -2
  6. snowflake/snowpark_connect/expression/hybrid_column_map.py +5 -4
  7. snowflake/snowpark_connect/expression/map_expression.py +5 -4
  8. snowflake/snowpark_connect/expression/map_extension.py +12 -6
  9. snowflake/snowpark_connect/expression/map_sql_expression.py +50 -7
  10. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +62 -25
  11. snowflake/snowpark_connect/expression/map_unresolved_function.py +924 -127
  12. snowflake/snowpark_connect/expression/map_unresolved_star.py +9 -7
  13. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/__init__.py +16 -0
  14. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/accessors.py +1281 -0
  15. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/functions.py +203 -0
  16. snowflake/snowpark_connect/includes/python/pyspark/pandas/spark/utils.py +202 -0
  17. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +4 -1
  18. snowflake/snowpark_connect/relation/map_aggregate.py +6 -5
  19. snowflake/snowpark_connect/relation/map_column_ops.py +9 -3
  20. snowflake/snowpark_connect/relation/map_extension.py +10 -9
  21. snowflake/snowpark_connect/relation/map_join.py +219 -144
  22. snowflake/snowpark_connect/relation/map_row_ops.py +136 -54
  23. snowflake/snowpark_connect/relation/map_sql.py +134 -16
  24. snowflake/snowpark_connect/relation/map_subquery_alias.py +4 -1
  25. snowflake/snowpark_connect/relation/read/map_read_json.py +87 -2
  26. snowflake/snowpark_connect/relation/read/map_read_table.py +6 -3
  27. snowflake/snowpark_connect/relation/utils.py +46 -0
  28. snowflake/snowpark_connect/relation/write/map_write.py +215 -289
  29. snowflake/snowpark_connect/resources_initializer.py +25 -13
  30. snowflake/snowpark_connect/server.py +10 -26
  31. snowflake/snowpark_connect/type_mapping.py +38 -3
  32. snowflake/snowpark_connect/typed_column.py +8 -6
  33. snowflake/snowpark_connect/utils/sequence.py +21 -0
  34. snowflake/snowpark_connect/utils/session.py +27 -4
  35. snowflake/snowpark_connect/version.py +1 -1
  36. snowflake/snowpark_decoder/dp_session.py +1 -1
  37. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/METADATA +7 -2
  38. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/RECORD +46 -105
  39. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  99. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  100. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  101. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  102. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2_grpc.py +0 -4
  103. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2_grpc.py +0 -4
  104. {snowpark_connect-0.31.0.data → snowpark_connect-0.33.0.data}/scripts/snowpark-connect +0 -0
  105. {snowpark_connect-0.31.0.data → snowpark_connect-0.33.0.data}/scripts/snowpark-session +0 -0
  106. {snowpark_connect-0.31.0.data → snowpark_connect-0.33.0.data}/scripts/snowpark-submit +0 -0
  107. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/WHEEL +0 -0
  108. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/licenses/LICENSE-binary +0 -0
  109. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/licenses/LICENSE.txt +0 -0
  110. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/licenses/NOTICE-binary +0 -0
  111. {snowpark_connect-0.31.0.dist-info → snowpark_connect-0.33.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/expression/map_sql_expression.py

@@ -260,12 +260,47 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
     class_name = str(exp.getClass().getSimpleName())
     match class_name:
         case "AggregateExpression":
-            func_name = as_java_list(exp.children())[0].nodeName()
+            aggregate_func = as_java_list(exp.children())[0]
+            func_name = aggregate_func.nodeName()
             args = [
                 map_logical_plan_expression(e)
-                for e in list(as_java_list(as_java_list(exp.children())[0].children()))
+                for e in list(as_java_list(aggregate_func.children()))
             ]
-            proto = apply_filter_clause(func_name, args, exp)
+
+            # Special handling for percentile_cont and percentile_disc
+            # These functions have a 'reverse' property that indicates sort order
+            # Pass it as a 3rd argument (sort_order expression) without modifying children
+            if func_name.lower() in ("percentile_cont", "percentiledisc"):
+                # percentile_cont/disc should always have exactly 2 children: unresolved attribute and percentile value
+                if len(args) != 2:
+                    exception = AssertionError(
+                        f"{func_name} expected 2 args but got {len(args)}"
+                    )
+                    attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                    raise exception
+
+                reverse = bool(aggregate_func.reverse())
+
+                direction = (
+                    expressions_proto.Expression.SortOrder.SORT_DIRECTION_DESCENDING
+                    if reverse
+                    else expressions_proto.Expression.SortOrder.SORT_DIRECTION_ASCENDING
+                )
+
+                sort_order_expr = expressions_proto.Expression(
+                    sort_order=expressions_proto.Expression.SortOrder(
+                        child=args[0],
+                        direction=direction,
+                    )
+                )
+                args.append(sort_order_expr)
+                proto = apply_filter_clause(func_name, [args[0]], exp)
+                # second arg is a literal value and it doesn't make sense to apply filter on it.
+                # also skips filtering on sort_order.
+                proto.unresolved_function.arguments.append(args[1])
+                proto.unresolved_function.arguments.append(sort_order_expr)
+            else:
+                proto = apply_filter_clause(func_name, args, exp)
         case "Alias":
             proto = expressions_proto.Expression(
                 alias=expressions_proto.Expression.Alias(
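
For context, a minimal sketch of the query shape this branch handles, assuming a Spark Connect client pointed at the server; the endpoint, table, and column names (orders, price) are illustrative, not taken from the package:

# Illustrative sketch: percentile_cont with a WITHIN GROUP ordering.
# The endpoint "sc://localhost" and the names "orders"/"price" are hypothetical.
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost").getOrCreate()

spark.sql(
    "SELECT percentile_cont(0.5) WITHIN GROUP (ORDER BY price DESC) FROM orders"
).show()
# ORDER BY ... DESC is what Catalyst exposes as reverse=True on the aggregate
# node; the mapper above encodes it as a SORT_DIRECTION_DESCENDING sort_order
# expression appended as a third function argument.
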
@@ -383,13 +418,21 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
                 )
             )
         case "Like" | "ILike" | "RLike":
+            arguments = [
+                map_logical_plan_expression(e)
+                for e in list(as_java_list(exp.children()))
+            ]
+            # exp.escapeChar() returns a JPype JChar - convert to string and create a literal
+            if getattr(exp, "escapeChar", None) is not None:
+                escape_char_str = str(exp.escapeChar())
+                escape_literal = expressions_proto.Expression(
+                    literal=expressions_proto.Expression.Literal(string=escape_char_str)
+                )
+                arguments.append(escape_literal)
             proto = expressions_proto.Expression(
                 unresolved_function=expressions_proto.Expression.UnresolvedFunction(
                     function_name=class_name.lower(),
-                    arguments=[
-                        map_logical_plan_expression(e)
-                        for e in list(as_java_list(exp.children()))
-                    ],
+                    arguments=arguments,
                )
            )
        case "LikeAny" | "NotLikeAny" | "LikeAll" | "NotLikeAll":
snowflake/snowpark_connect/expression/map_unresolved_attribute.py

@@ -3,6 +3,7 @@
 #

 import re
+from typing import Any

 import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 from pyspark.errors.exceptions.connect import AnalysisException
@@ -247,7 +248,7 @@ def map_unresolved_attribute(
             )
         )
         col = get_col(snowpark_name)
-        qualifiers = column_mapping.get_qualifier_for_spark_column(quoted_col_name)
+        qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_col_name)
         typed_col = TypedColumn(col, lambda: typer.type(col))
         typed_col.set_qualifiers(qualifiers)
         # Store matched columns info for later use
@@ -262,7 +263,7 @@ def map_unresolved_attribute(
             )
         )
         col = get_col(snowpark_name)
-        qualifiers = column_mapping.get_qualifier_for_spark_column(quoted_col_name)
+        qualifiers = column_mapping.get_qualifiers_for_spark_column(quoted_col_name)
         typed_col = TypedColumn(col, lambda: typer.type(col))
         typed_col.set_qualifiers(qualifiers)
         return (matched_columns[0], typed_col)
@@ -275,12 +276,33 @@ def map_unresolved_attribute(
     else:
         quoted_attr_name = name_parts[0]

-    snowpark_name = column_mapping.get_snowpark_column_name_from_spark_column_name(
-        quoted_attr_name, allow_non_exists=True
-    )
+    # Helper function to try finding a column in current and outer scopes
+    def try_resolve_column(column_name: str) -> tuple[str | None, Any]:
+        # Try current scope
+        snowpark_name = column_mapping.get_snowpark_column_name_from_spark_column_name(
+            column_name, allow_non_exists=True
+        )
+        if snowpark_name is not None:
+            return snowpark_name, column_mapping
+
+        # Try outer scopes
+        for outer_df in get_outer_dataframes():
+            snowpark_name = (
+                outer_df.column_map.get_snowpark_column_name_from_spark_column_name(
+                    column_name, allow_non_exists=True
+                )
+            )
+            if snowpark_name is not None:
+                return snowpark_name, outer_df.column_map
+
+        return None, None
+
+    # Try to resolve the full qualified name first
+    snowpark_name, found_column_map = try_resolve_column(quoted_attr_name)
+
     if snowpark_name is not None:
         col = get_col(snowpark_name)
-        qualifiers = column_mapping.get_qualifier_for_spark_column(quoted_attr_name)
+        qualifiers = found_column_map.get_qualifiers_for_spark_column(quoted_attr_name)
     else:
         # this means it has to be a struct column with a field name
         snowpark_name: str | None = None
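
A hedged sketch of when the outer-scope fallback in try_resolve_column matters: a correlated subquery whose inner query references a column of the outer relation (all names hypothetical):

# Illustrative sketch: correlated EXISTS subquery. "orders"/"items" and their
# columns are hypothetical names.
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost").getOrCreate()

spark.sql("""
    SELECT o.id
    FROM orders o
    WHERE EXISTS (SELECT 1 FROM items i WHERE i.order_id = o.id)
""").show()
# Inside the EXISTS subquery, "o.id" is absent from the current scope
# (items i), so resolution falls through to an outer dataframe's column map,
# which try_resolve_column now returns alongside the resolved name.
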
@@ -295,28 +317,43 @@ def map_unresolved_attribute(
         # For qualified names like "table.column.field", we need to find the column part
         for i in range(len(name_parts)):
             candidate_column = name_parts[i]
-            snowpark_name = (
-                column_mapping.get_snowpark_column_name_from_spark_column_name(
-                    candidate_column, allow_non_exists=True
-                )
-            )
+            snowpark_name, found_column_map = try_resolve_column(candidate_column)
+
             if snowpark_name is not None:
                 column_part_index = i
                 break

-            # Also try in outer dataframes
-            for outer_df in get_outer_dataframes():
-                snowpark_name = (
-                    outer_df.column_map.get_snowpark_column_name_from_spark_column_name(
-                        candidate_column, allow_non_exists=True
-                    )
-                )
-                if snowpark_name is not None:
-                    column_part_index = i
-                    break
+        # Validate qualifier scope: if we found a column but skipped prefix parts,
+        # those prefix parts could be valid qualifiers for the column
+        # We have prefix parts like 'nt1' in 'nt1.k' that were skipped
+        maybe_qualified = column_part_index > 0
+        if (
+            snowpark_name is not None
+            and maybe_qualified
+            and found_column_map is not None
+        ):
+            prefix_parts = name_parts[:column_part_index]
+            found_col_qualifiers = found_column_map.get_qualifiers_for_spark_column(
+                candidate_column
+            )

-            if snowpark_name is not None:
-                break
+            # Check if any qualifier matches the prefix
+            has_matching_qualifier = False
+            for qual in found_col_qualifiers:
+                if not qual.is_empty and len(qual.parts) >= len(prefix_parts):
+                    if qual.parts[-len(prefix_parts) :] == tuple(prefix_parts):
+                        has_matching_qualifier = True
+                        break
+
+            # If no matching qualifier, it's a scope violation
+            if not has_matching_qualifier:
+                # The prefix is not a valid qualifier for this column - scope violation!
+                exception = AnalysisException(
+                    f'[UNRESOLVED_COLUMN] Column "{attr_name}" cannot be resolved. '
+                    f'The table or alias "{".".join(prefix_parts)}" is not in scope or does not exist.'
+                )
+                attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                raise exception

         if snowpark_name is None:
             # Attempt LCA fallback.
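
And a hedged sketch of the new scope check rejecting a qualifier that is not in scope; the alias names echo the 'nt1'/'k' example in the diff's comment, with "t" as the bogus prefix:

# Illustrative sketch: the column "k" exists under alias "nt1", but the
# prefix "t" is not one of its qualifiers, so this should now raise instead
# of silently matching the bare column "k".
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost").getOrCreate()

spark.sql("SELECT t.k FROM VALUES (1) AS nt1(k)").show()
# Expected: AnalysisException
#   [UNRESOLVED_COLUMN] Column "t.k" cannot be resolved. The table or alias
#   "t" is not in scope or does not exist.
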
@@ -338,7 +375,7 @@ def map_unresolved_attribute(
         )
         if snowpark_name is not None:
             col = get_col(snowpark_name)
-            qualifiers = column_mapping.get_qualifier_for_spark_column(
+            qualifiers = column_mapping.get_qualifiers_for_spark_column(
                 unqualified_name
             )
             typed_col = TypedColumn(col, lambda: typer.type(col))
@@ -405,7 +442,7 @@ def map_unresolved_attribute(
     for field_name in path:
         col = col.getItem(field_name)

-    qualifiers = []
+    qualifiers = set()

     typed_col = TypedColumn(col, lambda: typer.type(col))
     typed_col.set_qualifiers(qualifiers)
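
Finally, a hedged sketch of the struct-field path walk this last hunk touches: a reference like person.address.city resolves "person" as the column, then chains getItem over the remaining parts, and the resulting field expression carries an empty qualifier set (names hypothetical):

# Illustrative sketch of struct-field access; "person"/"address"/"city" are
# hypothetical names.
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost").getOrCreate()

df = spark.sql(
    "SELECT named_struct('address', named_struct('city', 'Oslo')) AS person"
)
# "person.address.city": "person" resolves as the column; the remaining parts
# ["address", "city"] are applied as successive getItem calls.
df.select("person.address.city").show()
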