snowpark-connect 0.32.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of snowpark-connect might be problematic.
- snowflake/snowpark_connect/column_name_handler.py +91 -40
- snowflake/snowpark_connect/column_qualifier.py +0 -4
- snowflake/snowpark_connect/config.py +9 -0
- snowflake/snowpark_connect/expression/hybrid_column_map.py +5 -4
- snowflake/snowpark_connect/expression/literal.py +12 -12
- snowflake/snowpark_connect/expression/map_sql_expression.py +18 -4
- snowflake/snowpark_connect/expression/map_unresolved_attribute.py +150 -29
- snowflake/snowpark_connect/expression/map_unresolved_function.py +93 -55
- snowflake/snowpark_connect/relation/map_aggregate.py +156 -257
- snowflake/snowpark_connect/relation/map_column_ops.py +19 -0
- snowflake/snowpark_connect/relation/map_join.py +454 -252
- snowflake/snowpark_connect/relation/map_row_ops.py +136 -54
- snowflake/snowpark_connect/relation/map_sql.py +335 -90
- snowflake/snowpark_connect/relation/read/map_read.py +9 -1
- snowflake/snowpark_connect/relation/read/map_read_csv.py +19 -2
- snowflake/snowpark_connect/relation/read/map_read_json.py +90 -2
- snowflake/snowpark_connect/relation/read/map_read_parquet.py +3 -0
- snowflake/snowpark_connect/relation/read/map_read_text.py +4 -0
- snowflake/snowpark_connect/relation/read/reader_config.py +10 -0
- snowflake/snowpark_connect/relation/read/utils.py +41 -0
- snowflake/snowpark_connect/relation/utils.py +50 -2
- snowflake/snowpark_connect/relation/write/map_write.py +251 -292
- snowflake/snowpark_connect/resources_initializer.py +25 -13
- snowflake/snowpark_connect/server.py +9 -24
- snowflake/snowpark_connect/type_mapping.py +2 -0
- snowflake/snowpark_connect/typed_column.py +2 -2
- snowflake/snowpark_connect/utils/context.py +0 -14
- snowflake/snowpark_connect/utils/expression_transformer.py +163 -0
- snowflake/snowpark_connect/utils/sequence.py +21 -0
- snowflake/snowpark_connect/utils/session.py +4 -1
- snowflake/snowpark_connect/utils/udf_helper.py +1 -0
- snowflake/snowpark_connect/utils/udtf_helper.py +3 -0
- snowflake/snowpark_connect/version.py +1 -1
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/METADATA +4 -2
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/RECORD +43 -104
- snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
- snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
- snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
- snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
- snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
- snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
- snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
- {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-connect +0 -0
- {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-session +0 -0
- {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-submit +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/WHEEL +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE-binary +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/NOTICE-binary +0 -0
- {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/column_name_handler.py

@@ -27,6 +27,7 @@ from snowflake.snowpark_connect.utils.context import (
 from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )
+from snowflake.snowpark_connect.utils.sequence import next_unique_num

 ALREADY_QUOTED = re.compile('^(".+")$', re.DOTALL)

@@ -46,6 +47,7 @@ def set_schema_getter(df: DataFrame, get_schema: Callable[[], StructType]) -> No
     df.__class__ = PatchedDataFrame


+# TODO replace plan_id-offset with single unique value
 def make_column_names_snowpark_compatible(
     names: list[str], plan_id: int, offset: int = 0
 ) -> list[str]:
@@ -91,6 +93,14 @@ def make_column_names_snowpark_compatible(
     ]


+def make_unique_snowpark_name(spark_name: str) -> str:
+    """
+    Returns a snowpark column name that's guaranteed to be unique in this session,
+    by appending "#<unique number>" to the given spark name.
+    """
+    return quote_name(f"{spark_name}-{next_unique_num():x}")
+
+
 @dataclass(frozen=True)
 class ColumnNames:
     spark_name: str
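Note: make_unique_snowpark_name derives a session-unique Snowpark identifier by appending a hex-encoded sequence number to the Spark name. A minimal sketch of the scheme, with a local itertools counter standing in for snowflake.snowpark_connect.utils.sequence.next_unique_num and plain double-quoting standing in for Snowpark's quote_name (both stand-ins are assumptions for illustration):

# Sketch only: a local counter replaces next_unique_num(), and simple
# double-quoting replaces snowpark's quote_name().
import itertools

_counter = itertools.count(1)

def make_unique_name_sketch(spark_name: str) -> str:
    # e.g. 'amount' -> '"amount-1"', then '"amount-2"' on the next call
    return f'"{spark_name}-{next(_counter):x}"'

print(make_unique_name_sketch("amount"))  # "amount-1"
print(make_unique_name_sketch("amount"))  # "amount-2"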
@@ -137,7 +147,7 @@ class ColumnNameMap:

         # Rename chain dictionary to track column renaming history
         self.rename_chains: dict[str, str] = {}  # old_name -> new_name mapping
-        self.current_columns: set[str] = set()  #
+        self.current_columns: set[str] = set()  # current column names

         # Parent ColumnNameMap classes
         self._parent_column_name_map = parent_column_name_map
@@ -170,7 +180,7 @@ class ColumnNameMap:
                 snowpark_name=snowpark_column_names[i],
                 qualifiers=column_qualifiers[i]
                 if column_qualifiers and column_qualifiers[i]
-                else
+                else set(),
                 catalog_info=catalog_info,
                 database_info=database_info,
             )
@@ -511,21 +521,6 @@ class ColumnNameMap:
             if self._quote_if_unquoted(c.snowpark_name) not in cols_to_drop
         ]

-    def get_qualifier_for_spark_column(
-        self,
-        spark_column_name: str,
-    ) -> ColumnQualifier:
-        """
-        Backward compatibility: returns the first qualifier for the given Spark column name.
-        Throws if more than one qualifier exists.
-        """
-        qualifiers = self.get_qualifiers_for_spark_column(spark_column_name)
-        if len(qualifiers) > 1:
-            raise ValueError(
-                "Shouldn't happen. Multiple qualifiers found; expected only one."
-            )
-        return next(iter(qualifiers))
-
     def get_qualifiers_for_spark_column(
         self,
         spark_column_name: str,
@@ -544,7 +539,7 @@ class ColumnNameMap:
         col = mapping.get(name)

         if col is None or len(col) == 0:
-            return
+            return set()

         return col[0].qualifiers

@@ -605,7 +600,7 @@ class ColumnNameMap:
                 removed_index.add(index)
                 spark_columns.append(new_spark_columns[index])
                 snowpark_columns.append(new_snowpark_columns[index])
-                qualifiers.append(
+                qualifiers.append(set())
             else:
                 spark_columns.append(c.spark_name)
                 snowpark_columns.append(c.snowpark_name)
@@ -615,7 +610,7 @@ class ColumnNameMap:
             if i not in removed_index:
                 spark_columns.append(new_spark_columns[i])
                 snowpark_columns.append(new_snowpark_columns[i])
-                qualifiers.append(
+                qualifiers.append(set())

         return spark_columns, snowpark_columns, qualifiers

@@ -625,6 +620,71 @@ class ColumnNameMap:
         else:
             return spark_name.upper()

+    def get_columns_after_join(
+        self, right: ColumnNameMap, join_columns: list[str], join_type: str
+    ) -> list[ColumnNames]:
+        """
+        Returns a list of columns (names and qualifiers) after a using_columns join with the given column map
+        """
+
+        # first, let's gather right-side join columns for qualifier lookup
+        # and the remaining columns to append them to the result
+        join_column_names = [self._normalized_spark_name(c) for c in join_columns]
+        right_join_columns: dict[str, ColumnNames] = {}
+        right_remaining_columns: list[ColumnNames] = []
+        for oc in right.columns:
+            col_name = self._normalized_spark_name(oc.spark_name)
+            # only take the first matching column
+            if col_name in join_column_names and col_name not in right_join_columns:
+                right_join_columns[col_name] = oc
+            else:
+                right_remaining_columns.append(oc)
+
+        # now gather left-side columns
+        left_join_columns: dict[str, ColumnNames] = {}
+        left_remaining_columns: list[ColumnNames] = []
+        for c in self.columns:
+            col_name = self._normalized_spark_name(c.spark_name)
+            if col_name in join_column_names and col_name not in left_join_columns:
+                # only assign join-side qualifier for outer joins
+                match join_type:
+                    case "left":
+                        qualifiers = c.qualifiers
+                    case "right":
+                        qualifiers = right_join_columns[col_name].qualifiers
+                    case _:
+                        qualifiers = (
+                            c.qualifiers | right_join_columns[col_name].qualifiers
+                        )
+                left_join_columns[col_name] = ColumnNames(
+                    c.spark_name, c.snowpark_name, qualifiers
+                )
+            else:
+                left_remaining_columns.append(c)
+
+        # join columns go first in the user-given order,
+        # then the remaining left-side columns, then remaining right-side columns
+        match join_type:
+            case "right":
+                ordered_join_columns = [
+                    right_join_columns[name] for name in join_column_names
+                ]
+            case _:
+                ordered_join_columns = [
+                    left_join_columns[name] for name in join_column_names
+                ]
+        return ordered_join_columns + left_remaining_columns + right_remaining_columns
+
+    def get_conflicting_snowpark_columns(self, other: ColumnNameMap) -> set[str]:
+        conflicting_columns = set()
+        snowpark_names = {c.snowpark_name for c in self.columns}
+
+        for c in other.columns:
+            if c.snowpark_name in snowpark_names:
+                conflicting_columns.add(c.snowpark_name)
+
+        return conflicting_columns
+

 class JoinColumnNameMap(ColumnNameMap):
     def __init__(
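Note: get_columns_after_join defines the output column order for USING-style joins: the join columns come first in the caller-given order (taken from the right side for right joins), then the remaining left-side columns, then the remaining right-side columns. A rough sketch of that ordering rule on plain name lists (the real method works on ColumnNames objects and also merges qualifiers):

# Sketch only: lowercase comparison stands in for _normalized_spark_name.
def columns_after_using_join(left, right, join_columns, join_type):
    join_keys = [c.lower() for c in join_columns]

    right_join, right_rest = {}, []
    for name in right:
        key = name.lower()
        if key in join_keys and key not in right_join:
            right_join[key] = name
        else:
            right_rest.append(name)

    left_join, left_rest = {}, []
    for name in left:
        key = name.lower()
        if key in join_keys and key not in left_join:
            left_join[key] = name
        else:
            left_rest.append(name)

    # Join columns first (caller order), then remaining left, then remaining right.
    source = right_join if join_type == "right" else left_join
    return [source[k] for k in join_keys] + left_rest + right_rest

print(columns_after_using_join(["id", "a"], ["id", "b"], ["id"], "inner"))  # ['id', 'a', 'b']
print(columns_after_using_join(["id", "a"], ["ID", "b"], ["id"], "right"))  # ['ID', 'a', 'b']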
@@ -681,19 +741,6 @@ class JoinColumnNameMap(ColumnNameMap):
             else snowpark_column_name_in_left
         )

-        # this means that the reference is for the column in right dataframe but same snowpark name exist in left dataframe as well
-        # or vice versa, so we need to append _left or _right to the snowpark name
-        if (
-            snowpark_name in self.left_column_mapping.get_snowpark_columns()
-            and snowpark_column_name_in_right is not None
-        ):
-            snowpark_name = quote_name(f"{unquote_if_quoted(snowpark_name)}_right")
-        elif (
-            snowpark_name in self.right_column_mapping.get_snowpark_columns()
-            and snowpark_column_name_in_left is not None
-        ):
-            snowpark_name = quote_name(f"{unquote_if_quoted(snowpark_name)}_left")
-
         return snowpark_name

     def get_snowpark_column_names_from_spark_column_names(
@@ -784,19 +831,23 @@ class JoinColumnNameMap(ColumnNameMap):
     def get_qualifiers_for_spark_column(
         self, spark_column_name: str
     ) -> set[ColumnQualifier]:
-
-
-    def get_qualifier_for_spark_column(self, spark_column_name: str) -> ColumnQualifier:
-        qualifier_left = self.left_column_mapping.get_qualifier_for_spark_column(
+        qualifiers_left = self.left_column_mapping.get_qualifiers_for_spark_column(
             spark_column_name
         )
-
+        qualifiers_right = self.right_column_mapping.get_qualifiers_for_spark_column(
             spark_column_name
         )

-        if (
+        if (len(qualifiers_left) > 0) and (len(qualifiers_right) > 0):
             exception = AnalysisException(f"Ambiguous column name {spark_column_name}")
             attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
             raise exception

-        return
+        return qualifiers_right if len(qualifiers_left) == 0 else qualifiers_left
+
+    def get_columns_after_join(
+        self, right: ColumnNameMap, join_columns: list[str], join_type: str
+    ) -> list[ColumnNames]:
+        exception = NotImplementedError("Method not implemented!")
+        attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+        raise exception
snowflake/snowpark_connect/column_qualifier.py

@@ -23,10 +23,6 @@ class ColumnQualifier:
     def is_empty(self) -> bool:
         return len(self.parts) == 0

-    @classmethod
-    def no_qualifier(cls) -> ColumnQualifier:
-        return cls(())
-
     def all_qualified_names(self, name: str) -> list[str]:
         qualifier_parts = self.parts
         qualifier_prefixes = [
snowflake/snowpark_connect/config.py

@@ -275,6 +275,7 @@ SESSION_CONFIG_KEY_WHITELIST = {
     "spark.hadoop.fs.s3a.server-side-encryption.key",
     "spark.hadoop.fs.s3a.assumed.role.arn",
     "snowpark.connect.describe_cache_ttl_seconds",
+    "mapreduce.fileoutputcommitter.marksuccessfuljobs",
 }
 AZURE_ACCOUNT_KEY = re.compile(
     r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
@@ -304,6 +305,7 @@ class SessionConfig:
         "spark.sql.tvf.allowMultipleTableArguments.enabled": "true",
         "snowpark.connect.enable_snowflake_extension_behavior": "false",
         "snowpark.connect.describe_cache_ttl_seconds": "300",
+        "mapreduce.fileoutputcommitter.marksuccessfuljobs": "false",
     }

     def __init__(self) -> None:
@@ -639,6 +641,13 @@ def get_cte_optimization_enabled() -> bool:
     return get_boolean_session_config_param("snowpark.connect.cte.optimization_enabled")


+def get_success_file_generation_enabled() -> bool:
+    """Get the _SUCCESS file generation configuration setting."""
+    return get_boolean_session_config_param(
+        "mapreduce.fileoutputcommitter.marksuccessfuljobs"
+    )
+
+
 def get_describe_cache_ttl_seconds() -> int:
     """Get the describe query cache TTL from session config, with a default fallback."""
     session_config: SessionConfig = sessions_config[get_session_id()]
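Note: these config changes whitelist the Hadoop key mapreduce.fileoutputcommitter.marksuccessfuljobs (default "false") and read it back through get_success_file_generation_enabled(), which presumably gates whether _SUCCESS marker files are written on save. A hedged sketch of toggling it through the ordinary Spark Connect session conf (the connection URL and output path are placeholders):

from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost").getOrCreate()  # placeholder URL

# Default per SessionConfig above is "false"; opt back in to _SUCCESS files on write.
spark.conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "true")

spark.range(3).write.mode("overwrite").parquet("s3a://my-bucket/out")  # placeholder path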
snowflake/snowpark_connect/expression/hybrid_column_map.py

@@ -148,14 +148,15 @@ class HybridColumnMap:
                 exp, self.aggregated_column_map, self.aggregated_typer
             )

-        # For other expression types, try aggregated context first (likely references to computed values)
         try:
+            # 1. Evaluate the expression using the input grouping columns. i.e input_df.
+            # If not found, use the aggregate alias.
+            return map_expression(exp, self.input_column_map, self.input_typer)
+        except Exception:
+            # Fall back to input context
             return map_expression(
                 exp, self.aggregated_column_map, self.aggregated_typer
             )
-        except Exception:
-            # Fall back to input context
-            return map_expression(exp, self.input_column_map, self.input_typer)


     def create_hybrid_column_map_for_having(
snowflake/snowpark_connect/expression/literal.py

@@ -12,7 +12,6 @@ from tzlocal import get_localzone
 from snowflake.snowpark_connect.config import global_config
 from snowflake.snowpark_connect.error.error_codes import ErrorCodes
 from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
-from snowflake.snowpark_connect.utils.context import get_is_evaluating_sql
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
@@ -55,20 +54,21 @@ def get_literal_field_and_name(literal: expressions_proto.Expression.Literal):
             microseconds = literal.timestamp
         else:
             microseconds = literal.timestamp_ntz
-
-
-
-        tz_dt = datetime.datetime.fromtimestamp(
-            microseconds // 1_000_000, tz=local_tz
+
+        dt_utc = datetime.datetime.fromtimestamp(
+            microseconds // 1_000_000, tz=datetime.timezone.utc
         ) + datetime.timedelta(microseconds=microseconds % 1_000_000)
+
         if t == "timestamp_ntz":
-
-
-
+            # For timestamp_ntz, display in UTC
+            lit_dt = dt_utc.replace(tzinfo=None)
+            tz_dt = dt_utc
+        else:
+            # For timestamp_ltz, always display in session timezone
             config_tz = global_config.spark_sql_session_timeZone
-
-            tz_dt =
-            lit_dt =
+            display_tz = ZoneInfo(config_tz) if config_tz else local_tz
+            tz_dt = dt_utc.astimezone(display_tz)
+            lit_dt = tz_dt.replace(tzinfo=None)

         def _format_timestamp(dt) -> str:
             without_micros = f"{dt.year:04d}-{dt.month:02d}-{dt.day:02d} {dt.hour:02d}:{dt.minute:02d}:{dt.second:02d}"
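Note: the reworked literal rendering first builds a timezone-aware UTC datetime from the raw microsecond count, then either drops the timezone (TIMESTAMP_NTZ is displayed in UTC) or converts to the configured spark.sql.session.timeZone, falling back to the local zone (TIMESTAMP_LTZ). A small worked example of that conversion path, using an arbitrary timestamp value and timezone:

import datetime
from zoneinfo import ZoneInfo

microseconds = 1_700_000_000_123_456  # arbitrary example value
dt_utc = datetime.datetime.fromtimestamp(
    microseconds // 1_000_000, tz=datetime.timezone.utc
) + datetime.timedelta(microseconds=microseconds % 1_000_000)

print(dt_utc)                                               # 2023-11-14 22:13:20.123456+00:00
print(dt_utc.astimezone(ZoneInfo("America/Los_Angeles")))   # 2023-11-14 14:13:20.123456-08:00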
snowflake/snowpark_connect/expression/map_sql_expression.py

@@ -67,10 +67,16 @@ def sql_parser():
     """

     ts_type = global_config.spark_sql_timestampType
+    session_tz = global_config.spark_sql_session_timeZone

     if ts_type is not None:
         _get_sql_conf().get().setConfString("spark.sql.timestampType", str(ts_type))

+    if session_tz is not None:
+        _get_sql_conf().get().setConfString(
+            "spark.sql.session.timeZone", str(session_tz)
+        )
+
     return _get_sql_parser()


@@ -418,13 +424,21 @@ def map_logical_plan_expression(exp: jpype.JObject) -> expressions_proto.Express
             )
         )
     case "Like" | "ILike" | "RLike":
+        arguments = [
+            map_logical_plan_expression(e)
+            for e in list(as_java_list(exp.children()))
+        ]
+        # exp.escapeChar() returns a JPype JChar - convert to string and create a literal
+        if getattr(exp, "escapeChar", None) is not None:
+            escape_char_str = str(exp.escapeChar())
+            escape_literal = expressions_proto.Expression(
+                literal=expressions_proto.Expression.Literal(string=escape_char_str)
+            )
+            arguments.append(escape_literal)
         proto = expressions_proto.Expression(
             unresolved_function=expressions_proto.Expression.UnresolvedFunction(
                 function_name=class_name.lower(),
-                arguments=
-                    map_logical_plan_expression(e)
-                    for e in list(as_java_list(exp.children()))
-                ],
+                arguments=arguments,
             )
         )
     case "LikeAny" | "NotLikeAny" | "LikeAll" | "NotLikeAll":
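Note: the Like/ILike/RLike change forwards an explicit ESCAPE character by appending it as one more string-literal argument of the unresolved function. A standalone sketch that builds such a literal with the same protobuf types used above (the JPype plumbing and the real input/pattern arguments are omitted):

import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto

# The escape character ('!' here) becomes a trailing string literal argument.
escape_literal = expressions_proto.Expression(
    literal=expressions_proto.Expression.Literal(string="!")
)
like_fn = expressions_proto.Expression(
    unresolved_function=expressions_proto.Expression.UnresolvedFunction(
        function_name="like",
        arguments=[escape_literal],  # in the real mapping, the input and pattern come first
    )
)
print(like_fn)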
snowflake/snowpark_connect/expression/map_unresolved_attribute.py

@@ -3,6 +3,7 @@
 #

 import re
+from typing import Any

 import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 from pyspark.errors.exceptions.connect import AnalysisException
@@ -69,6 +70,143 @@ def _get_catalog_database_from_column_map(
     return catalog_database_info


+def _try_resolve_column_in_scopes(
+    column_name: str, column_mapping: ColumnNameMap
+) -> tuple[str | None, Any]:
+    """
+    Try to resolve a column name in current and outer scopes.
+
+    Args:
+        column_name: The column name to resolve
+        column_mapping: The column mapping for the current scope
+
+    Returns:
+        Tuple of (snowpark_name, found_column_map) or (None, None) if not found
+    """
+    # Try current scope
+    snowpark_name = column_mapping.get_snowpark_column_name_from_spark_column_name(
+        column_name, allow_non_exists=True
+    )
+    if snowpark_name is not None:
+        return snowpark_name, column_mapping
+
+    # Try outer scopes
+    for outer_df in get_outer_dataframes():
+        snowpark_name = (
+            outer_df.column_map.get_snowpark_column_name_from_spark_column_name(
+                column_name, allow_non_exists=True
+            )
+        )
+        if snowpark_name is not None:
+            return snowpark_name, outer_df.column_map
+
+    return None, None
+
+
+def _find_column_with_qualifier_match(
+    name_parts: list[str],
+    column_mapping: ColumnNameMap,
+) -> tuple[int, str | None, Any]:
+    """
+    Find the column position in name_parts where the prefix matches a qualifier.
+
+    In Spark, table qualifiers have at most 3 parts:
+    - 1 part: table only (e.g., 't1') → ColumnQualifier(('t1',))
+    - 2 parts: database.table (e.g., 'mydb.t5') → ColumnQualifier(('mydb', 't5'))
+    - 3 parts: catalog.database.table (e.g., 'cat.mydb.t5') → ColumnQualifier(('cat', 'mydb', 't5'))
+
+    Examples of how this works (suffix matching):
+    1) Input: "mydb1.t5.t5.i1" with qualifier ('mydb1', 't5')
+       - At i=2: prefix=['mydb1','t5'], matches qualifier suffix ('mydb1', 't5') → Column found!
+       - Remaining ['i1'] is treated as field access
+
+    2) Input: "t5.t5.i1" with qualifier ('mydb1', 't5')
+       - At i=1: prefix=['t5'], matches qualifier suffix ('t5',) → Column found!
+       - Remaining ['i1'] is treated as field access
+
+    3) Input: "cat.mydb.t5.t5.i1" with qualifier ('cat', 'mydb', 't5')
+       - At i=3: prefix=['cat','mydb','t5'], matches qualifier suffix → Column found!
+       - Remaining ['i1'] is treated as field access
+
+    The key insight: if the prefix before a candidate matches the END (suffix) of a qualifier,
+    then that position is the column reference. This allows partial qualification (e.g., just table
+    name instead of full database.table)
+
+    Args:
+        name_parts: The parts of the qualified name (e.g., ['mydb1', 't5', 't5', 'i1'])
+        column_mapping: The column mapping to resolve columns against
+
+    Returns:
+        Tuple of (column_part_index, snowpark_name, found_column_map)
+        Returns (0, None, None) if no valid column found
+
+    Raises:
+        AnalysisException: If a column is found but with invalid qualifier (scope violation)
+    """
+    # Track if we found a column but with wrong qualifier (scope violation)
+    scope_violation = None
+
+    for i in range(len(name_parts)):
+        candidate_column = name_parts[i]
+        snowpark_name, found_column_map = _try_resolve_column_in_scopes(
+            candidate_column, column_mapping
+        )
+
+        if snowpark_name is not None:
+            candidate_qualifiers = found_column_map.get_qualifiers_for_spark_column(
+                candidate_column
+            )
+            prefix_parts = name_parts[:i]
+
+            # Check if this is a valid column reference position
+            # A valid position is where the prefix exactly matches one of the qualifiers
+            is_valid_reference = False
+
+            if i == 0:
+                # No prefix (unqualified access)
+                # Always valid - Spark allows unqualified access to any column
+                # The remaining parts (name_parts[1:]) will be treated as
+                # struct/map/array field access (e.g., "person.address.city" where
+                # person is the column and address.city is the field path)
+                is_valid_reference = True
+            else:
+                # Has prefix - check if it matches the end (suffix) of any qualifier
+                # Spark allows partial qualification, so for qualifier ('mydb1', 't5'):
+                # - Can access as mydb1.t5.t5.i1 (full qualifier match)
+                # - Can access as t5.t5.i1 (suffix match - just table part)
+                # e.g., for "t5.t5.i1", when i=1, prefix=['t5'] matches suffix of ('mydb1', 't5')
+                # If valid, the remaining parts (name_parts[i+1:]) will be treated as
+                # struct/map/array field access (e.g., ['i1'] is a field in column t5)
+                for qual in candidate_qualifiers:
+                    if len(qual.parts) >= len(prefix_parts) and qual.parts[
+                        -len(prefix_parts) :
+                    ] == tuple(prefix_parts):
+                        is_valid_reference = True
+                        break
+
+            if is_valid_reference:
+                # This is the actual column reference
+                return (i, snowpark_name, found_column_map)
+            elif i > 0:
+                # Found column but qualifier doesn't match - this is a scope violation
+                # e.g., SELECT nt1.k where k exists but nt1 is not its qualifier
+                attr_name = ".".join(name_parts)
+                scope_violation = (attr_name, ".".join(prefix_parts))

+    # If we detected a scope violation, throw error
+    if scope_violation:
+        attr_name, invalid_qualifier = scope_violation
+        exception = AnalysisException(
+            f'[UNRESOLVED_COLUMN] Column "{attr_name}" cannot be resolved. '
+            f'The table or alias "{invalid_qualifier}" is not in scope or does not exist.'
+        )
+        attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+        raise exception
+
+    # No valid column found
+    return (0, None, None)
+
+
 def map_unresolved_attribute(
     exp: expressions_proto.Expression,
     column_mapping: ColumnNameMap,
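Note: _find_column_with_qualifier_match scans the dotted name left to right and accepts a candidate column only when the parts before it equal the tail (suffix) of one of that column's qualifiers; everything after the match is treated as struct/map field access. A minimal sketch of just that suffix test on plain tuples (prefix_matches_qualifier is a hypothetical helper, not part of the package):

def prefix_matches_qualifier(prefix_parts: list[str], qualifier: tuple[str, ...]) -> bool:
    # A prefix like ['t5'] or ['mydb1', 't5'] matches ('mydb1', 't5')
    # when it equals the qualifier's trailing parts.
    return len(qualifier) >= len(prefix_parts) and qualifier[-len(prefix_parts):] == tuple(prefix_parts)

qualifier = ("mydb1", "t5")
print(prefix_matches_qualifier(["mydb1", "t5"], qualifier))  # True  -> mydb1.t5.t5.i1 resolves
print(prefix_matches_qualifier(["t5"], qualifier))           # True  -> t5.t5.i1 resolves
print(prefix_matches_qualifier(["nt1"], qualifier))          # False -> scope violation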
@@ -275,12 +413,14 @@ def map_unresolved_attribute(
     else:
         quoted_attr_name = name_parts[0]

-
-
+    # Try to resolve the full qualified name first
+    snowpark_name, found_column_map = _try_resolve_column_in_scopes(
+        quoted_attr_name, column_mapping
     )
+
     if snowpark_name is not None:
         col = get_col(snowpark_name)
-        qualifiers =
+        qualifiers = found_column_map.get_qualifiers_for_spark_column(quoted_attr_name)
     else:
         # this means it has to be a struct column with a field name
         snowpark_name: str | None = None
@@ -291,32 +431,13 @@ def map_unresolved_attribute(
             original_attr_name, column_mapping
         )

-        #
-        #
-
-
-        snowpark_name
-
-
-        )
-        )
-        if snowpark_name is not None:
-            column_part_index = i
-            break
-
-        # Also try in outer dataframes
-        for outer_df in get_outer_dataframes():
-            snowpark_name = (
-                outer_df.column_map.get_snowpark_column_name_from_spark_column_name(
-                    candidate_column, allow_non_exists=True
-                )
-            )
-            if snowpark_name is not None:
-                column_part_index = i
-                break
-
-        if snowpark_name is not None:
-            break
+        # Find the column by matching qualifiers with the prefix parts
+        # Note: This may raise AnalysisException if a scope violation is detected
+        (
+            column_part_index,
+            snowpark_name,
+            found_column_map,
+        ) = _find_column_with_qualifier_match(name_parts, column_mapping)

     if snowpark_name is None:
         # Attempt LCA fallback.
|