snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +680 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +237 -23
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  23. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  24. snowflake/snowpark_connect/expression/literal.py +37 -13
  25. snowflake/snowpark_connect/expression/map_cast.py +123 -5
  26. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  27. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  28. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  29. snowflake/snowpark_connect/expression/map_udf.py +85 -20
  30. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  31. snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
  32. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  33. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  34. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  35. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  36. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  37. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  38. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  39. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  40. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  41. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  42. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  43. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  44. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  45. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  46. snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
  47. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  48. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  49. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  50. snowflake/snowpark_connect/relation/map_join.py +683 -442
  51. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  52. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  53. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  54. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  55. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  56. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  57. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  58. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  59. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  60. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  61. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  62. snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
  63. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  64. snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
  65. snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
  66. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  67. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  68. snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
  69. snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
  70. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  71. snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
  72. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  73. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  74. snowflake/snowpark_connect/relation/utils.py +128 -5
  75. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  76. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  77. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  78. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  79. snowflake/snowpark_connect/resources_initializer.py +110 -48
  80. snowflake/snowpark_connect/server.py +546 -456
  81. snowflake/snowpark_connect/server_common/__init__.py +500 -0
  82. snowflake/snowpark_connect/snowflake_session.py +65 -0
  83. snowflake/snowpark_connect/start_server.py +53 -5
  84. snowflake/snowpark_connect/type_mapping.py +349 -27
  85. snowflake/snowpark_connect/typed_column.py +9 -7
  86. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  87. snowflake/snowpark_connect/utils/cache.py +49 -27
  88. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  89. snowflake/snowpark_connect/utils/context.py +187 -37
  90. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  91. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  92. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  93. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  94. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  95. snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
  96. snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
  97. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  98. snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
  99. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  100. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  101. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  102. snowflake/snowpark_connect/utils/profiling.py +25 -8
  103. snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
  104. snowflake/snowpark_connect/utils/sequence.py +21 -0
  105. snowflake/snowpark_connect/utils/session.py +64 -28
  106. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  107. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  108. snowflake/snowpark_connect/utils/telemetry.py +163 -22
  109. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  110. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  111. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  112. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  113. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  114. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  115. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  116. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  117. snowflake/snowpark_connect/version.py +1 -1
  118. snowflake/snowpark_decoder/dp_session.py +6 -2
  119. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  120. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
  121. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
  122. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
  123. snowflake/snowpark_connect/hidden_column.py +0 -39
  124. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  125. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  126. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  127. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  128. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  129. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  130. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  131. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  132. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  133. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  134. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  186. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
  187. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
  188. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
  189. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
  190. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
  191. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
  192. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/execute_plan/map_execution_command.py
@@ -1,90 +1,29 @@
 #
 # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
 #
-import re
-import uuid
-from collections import Counter
 
 import pyspark.sql.connect.proto.base_pb2 as proto_base
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
-from snowflake.snowpark import DataFrame, Session
-from snowflake.snowpark.exceptions import SnowparkSQLException
-from snowflake.snowpark_connect.column_name_handler import ColumnNames
-from snowflake.snowpark_connect.config import global_config, sessions_config
 from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.execute_plan.utils import pandas_to_arrow_batches_bytes
 from snowflake.snowpark_connect.expression import map_udf
 from snowflake.snowpark_connect.relation import map_udtf
 from snowflake.snowpark_connect.relation.map_relation import map_relation
 from snowflake.snowpark_connect.relation.map_sql import map_sql_to_pandas_df
-from snowflake.snowpark_connect.relation.write.map_write import map_write, map_write_v2
-from snowflake.snowpark_connect.utils.context import get_session_id
-from snowflake.snowpark_connect.utils.identifiers import (
-    spark_to_sf_single_id,
-    spark_to_sf_single_id_with_unquoting,
+from snowflake.snowpark_connect.relation.read.metadata_utils import (
+    without_internal_columns,
 )
+from snowflake.snowpark_connect.relation.write.map_write import map_write, map_write_v2
 from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
-
-_INTERNAL_VIEW_PREFIX = "__SC_RENAMED_V_"
-
-_CREATE_VIEW_PATTERN = re.compile(r"create\s+or\s+replace\s+view", re.IGNORECASE)
-
-
-def _create_column_rename_map(
-    columns: list[ColumnNames], rename_duplicated: bool
-) -> dict:
-    if rename_duplicated is False:
-        # if we are not renaming duplicated columns, we can just return the original names
-        return {
-            col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
-            for col in columns
-        }
-
-    column_counts = Counter()
-    not_renamed_cols = []
-    renamed_cols = []
-
-    for col in columns:
-        new_column_name = col.spark_name
-        normalized_name = new_column_name.lower()
-        column_counts[normalized_name] += 1
-
-        if column_counts[normalized_name] > 1:
-            new_column_name = (
-                f"{new_column_name}_DEDUP_{column_counts[normalized_name] - 1}"
-            )
-            renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
-        else:
-            not_renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
-
-    if len(renamed_cols) == 0:
-        return {
-            col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
-            for col in not_renamed_cols
-        }
-
-    # we need to make sure that we don't have duplicated names after renaming
-    # columns that were not renamed in this iteration should have priority over renamed duplicates
-    return _create_column_rename_map(not_renamed_cols + renamed_cols, True)
-
-
-def _find_duplicated_columns(
-    columns: list[ColumnNames],
-) -> (list[str], list[ColumnNames]):
-    duplicates = []
-    remaining_columns = []
-    seen = set()
-    for col in columns:
-        if col.spark_name in seen:
-            duplicates.append(col.snowpark_name)
-        else:
-            seen.add(col.spark_name)
-            remaining_columns.append(col)
-    return duplicates, remaining_columns
+from snowflake.snowpark_connect.utils.temporary_view_helper import (
+    create_temporary_view_from_dataframe,
+)
 
 
 def map_execution_command(
@@ -94,56 +33,10 @@ def map_execution_command(
     match request.plan.command.WhichOneof("command_type"):
         case "create_dataframe_view":
             req = request.plan.command.create_dataframe_view
-            input_df_container = map_relation(req.input)
-            input_df = input_df_container.dataframe
-            column_map = input_df_container.column_map
-
-            session_config = sessions_config[get_session_id()]
-            duplicate_column_names_handling_mode = session_config[
-                "snowpark.connect.views.duplicate_column_names_handling_mode"
-            ]
-
-            # rename columns to match spark names
-            if duplicate_column_names_handling_mode == "rename":
-                # deduplicate column names by appending _DEDUP_1, _DEDUP_2, etc.
-                input_df = input_df.rename(
-                    _create_column_rename_map(column_map.columns, True)
-                )
-            elif duplicate_column_names_handling_mode == "drop":
-                # Drop duplicate column names by removing all but the first occurrence.
-                duplicated_columns, remaining_columns = _find_duplicated_columns(
-                    column_map.columns
-                )
-                if len(duplicated_columns) > 0:
-                    input_df = input_df.drop(*duplicated_columns)
-                input_df = input_df.rename(
-                    _create_column_rename_map(remaining_columns, False)
-                )
-            else:
-                # rename columns without deduplication
-                input_df = input_df.rename(
-                    _create_column_rename_map(column_map.columns, False)
-                )
-
-            if req.is_global:
-                view_name = [global_config.spark_sql_globalTempDatabase, req.name]
-            else:
-                view_name = [req.name]
-            view_name = [
-                spark_to_sf_single_id_with_unquoting(part) for part in view_name
-            ]
-
-            if req.replace:
-                try:
-                    input_df.create_or_replace_temp_view(view_name)
-                except SnowparkSQLException as exc:
-                    if _is_error_caused_by_view_referencing_itself(exc):
-                        # This error is caused by statement with self reference like `CREATE VIEW A AS SELECT X FROM A`.
-                        _create_chained_view(input_df, view_name)
-                    else:
-                        raise
-            else:
-                input_df.create_temp_view(view_name)
+            input_df_container = without_internal_columns(map_relation(req.input))
+            create_temporary_view_from_dataframe(
+                input_df_container, req.name, req.is_global, req.replace
+            )
         case "write_stream_operation_start":
             match request.plan.command.write_stream_operation_start.format:
                 case "console":
@@ -204,46 +97,8 @@ def map_execution_command(
             map_udtf.register_udtf(request.plan.command.register_table_function)
 
         case other:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Command type {other} not implemented"
             )
-
-
-def _generate_random_builtin_view_name() -> str:
-    return _INTERNAL_VIEW_PREFIX + str(uuid.uuid4()).replace("-", "")
-
-
-def _is_error_caused_by_view_referencing_itself(exc: Exception) -> bool:
-    return "view definition refers to view being defined" in str(exc).lower()
-
-
-def _create_chained_view(input_df: DataFrame, view_name: str) -> None:
-    """
-    In order to create a view, which references itself, Spark would here take the previous
-    definition of A and paste it in place of `FROM A`. Snowflake would fail in such case, so
-    as a workaround, we create a chain of internal views instead. This function:
-    1. Renames previous definition of A to some internal name (instead of deleting).
-    2. Adjusts the DDL of a new statement to reference the name of a renmaed internal view, instead of itself.
-    """
-
-    session = Session.get_active_session()
-
-    view_name = ".".join(view_name)
-
-    tmp_name = _generate_random_builtin_view_name()
-    old_name_replacement = _generate_random_builtin_view_name()
-
-    input_df.create_or_replace_temp_view(tmp_name)
-
-    session.sql(f"ALTER VIEW {view_name} RENAME TO {old_name_replacement}").collect()
-
-    ddl: str = session.sql(f"SELECT GET_DDL('VIEW', '{tmp_name}')").collect()[0][0]
-
-    ddl = ddl.replace(view_name, old_name_replacement)
-
-    # GET_DDL result doesn't contain `TEMPORARY`, it's likely a bug.
-    ddl = _CREATE_VIEW_PATTERN.sub("create or replace temp view", ddl)
-
-    session.sql(ddl).collect()
-
-    session.sql(f"ALTER VIEW {tmp_name} RENAME TO {view_name}").collect()
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
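The _create_chained_view workaround removed above existed for self-referencing view definitions: Spark resolves CREATE OR REPLACE TEMP VIEW a AS ... FROM a against the previous definition of a, while Snowflake rejects a view whose definition refers to the view being defined. A minimal sketch of the triggering pattern, assuming an existing SparkSession named spark (view and column names are invented):

    spark.sql("CREATE OR REPLACE TEMP VIEW a AS SELECT 1 AS x")
    # Re-defining the view in terms of itself is what used to raise
    # "view definition refers to view being defined" and fall into the workaround.
    spark.sql("CREATE OR REPLACE TEMP VIEW a AS SELECT x + 1 AS x FROM a")

In 1.6.0 the create_dataframe_view command instead delegates to create_temporary_view_from_dataframe in the new utils/temporary_view_helper.py module.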

snowflake/snowpark_connect/execute_plan/map_execution_root.py
@@ -21,11 +21,16 @@ from snowflake.snowpark._internal.utils import (
 )
 from snowflake.snowpark_connect.constants import SERVER_SIDE_SESSION_ID
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.execute_plan.utils import (
     arrow_table_to_arrow_bytes,
     pandas_to_arrow_batches_bytes,
 )
 from snowflake.snowpark_connect.relation.map_relation import map_relation
+from snowflake.snowpark_connect.relation.read.metadata_utils import (
+    without_internal_columns,
+)
 from snowflake.snowpark_connect.type_mapping import (
     map_snowpark_types_to_pyarrow_types,
     snowpark_to_proto_type,
@@ -53,7 +58,9 @@ def sproc_connector_fetch_arrow_batches_fix(self) -> Iterator[Table]:
     if self._prefetch_hook is not None:
         self._prefetch_hook()
     if self._query_result_format != "arrow":
-        raise NotSupportedError
+        exception = NotSupportedError()
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
     return self._result_set._fetch_arrow_batches()
 
 
@@ -92,14 +99,19 @@ def map_execution_root(
 ) -> Iterator[proto_base.ExecutePlanResponse | QueryResult]:
     result: DataFrameContainer | pandas.DataFrame = map_relation(request.plan.root)
     if isinstance(result, pandas.DataFrame):
-        result_df = result
+        pandas_df = result
+        data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
+        row_count = len(pandas_df)
+        schema = None
+        yield _build_execute_plan_response(row_count, data_bytes, schema, request)
     else:
-        result_df = result.dataframe
-
-    if isinstance(result_df, snowpark.DataFrame):
-        snowpark_schema = result_df.schema
-        schema = snowpark_to_proto_type(snowpark_schema, result.column_map, result_df)
-        spark_columns = result.column_map.get_spark_columns()
+        filtered_result = without_internal_columns(result)
+        filtered_result_df = filtered_result.dataframe
+        snowpark_schema = filtered_result_df.schema
+        schema = snowpark_to_proto_type(
+            snowpark_schema, filtered_result.column_map, filtered_result_df
+        )
+        spark_columns = filtered_result.column_map.get_spark_columns()
         if tcm.TCM_MODE:
             # TCM result handling:
             # - small result (only one batch): just return the executePlanResponse
@@ -108,22 +120,22 @@ def map_execution_root(
             is_large_result = False
             second_batch = False
             first_arrow_table = None
-            with result_df.session.query_history() as qh:
-                for arrow_table in to_arrow_batch_iter(result_df):
+            with filtered_result_df.session.query_history() as qh:
+                for arrow_table in to_arrow_batch_iter(filtered_result_df):
                     if second_batch:
                         is_large_result = True
                         break
                     first_arrow_table = arrow_table
                     second_batch = True
                 queries_cnt = len(
-                    result_df._plan.execution_queries[PlanQueryType.QUERIES]
+                    filtered_result_df._plan.execution_queries[PlanQueryType.QUERIES]
                 )
                 # get query uuid from the last query; this may not be the last queries in query history because snowpark
                 # may run some post action queries, e.g., drop temp table.
                 query_id = qh.queries[queries_cnt - 1].query_id
             if first_arrow_table is None:
                 # empty arrow batch iterator
-                pandas_df = result_df.to_pandas()
+                pandas_df = filtered_result_df.to_pandas()
                 data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
                 yield _build_execute_plan_response(0, data_bytes, schema, request)
             elif not tcm.TCM_RETURN_QUERY_ID_FOR_SMALL_RESULT and not is_large_result:
@@ -150,7 +162,7 @@ def map_execution_root(
                     spark_schema.SerializeToString(),
                 )
         else:
-            arrow_table_iter = to_arrow_batch_iter(result_df)
+            arrow_table_iter = to_arrow_batch_iter(filtered_result_df)
             batch_count = 0
             for arrow_table in arrow_table_iter:
                 if arrow_table.num_rows > 0:
@@ -166,12 +178,6 @@
 
             # Empty result needs special processing
             if batch_count == 0:
-                pandas_df = result_df.to_pandas()
+                pandas_df = filtered_result_df.to_pandas()
                 data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
                 yield _build_execute_plan_response(0, data_bytes, schema, request)
-    else:
-        pandas_df = result_df
-        data_bytes = pandas_to_arrow_batches_bytes(pandas_df)
-        row_count = len(pandas_df)
-        schema = None
-        yield _build_execute_plan_response(row_count, data_bytes, schema, request)

snowflake/snowpark_connect/execute_plan/utils.py
@@ -8,6 +8,8 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 from pyspark.sql.pandas.types import _dedup_names
 
 from snowflake.snowpark import types as sf_types
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.type_mapping import map_snowpark_types_to_pyarrow_types
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
@@ -88,9 +90,11 @@ def is_streaming(rel: relation_proto.Relation) -> bool:
         case "html_string":
             return is_streaming(rel.html_string.input)
         case "cached_remote_relation":
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 "Cached remote relation not implemented"
             )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         case "common_inline_user_defined_table_function":
            return is_streaming(rel.common_inline_user_defined_table_function.input)
         case "fill_na":

snowflake/snowpark_connect/expression/function_defaults.py
@@ -7,6 +7,9 @@ from typing import Any
 import pyspark.sql.connect.proto.expressions_pb2 as expressions_pb2
 import pyspark.sql.connect.proto.types_pb2 as types_pb2
 
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+
 
 @dataclass(frozen=True)
 class DefaultParameter:
@@ -154,7 +157,9 @@ def _create_literal_expression(value: Any) -> expressions_pb2.Expression:
         null_type.null.SetInParent()
         expr.literal.null.CopyFrom(null_type)
     else:
-        raise ValueError(f"Unsupported literal type: {value}")
+        exception = ValueError(f"Unsupported literal type: {value}")
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_TYPE)
+        raise exception
 
     return expr
 
@@ -189,11 +194,13 @@ def inject_function_defaults(
 
     # Check if any required params are missing.
     if missing_arg_count > len(defaults):
-        raise ValueError(
+        exception = ValueError(
            f"Function '{function_name}' is missing required arguments. "
            f"Expected {total_args} args, got {current_arg_count}, "
            f"but only {len(defaults)} defaults are defined."
        )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+        raise exception
 
     defaults_to_append = defaults[-missing_arg_count:]
     injected = False
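A worked example of the check above, assuming missing_arg_count is the difference between the declared and supplied argument counts (the numbers are invented):

    total_args = 4           # function declares four parameters
    current_arg_count = 1    # call site supplied one argument
    defaults = ["d3", "d4"]  # only the last two parameters have defaults
    missing_arg_count = total_args - current_arg_count  # 3
    # 3 > 2, so inject_function_defaults raises a ValueError tagged with
    # ErrorCodes.INVALID_FUNCTION_ARGUMENT instead of injecting defaults.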

snowflake/snowpark_connect/expression/hybrid_column_map.py
@@ -18,6 +18,9 @@ from snowflake import snowpark
 from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
 from snowflake.snowpark_connect.expression.typer import ExpressionTyper
 from snowflake.snowpark_connect.typed_column import TypedColumn
+from snowflake.snowpark_connect.utils.identifiers import (
+    split_fully_qualified_spark_name,
+)
 
 
 class HybridColumnMap:
@@ -119,9 +122,11 @@ class HybridColumnMap:
        # Handle column references
        if expr_type == "unresolved_attribute":
            column_name = exp.unresolved_attribute.unparsed_identifier
+           name_parts = split_fully_qualified_spark_name(column_name)
+           alias_column_name = name_parts[0]
 
            # Check if it's an alias to an existing aggregate expression
-           if column_name in self.aggregate_aliases:
+           if alias_column_name in self.aggregate_aliases:
                # Use the aggregated context to get the alias
                return map_expression(
                    exp, self.aggregated_column_map, self.aggregated_typer
@@ -148,14 +153,15 @@
                exp, self.aggregated_column_map, self.aggregated_typer
            )
 
-       # For other expression types, try aggregated context first (likely references to computed values)
        try:
+           # 1. Evaluate the expression using the input grouping columns. i.e input_df.
+           # If not found, use the aggregate alias.
+           return map_expression(exp, self.input_column_map, self.input_typer)
+       except Exception:
+           # Fall back to input context
            return map_expression(
                exp, self.aggregated_column_map, self.aggregated_typer
            )
-       except Exception:
-           # Fall back to input context
-           return map_expression(exp, self.input_column_map, self.input_typer)
 
 
 def create_hybrid_column_map_for_having(
@@ -190,3 +196,45 @@
        grouping_expressions=grouping_expressions,
        aggregate_aliases=aggregate_aliases,
    )
+
+
+def create_hybrid_column_map_for_order_by(
+    aggregate_metadata,  # AggregateMetadata type
+    aggregated_df: snowpark.DataFrame,
+    aggregated_column_map: ColumnNameMap,
+) -> HybridColumnMap:
+    """
+    Create a HybridColumnMap instance for ORDER BY clause resolution after aggregation.
+
+    This is similar to HAVING clause resolution - ORDER BY can reference:
+    1. Grouping columns (e.g., year, a)
+    2. Aggregate aliases (e.g., cnt)
+    3. Expressions on grouping columns (e.g., year(date) where date is pre-aggregation)
+
+    Args:
+        aggregate_metadata: Metadata from the aggregate operation
+        aggregated_df: The DataFrame after aggregation
+        aggregated_column_map: Column mapping for the aggregated DataFrame
+
+    Returns:
+        HybridColumnMap for resolving ORDER BY expressions
+    """
+    # Create typers for both contexts
+    input_typer = ExpressionTyper(aggregate_metadata.input_dataframe)
+    aggregated_typer = ExpressionTyper(aggregated_df)
+
+    # Build alias mapping from spark column names to aggregate expressions
+    aggregate_aliases = {}
+    for i, (spark_name, _) in enumerate(aggregate_metadata.raw_aggregations):
+        if i < len(aggregate_metadata.aggregate_expressions):
+            aggregate_aliases[spark_name] = aggregate_metadata.aggregate_expressions[i]
+
+    return HybridColumnMap(
+        input_column_map=aggregate_metadata.input_column_map,
+        input_typer=input_typer,
+        aggregated_column_map=aggregated_column_map,
+        aggregated_typer=aggregated_typer,
+        aggregate_expressions=aggregate_metadata.aggregate_expressions,
+        grouping_expressions=aggregate_metadata.grouping_expressions,
+        aggregate_aliases=aggregate_aliases,
+    )
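An illustrative query of the kind the new helper is meant to resolve, combining the three reference styles from the docstring (the table and column names are invented, and running it assumes an existing SparkSession named spark):

    spark.sql(
        """
        SELECT year(order_date) AS yr, count(*) AS cnt
        FROM orders
        GROUP BY year(order_date)
        ORDER BY cnt,                 -- aggregate alias
                 yr,                  -- grouping column
                 year(order_date)     -- expression on a pre-aggregation column
        """
    )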

snowflake/snowpark_connect/expression/literal.py
@@ -10,7 +10,8 @@ import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
 from tzlocal import get_localzone
 
 from snowflake.snowpark_connect.config import global_config
-from snowflake.snowpark_connect.utils.context import get_is_evaluating_sql
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
@@ -53,20 +54,21 @@ def get_literal_field_and_name(literal: expressions_proto.Expression.Literal):
                microseconds = literal.timestamp
            else:
                microseconds = literal.timestamp_ntz
-           lit_dt = datetime.datetime.fromtimestamp(
-               microseconds // 1_000_000
-           ) + datetime.timedelta(microseconds=microseconds % 1_000_000)
-           tz_dt = datetime.datetime.fromtimestamp(
-               microseconds // 1_000_000, tz=local_tz
+
+           dt_utc = datetime.datetime.fromtimestamp(
+               microseconds // 1_000_000, tz=datetime.timezone.utc
            ) + datetime.timedelta(microseconds=microseconds % 1_000_000)
+
            if t == "timestamp_ntz":
-               lit_dt = lit_dt.astimezone(datetime.timezone.utc)
-               tz_dt = tz_dt.astimezone(datetime.timezone.utc)
-           elif not get_is_evaluating_sql():
+               # For timestamp_ntz, display in UTC
+               lit_dt = dt_utc.replace(tzinfo=None)
+               tz_dt = dt_utc
+           else:
+               # For timestamp_ltz, always display in session timezone
                config_tz = global_config.spark_sql_session_timeZone
-               config_tz = ZoneInfo(config_tz) if config_tz else local_tz
-               tz_dt = tz_dt.astimezone(config_tz)
-               lit_dt = lit_dt.astimezone(local_tz)
+               display_tz = ZoneInfo(config_tz) if config_tz else local_tz
+               tz_dt = dt_utc.astimezone(display_tz)
+               lit_dt = tz_dt.replace(tzinfo=None)
 
            def _format_timestamp(dt) -> str:
                without_micros = f"{dt.year:04d}-{dt.month:02d}-{dt.day:02d} {dt.hour:02d}:{dt.minute:02d}:{dt.second:02d}"
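A worked example of the new conversion path (the literal value is invented): 1_700_000_000_123_456 microseconds since the epoch is first materialized as a tz-aware UTC datetime, then either stripped of its tzinfo (timestamp_ntz) or shifted into the configured session timezone (timestamp_ltz):

    import datetime
    from zoneinfo import ZoneInfo

    microseconds = 1_700_000_000_123_456
    dt_utc = datetime.datetime.fromtimestamp(
        microseconds // 1_000_000, tz=datetime.timezone.utc
    ) + datetime.timedelta(microseconds=microseconds % 1_000_000)

    dt_utc.replace(tzinfo=None)  # timestamp_ntz: 2023-11-14 22:13:20.123456
    dt_utc.astimezone(ZoneInfo("America/Los_Angeles")).replace(tzinfo=None)
    # timestamp_ltz with that session timezone: 2023-11-14 14:13:20.123456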
@@ -97,7 +99,29 @@
                *(get_literal_field_and_name(e) for e in literal.array.elements)
            )
            return array_values, f"ARRAY({', '.join(element_names)})"
+       case "struct":
+           struct_key_names = [
+               field.name for field in literal.struct.struct_type.struct.fields
+           ]
+           struct_values = [
+               get_literal_field_and_name(el)[0] for el in literal.struct.elements
+           ]
+
+           struct_dict = dict(zip(struct_key_names, struct_values))
+
+           struct_elements = [
+               item for pair in zip(struct_key_names, struct_values) for item in pair
+           ]
+
+           return (
+               struct_dict,
+               f"OBJECT_CONSTRUCT_KEEP_NULL({', '.join(str(x) for x in struct_elements)})",
+           )
        case "null" | None:
            return None, "NULL"
        case other:
-           raise SnowparkConnectNotImplementedError(f"Other Literal Type {other}")
+           exception = SnowparkConnectNotImplementedError(
+               f"Other Literal Type {other}"
+           )
+           attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+           raise exception
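For a struct literal the new branch returns the Python dict of field values plus a Snowflake-style display name built from alternating keys and values. A small sketch of the naming convention it produces (field names and values are invented):

    struct_key_names = ["a", "b"]
    struct_values = [1, "x"]
    struct_elements = [
        item for pair in zip(struct_key_names, struct_values) for item in pair
    ]
    name = f"OBJECT_CONSTRUCT_KEEP_NULL({', '.join(str(x) for x in struct_elements)})"
    # -> "OBJECT_CONSTRUCT_KEEP_NULL(a, 1, b, x)"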