snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +680 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +237 -23
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  23. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  24. snowflake/snowpark_connect/expression/literal.py +37 -13
  25. snowflake/snowpark_connect/expression/map_cast.py +123 -5
  26. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  27. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  28. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  29. snowflake/snowpark_connect/expression/map_udf.py +85 -20
  30. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  31. snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
  32. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  33. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  34. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  35. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  36. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  37. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  38. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  39. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  40. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  41. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  42. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  43. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  44. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  45. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  46. snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
  47. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  48. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  49. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  50. snowflake/snowpark_connect/relation/map_join.py +683 -442
  51. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  52. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  53. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  54. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  55. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  56. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  57. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  58. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  59. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  60. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  61. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  62. snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
  63. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  64. snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
  65. snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
  66. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  67. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  68. snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
  69. snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
  70. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  71. snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
  72. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  73. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  74. snowflake/snowpark_connect/relation/utils.py +128 -5
  75. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  76. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  77. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  78. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  79. snowflake/snowpark_connect/resources_initializer.py +110 -48
  80. snowflake/snowpark_connect/server.py +546 -456
  81. snowflake/snowpark_connect/server_common/__init__.py +500 -0
  82. snowflake/snowpark_connect/snowflake_session.py +65 -0
  83. snowflake/snowpark_connect/start_server.py +53 -5
  84. snowflake/snowpark_connect/type_mapping.py +349 -27
  85. snowflake/snowpark_connect/typed_column.py +9 -7
  86. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  87. snowflake/snowpark_connect/utils/cache.py +49 -27
  88. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  89. snowflake/snowpark_connect/utils/context.py +187 -37
  90. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  91. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  92. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  93. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  94. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  95. snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
  96. snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
  97. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  98. snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
  99. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  100. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  101. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  102. snowflake/snowpark_connect/utils/profiling.py +25 -8
  103. snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
  104. snowflake/snowpark_connect/utils/sequence.py +21 -0
  105. snowflake/snowpark_connect/utils/session.py +64 -28
  106. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  107. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  108. snowflake/snowpark_connect/utils/telemetry.py +163 -22
  109. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  110. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  111. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  112. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  113. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  114. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  115. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  116. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  117. snowflake/snowpark_connect/version.py +1 -1
  118. snowflake/snowpark_decoder/dp_session.py +6 -2
  119. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  120. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
  121. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
  122. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
  123. snowflake/snowpark_connect/hidden_column.py +0 -39
  124. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  125. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  126. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  127. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  128. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  129. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  130. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  131. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  132. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  133. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  134. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  186. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
  187. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
  188. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
  189. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
  190. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
  191. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
  192. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,43 @@
+ #
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+ #
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+
+ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
+     quote_name_without_upper_casing,
+ )
+
+
+ @dataclass(frozen=True)
+ class ColumnQualifier:
+     parts: tuple[str, ...]
+
+     def __post_init__(self) -> None:
+         if not all(isinstance(x, str) for x in self.parts):
+             raise TypeError("ColumnQualifier.parts must be strings")
+
+     @property
+     def is_empty(self) -> bool:
+         return len(self.parts) == 0
+
+     def all_qualified_names(self, name: str) -> list[str]:
+         qualifier_parts = self.parts
+         qualifier_prefixes = [
+             ".".join(quote_name_without_upper_casing(x) for x in qualifier_parts[i:])
+             for i in range(len(qualifier_parts))
+         ]
+         return [f"{prefix}.{name}" for prefix in qualifier_prefixes]
+
+     def to_upper(self):
+         return ColumnQualifier(tuple(part.upper() for part in self.parts))
+
+     def matches(self, target: ColumnQualifier) -> bool:
+         if self.is_empty or target.is_empty:
+             return False
+         # If the column has fewer qualifiers than the target, it cannot match
+         if len(self.parts) < len(target.parts):
+             return False
+         return self.parts[-len(target.parts) :] == target.parts
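The hunk above is the new module snowflake/snowpark_connect/column_qualifier.py (entry 11 in the file list). A brief usage sketch, assuming snowpark-connect 1.6.0 is installed; the qualifier and column names are invented for illustration:

    from snowflake.snowpark_connect.column_qualifier import ColumnQualifier

    qualifier = ColumnQualifier(("db", "schema", "orders"))

    # matches() compares trailing qualifier parts, so a fully qualified column
    # also matches a shorter, table-only target; empty qualifiers never match.
    assert qualifier.matches(ColumnQualifier(("orders",)))
    assert qualifier.matches(ColumnQualifier(("schema", "orders")))
    assert not qualifier.matches(ColumnQualifier(("other",)))
    assert not ColumnQualifier(()).matches(qualifier)

    # all_qualified_names() joins every suffix of the qualifier with the column
    # name, quoting each part without upper-casing, e.g.
    # ['"db"."schema"."orders".ID', '"schema"."orders".ID', '"orders".ID']
    print(qualifier.all_qualified_names("ID"))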
@@ -8,7 +8,7 @@ import re
  import sys
  from collections import defaultdict
  from copy import copy, deepcopy
- from typing import Any
+ from typing import Any, Dict, Optional

  import jpype
  import pyspark.sql.connect.proto.base_pb2 as proto_base
@@ -17,11 +17,17 @@ from tzlocal import get_localzone_name
  from snowflake import snowpark
  from snowflake.snowpark._internal.analyzer.analyzer_utils import (
      quote_name_without_upper_casing,
+     unquote_if_quoted,
  )
  from snowflake.snowpark.exceptions import SnowparkSQLException
  from snowflake.snowpark.types import TimestampTimeZone, TimestampType
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
- from snowflake.snowpark_connect.utils.context import get_session_id
+ from snowflake.snowpark_connect.utils.context import (
+     get_jpype_jclass_lock,
+     get_spark_session_id,
+ )
  from snowflake.snowpark_connect.utils.external_udxf_cache import (
      clear_external_udxf_cache,
  )
@@ -139,9 +145,21 @@ class GlobalConfig:
          "spark.sql.parser.quotedRegexColumnNames": "false",
          # custom configs
          "snowpark.connect.version": ".".join(map(str, sas_version)),
+         "snowpark.connect.temporary.views.create_in_snowflake": "false",
          # Control whether repartition(n) on a DataFrame forces splitting into n files during writes
          # This matches spark behavior more closely, but introduces overhead.
          "snowflake.repartition.for.writes": "false",
+         "snowpark.connect.structured_types.fix": "true",
+         # Local relation optimization: Use List[Row] for small data, PyArrow for large data
+         # Enabled in production by default to improve performance for createDataFrame on small local relations.
+         # Disabled in tests by default unless explicitly enabled to stabilize flaky tests that are not applying row ordering.
+         # SNOW-2719980: Remove this flag after test fragility issues are resolved
+         "snowpark.connect.localRelation.optimizeSmallData": "true",
+         "spark.sql.execution.arrow.maxRecordsPerBatch": "10000",  # TODO: no-op
+         # USE_VECTORIZED_SCANNER will become the default in a future BCR; Snowflake recommends setting it to TRUE for new workloads.
+         # This significantly reduces latency for loading Parquet files by downloading only relevant columnar sections into memory.
+         "snowpark.connect.parquet.useVectorizedScanner": "true",
+         "spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue": "false",
      }

      boolean_config_list = [
@@ -150,11 +168,14 @@ class GlobalConfig:
          "spark.sql.repl.eagerEval.enabled",
          "spark.sql.crossJoin.enabled",
          "spark.sql.caseSensitive",
+         "snowpark.connect.localRelation.optimizeSmallData",
+         "snowpark.connect.parquet.useVectorizedScanner",
          "spark.sql.ansi.enabled",
          "spark.sql.legacy.allowHashOnMapType",
          "spark.Catalog.databaseFilterInformationSchema",
          "spark.sql.parser.quotedRegexColumnNames",
          "snowflake.repartition.for.writes",
+         "spark.sql.legacy.dataset.nameNonStructGroupingKeyAsValue",
      ]

      int_config_list = [
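Together, the two hunks above (snowflake/snowpark_connect/config.py) add new GlobalConfig defaults and register the new keys as boolean configs. A hedged sketch of how a client could read or override them through the regular Spark Connect config API; the connection URL is a placeholder, and reading these custom server-side keys back is assumed to work the same way as for the other defaults:

    from pyspark.sql import SparkSession

    # Placeholder endpoint; use your actual Snowpark Connect session instead.
    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

    # Values come back as strings and should reflect the defaults added above.
    print(spark.conf.get("snowpark.connect.parquet.useVectorizedScanner"))
    print(spark.conf.get("snowpark.connect.localRelation.optimizeSmallData"))

    # Boolean configs take the usual string forms ("true"/"false").
    spark.conf.set("snowpark.connect.localRelation.optimizeSmallData", "false")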
@@ -257,21 +278,34 @@ SESSION_CONFIG_KEY_WHITELIST = {
      "spark.sql.execution.pythonUDTF.arrow.enabled",
      "spark.sql.tvf.allowMultipleTableArguments.enabled",
      "snowpark.connect.sql.passthrough",
+     "snowpark.connect.cte.optimization_enabled",
      "snowpark.connect.iceberg.external_volume",
      "snowpark.connect.sql.identifiers.auto-uppercase",
+     "snowpark.connect.sql.partition.external_table_location",
      "snowpark.connect.udtf.compatibility_mode",
      "snowpark.connect.views.duplicate_column_names_handling_mode",
-     "enable_snowflake_extension_behavior",
+     "snowpark.connect.temporary.views.create_in_snowflake",
+     "snowpark.connect.enable_snowflake_extension_behavior",
+     "spark.hadoop.fs.s3a.server-side-encryption.key",
+     "spark.hadoop.fs.s3a.assumed.role.arn",
+     "snowpark.connect.describe_cache_ttl_seconds",
+     "mapreduce.fileoutputcommitter.marksuccessfuljobs",
+     "spark.sql.parquet.enable.summary-metadata",
+     "parquet.enable.summary-metadata",
  }
- AZURE_SAS_KEY = re.compile(
+ AZURE_ACCOUNT_KEY = re.compile(
      r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
  )
+ AZURE_SAS_KEY = re.compile(
+     r"^fs\.azure\.sas\.fixed\.token\.[^\.]+\.dfs\.core\.windows\.net$"
+ )


  def valid_session_config_key(key: str):
      return (
          key in SESSION_CONFIG_KEY_WHITELIST  # AWS session keys
          or AZURE_SAS_KEY.match(key)  # Azure session keys
+         or AZURE_ACCOUNT_KEY.match(key)  # Azure account keys
      )

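The whitelist hunk above also renames the Azure patterns: the pre-existing blob-endpoint SAS pattern becomes AZURE_ACCOUNT_KEY, and AZURE_SAS_KEY now matches the ADLS Gen2 fixed-token form. A self-contained check of the two regexes, copied from the hunk (the account and container names are invented):

    import re

    AZURE_ACCOUNT_KEY = re.compile(
        r"^fs\.azure\.sas\.[^\.]+\.[^\.]+\.blob\.core\.windows\.net$"
    )
    AZURE_SAS_KEY = re.compile(
        r"^fs\.azure\.sas\.fixed\.token\.[^\.]+\.dfs\.core\.windows\.net$"
    )

    # container.account blob-endpoint key -> AZURE_ACCOUNT_KEY
    assert AZURE_ACCOUNT_KEY.match("fs.azure.sas.raw.myaccount.blob.core.windows.net")
    # fixed-token ADLS Gen2 key -> AZURE_SAS_KEY
    assert AZURE_SAS_KEY.match("fs.azure.sas.fixed.token.myaccount.dfs.core.windows.net")
    # anything else must appear in SESSION_CONFIG_KEY_WHITELIST to pass valid_session_config_key
    assert not AZURE_SAS_KEY.match("fs.azure.sas.raw.myaccount.blob.core.windows.net")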
@@ -279,17 +313,23 @@ class SessionConfig:
      """This class contains the session configuration for the Spark Server."""

      default_session_config = {
-         "snowpark.connect.sql.identifiers.auto-uppercase": "all_except_columns",
          "snowpark.connect.sql.passthrough": "false",
+         "snowpark.connect.cte.optimization_enabled": "false",
          "snowpark.connect.udtf.compatibility_mode": "false",
          "snowpark.connect.views.duplicate_column_names_handling_mode": "rename",
          "spark.sql.execution.pythonUDTF.arrow.enabled": "false",
          "spark.sql.tvf.allowMultipleTableArguments.enabled": "true",
-         "enable_snowflake_extension_behavior": "false",
+         "snowpark.connect.enable_snowflake_extension_behavior": "false",
+         "snowpark.connect.describe_cache_ttl_seconds": "300",
+         "snowpark.connect.sql.partition.external_table_location": None,
+         "mapreduce.fileoutputcommitter.marksuccessfuljobs": "false",
+         "spark.sql.parquet.enable.summary-metadata": "false",
+         "parquet.enable.summary-metadata": "false",
      }

      def __init__(self) -> None:
          self.config = deepcopy(self.default_session_config)
+         self.table_metadata: Dict[str, Dict[str, Any]] = {}

      def __getitem__(self, item: str) -> str:
          return self.get(item)
@@ -344,9 +384,11 @@ def route_config_proto(
              if not pair.HasField("value"):
                  from pyspark.errors import IllegalArgumentException

-                 raise IllegalArgumentException(
+                 exception = IllegalArgumentException(
                      f"Cannot set config '{pair.key}' to None"
                  )
+                 attach_custom_error_code(exception, ErrorCodes.INVALID_CONFIG_VALUE)
+                 raise exception

              set_config_param(
                  config.session_id, pair.key, pair.value, snowpark_session
@@ -429,7 +471,11 @@ def route_config_proto(
                  pair.value = str(global_config.is_modifiable(key)).lower()
              return res
          case _:
-             raise SnowparkConnectNotImplementedError(f"Unexpected request {config}")
+             exception = SnowparkConnectNotImplementedError(
+                 f"Unexpected request {config}"
+             )
+             attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+             raise exception


  def set_config_param(
@@ -469,19 +515,27 @@ def _verify_static_config_not_modified(key: str) -> None:
      # https://github.com/apache/spark/blob/v3.5.3/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala#L161
      # Spark does not allow to modify static configurations at runtime.
      if global_config.is_static_config(key) and global_config.is_set(key):
-         raise ValueError(f"Cannot modify the value of a static config: {key}")
+         exception = ValueError(f"Cannot modify the value of a static config: {key}")
+         attach_custom_error_code(exception, ErrorCodes.CONFIG_CHANGE_NOT_ALLOWED)
+         raise exception


  def _verify_is_valid_config_value(key: str, value: Any) -> None:
      if key in CONFIG_ALLOWED_VALUES and value not in CONFIG_ALLOWED_VALUES[key]:
-         raise ValueError(
+         exception = ValueError(
              f"Invalid value '{value}' for key '{key}'. Allowed values: {', '.join(CONFIG_ALLOWED_VALUES[key])}."
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_CONFIG_VALUE)
+         raise exception


  def _verify_is_not_readonly_config(key):
      if key in global_config.readonly_config_list:
-         raise ValueError(f"Config with key {key} is read-only and cannot be modified.")
+         exception = ValueError(
+             f"Config with key {key} is read-only and cannot be modified."
+         )
+         attach_custom_error_code(exception, ErrorCodes.CONFIG_CHANGE_NOT_ALLOWED)
+         raise exception


  def set_jvm_timezone(timezone_id: str):
@@ -498,10 +552,13 @@ def set_jvm_timezone(timezone_id: str):
          RuntimeError: If JVM is not started
      """
      if not jpype.isJVMStarted():
-         raise RuntimeError("JVM must be started before setting timezone")
+         exception = RuntimeError("JVM must be started before setting timezone")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      try:
-         TimeZone = jpype.JClass("java.util.TimeZone")
+         with get_jpype_jclass_lock():
+             TimeZone = jpype.JClass("java.util.TimeZone")
          new_timezone = TimeZone.getTimeZone(timezone_id)
          TimeZone.setDefault(new_timezone)

@@ -513,7 +570,9 @@ def set_jvm_timezone(timezone_id: str):
  def reset_jvm_timezone_to_system_default():
      """Reset JVM timezone to the system's default timezone"""
      if not jpype.isJVMStarted():
-         raise RuntimeError("JVM must be started first")
+         exception = RuntimeError("JVM must be started first")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      try:
          TimeZone = jpype.JClass("java.util.TimeZone")
@@ -522,9 +581,13 @@ def reset_jvm_timezone_to_system_default():
              f"Reset JVM timezone to system default: {TimeZone.getDefault().getID()}"
          )
      except jpype.JException as e:
-         raise RuntimeError(f"Java exception while resetting timezone: {e}")
+         exception = RuntimeError(f"Java exception while resetting timezone: {e}")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception
      except Exception as e:
-         raise RuntimeError(f"Unexpected error resetting JVM timezone: {e}")
+         exception = RuntimeError(f"Unexpected error resetting JVM timezone: {e}")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception


  def set_snowflake_parameters(
@@ -569,27 +632,98 @@ def set_snowflake_parameters(
                  snowpark_session.use_database(db)
              case (prev, curr) if prev != curr:
                  snowpark_session.use_schema(prev)
+         case "snowpark.connect.cte.optimization_enabled":
+             # Set CTE optimization on the snowpark session
+             cte_enabled = str_to_bool(value)
+             snowpark_session.cte_optimization_enabled = cte_enabled
+             logger.info(f"Updated snowpark session CTE optimization: {cte_enabled}")
+         case "snowpark.connect.structured_types.fix":
+             # TODO: SNOW-2367714 Remove this once the fix is automatically enabled in Snowpark
+             snowpark.context._enable_fix_2360274 = str_to_bool(value)
+             logger.info(f"Updated snowpark session structured types fix: {value}")
          case _:
              pass


  def get_boolean_session_config_param(name: str) -> bool:
-     session_config = sessions_config[get_session_id()]
+     session_config = sessions_config[get_spark_session_id()]
      return str_to_bool(session_config[name])


+ def get_string_session_config_param(name: str) -> str:
+     session_config = sessions_config[get_spark_session_id()]
+     return str(session_config[name])
+
+
+ def get_cte_optimization_enabled() -> bool:
+     """Get the CTE optimization configuration setting."""
+     return get_boolean_session_config_param("snowpark.connect.cte.optimization_enabled")
+
+
+ def get_success_file_generation_enabled() -> bool:
+     """Get the _SUCCESS file generation configuration setting."""
+     return get_boolean_session_config_param(
+         "mapreduce.fileoutputcommitter.marksuccessfuljobs"
+     )
+
+
+ def get_parquet_metadata_generation_enabled() -> bool:
+     """
+     Get the Parquet metadata file generation configuration setting.
+     """
+     return get_boolean_session_config_param(
+         "spark.sql.parquet.enable.summary-metadata"
+     ) or get_boolean_session_config_param("parquet.enable.summary-metadata")
+
+
+ def get_describe_cache_ttl_seconds() -> int:
+     """Get the describe query cache TTL from session config, with a default fallback."""
+     session_config: SessionConfig = sessions_config[get_spark_session_id()]
+     default_ttl: str = SessionConfig.default_session_config[
+         "snowpark.connect.describe_cache_ttl_seconds"
+     ]
+     try:
+         ttl_str = session_config.get(
+             "snowpark.connect.describe_cache_ttl_seconds", default_ttl
+         )
+         return int(ttl_str)
+     except ValueError:  # fallback to default ttl
+         return int(default_ttl)
+
+
+ def should_create_temporary_view_in_snowflake() -> bool:
+     return str_to_bool(
+         global_config["snowpark.connect.temporary.views.create_in_snowflake"]
+     )
+
+
  def auto_uppercase_column_identifiers() -> bool:
-     session_config = sessions_config[get_session_id()]
-     return session_config[
+     session_config = sessions_config[get_spark_session_id()]
+     auto_upper_case_config = session_config[
          "snowpark.connect.sql.identifiers.auto-uppercase"
-     ].lower() in ("all", "only_columns")
+     ]
+     if auto_upper_case_config:
+         return auto_upper_case_config.lower() in ("all", "only_columns")
+
+     return not global_config.spark_sql_caseSensitive


  def auto_uppercase_non_column_identifiers() -> bool:
-     session_config = sessions_config[get_session_id()]
-     return session_config[
+     session_config = sessions_config[get_spark_session_id()]
+     auto_upper_case_config = session_config[
          "snowpark.connect.sql.identifiers.auto-uppercase"
-     ].lower() in ("all", "all_except_columns")
+     ]
+     if auto_upper_case_config:
+         return auto_upper_case_config.lower() in ("all", "all_except_columns")
+
+     return not global_config.spark_sql_caseSensitive
+
+
+ def external_table_location() -> Optional[str]:
+     session_config = sessions_config[get_spark_session_id()]
+     return session_config.get(
+         "snowpark.connect.sql.partition.external_table_location", None
+     )


  def parse_imports(session: snowpark.Session, imports: str | None) -> None:
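The new session-scoped getters above (still snowflake/snowpark_connect/config.py) are driven by keys that the whitelist hunk added earlier. A hedged client-side sketch of toggling them over Spark Connect; the endpoint is a placeholder and the comments restate behavior from the hunks above rather than documented guarantees:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

    # Turn on Snowpark CTE optimization for this session.
    spark.conf.set("snowpark.connect.cte.optimization_enabled", "true")

    # Shorten the describe-query cache TTL; a non-numeric value falls back to
    # the 300-second default in get_describe_cache_ttl_seconds().
    spark.conf.set("snowpark.connect.describe_cache_ttl_seconds", "60")

    # Emit Hadoop-style _SUCCESS marker files on writes.
    spark.conf.set("mapreduce.fileoutputcommitter.marksuccessfuljobs", "true")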
@@ -613,3 +747,83 @@ def get_timestamp_type():
      # shouldn't happen since `spark.sql.timestampType` is always defined, and `spark.conf.unset` sets it to default (TIMESTAMP_LTZ)
      timestamp_type = TimestampType(TimestampTimeZone.LTZ)
      return timestamp_type
+
+
+ def record_table_metadata(
+     table_identifier: str,
+     table_type: str,
+     data_source: str,
+     supports_column_rename: bool = True,
+ ) -> None:
+     """
+     Record metadata about a table for Spark compatibility checks.
+
+     Args:
+         table_identifier: Full table identifier (catalog.database.table)
+         table_type: "v1" or "v2"
+         data_source: Source format (parquet, csv, iceberg, etc.)
+         supports_column_rename: Whether the table supports RENAME COLUMN
+     """
+     session_id = get_spark_session_id()
+     session_config = sessions_config[session_id]
+
+     # Normalize table identifier for consistent lookup
+     # Use the full catalog.database.table identifier to avoid conflicts
+     normalized_identifier = table_identifier.upper().strip('"')
+
+     session_config.table_metadata[normalized_identifier] = {
+         "table_type": table_type,
+         "data_source": data_source,
+         "supports_column_rename": supports_column_rename,
+     }
+
+
+ def get_table_metadata(table_identifier: str) -> Dict[str, Any] | None:
+     """
+     Get stored metadata for a table.
+
+     Args:
+         table_identifier: Full table identifier (catalog.database.table)
+
+     Returns:
+         Table metadata dict or None if not found
+     """
+     session_id = get_spark_session_id()
+     session_config = sessions_config[session_id]
+
+     normalized_identifier = unquote_if_quoted(table_identifier).upper()
+
+     return session_config.table_metadata.get(normalized_identifier)
+
+
+ def check_table_supports_operation(table_identifier: str, operation: str) -> bool:
+     """
+     Check if a table supports a given operation based on metadata and config.
+
+     Args:
+         table_identifier: Full table identifier (catalog.database.table)
+         operation: Operation to check (e.g., "rename_column")
+
+     Returns:
+         True if operation is supported, False if should be blocked
+     """
+     table_metadata = get_table_metadata(table_identifier)
+
+     if not table_metadata:
+         return True
+
+     session_id = get_spark_session_id()
+     session_config = sessions_config[session_id]
+     enable_extensions = str_to_bool(
+         session_config.get(
+             "snowpark.connect.enable_snowflake_extension_behavior", "false"
+         )
+     )
+
+     if enable_extensions:
+         return True
+
+     if operation == "rename_column":
+         return table_metadata.get("supports_column_rename", True)
+
+     return True
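record_table_metadata, get_table_metadata and check_table_supports_operation give config.py a per-session registry of table capabilities that write and ALTER TABLE paths can consult. Reduced to plain data, the gating decision looks like the sketch below; the function and sample metadata are illustrative stand-ins, not the module's actual entry points:

    from typing import Any, Dict, Optional

    def supports_operation(
        table_metadata: Optional[Dict[str, Any]],
        operation: str,
        extension_behavior_enabled: bool,
    ) -> bool:
        # Tables with no recorded metadata are never blocked.
        if not table_metadata:
            return True
        # Snowflake extension behavior bypasses the compatibility checks.
        if extension_behavior_enabled:
            return True
        # Column renames are gated by the recorded supports_column_rename flag.
        if operation == "rename_column":
            return table_metadata.get("supports_column_rename", True)
        return True

    meta = {"table_type": "v1", "data_source": "parquet", "supports_column_rename": False}
    assert supports_operation(None, "rename_column", False)      # unknown table: allowed
    assert not supports_operation(meta, "rename_column", False)  # blocked in Spark-compatible mode
    assert supports_operation(meta, "rename_column", True)       # allowed with extension behavior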
@@ -16,3 +16,5 @@ MAP_IN_ARROW_EVAL_TYPE = 207 # eval_type for mapInArrow operations
  COLUMN_METADATA_COLLISION_KEY = "{expr_id}_{key}"

  DUPLICATE_KEY_FOUND_ERROR_TEMPLATE = "Duplicate key found: {key}. You can set spark.sql.mapKeyDedupPolicy to LAST_WIN to deduplicate map keys with last wins policy."
+
+ SPARK_VERSION = "3.5.3"
@@ -4,14 +4,40 @@

  from __future__ import annotations

+ from dataclasses import dataclass
  from typing import TYPE_CHECKING, Callable

  from snowflake import snowpark
  from snowflake.snowpark.types import StructField, StructType
- from snowflake.snowpark_connect.hidden_column import HiddenColumn
+ from snowflake.snowpark_connect.column_qualifier import ColumnQualifier

  if TYPE_CHECKING:
+     import pyspark.sql.connect.proto.expressions_pb2 as expressions_proto
+
      from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+     from snowflake.snowpark_connect.typed_column import TypedColumn
+
+
+ @dataclass
+ class AggregateMetadata:
+     """
+     Metadata about aggregation for resolving expressions in ORDER BY.
+
+     When a Sort operation follows an Aggregate operation, ORDER BY expressions
+     may reference:
+     1. Grouping columns from the GROUP BY clause
+     2. Aggregate result columns (aliases)
+     3. Expressions on pre-aggregation columns (e.g., year(date) where date existed before GROUP BY)
+
+     This metadata enables hybrid resolution similar to HAVING clause.
+     """
+
+     input_column_map: ColumnNameMap
+     input_dataframe: snowpark.DataFrame
+     grouping_expressions: list[expressions_proto.Expression]
+     aggregate_expressions: list[expressions_proto.Expression]
+     spark_columns: list[str]
+     raw_aggregations: list[tuple[str, TypedColumn]]


  class DataFrameContainer:
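AggregateMetadata (snowflake/snowpark_connect/dataframe_container.py) captures what a following Sort needs in order to resolve ORDER BY expressions against an aggregation. The three reference styles listed in its docstring look like this from the client side; a hedged PySpark sketch with invented column names and a placeholder endpoint:

    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F

    spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
    df = spark.createDataFrame(
        [("a", "2024-01-03", 10), ("b", "2023-06-01", 5)],
        ["name", "sale_date", "amount"],
    )

    agg = df.groupBy("name").agg(F.sum("amount").alias("total"))
    agg.orderBy("name")    # 1. grouping column
    agg.orderBy("total")   # 2. aggregate result alias
    # 3. expression over a pre-aggregation column (sale_date existed before the GROUP BY)
    df.groupBy(F.year("sale_date")).agg(F.sum("amount")).orderBy(F.year("sale_date"))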
@@ -30,6 +56,9 @@ class DataFrameContainer:
          alias: str | None = None,
          cached_schema_getter: Callable[[], StructType] | None = None,
          partition_hint: int | None = None,
+         can_be_cached: bool = True,
+         can_be_materialized: bool = True,
+         aggregate_metadata: AggregateMetadata | None = None,
      ) -> None:
          """
          Initialize a new DataFrameContainer.
@@ -41,12 +70,16 @@
              alias: Optional alias for the DataFrame
              cached_schema_getter: Optional function to get cached schema
              partition_hint: Optional partition count from repartition() operations
+             aggregate_metadata: Optional metadata about aggregation for ORDER BY resolution
          """
          self._dataframe = dataframe
          self._column_map = self._create_default_column_map(column_map)
          self._table_name = table_name
          self._alias = alias
          self._partition_hint = partition_hint
+         self._can_be_cached = can_be_cached
+         self._can_be_materialized = can_be_materialized
+         self._aggregate_metadata = aggregate_metadata

          if cached_schema_getter is not None:
              self._apply_cached_schema_getter(cached_schema_getter)
@@ -59,13 +92,16 @@
          snowpark_column_names: list[str],
          snowpark_column_types: list | None = None,
          column_metadata: dict | None = None,
-         column_qualifiers: list[list[str]] | None = None,
+         column_qualifiers: list[set[ColumnQualifier]] | None = None,
          parent_column_name_map: ColumnNameMap | None = None,
-         hidden_columns: set[HiddenColumn] | None = None,
          table_name: str | None = None,
          alias: str | None = None,
          cached_schema_getter: Callable[[], StructType] | None = None,
          partition_hint: int | None = None,
+         equivalent_snowpark_names: list[set[str]] | None = None,
+         column_is_hidden: list[bool] | None = None,
+         can_be_cached: bool = True,
+         aggregate_metadata: AggregateMetadata | None = None,
      ) -> DataFrameContainer:
          """
          Create a new container with complete column mapping configuration.
@@ -78,11 +114,14 @@
              column_metadata: Optional metadata dictionary
              column_qualifiers: Optional column qualifiers
              parent_column_name_map: Optional parent column name map
-             hidden_columns: Optional list of hidden column names
              table_name: Optional table name
              alias: Optional alias
              cached_schema_getter: Optional function to get cached schema
              partition_hint: Optional partition count from repartition() operations
+             equivalent_snowpark_names: list of sets with old snowpark names that can be resolved with an existing column
+             column_is_hidden: Optional list of booleans indicating whether each column is hidden
+             can_be_cached: Optional boolean indicating if the dataframe can be cached
+             aggregate_metadata: Optional metadata about aggregation for ORDER BY resolution

          Returns:
              A new DataFrameContainer instance
@@ -101,7 +140,8 @@
              column_metadata,
              column_qualifiers,
              parent_column_name_map,
-             hidden_columns,
+             equivalent_snowpark_names,
+             column_is_hidden,
          )

          # Determine the schema getter to use
@@ -129,8 +169,25 @@
              alias=alias,
              cached_schema_getter=final_schema_getter,
              partition_hint=partition_hint,
+             can_be_cached=can_be_cached,
+             aggregate_metadata=aggregate_metadata,
          )

+     @property
+     def can_be_cached(self) -> bool:
+         """Indicate if the DataFrame can be cached in df_cache"""
+         return self._can_be_cached
+
+     @property
+     def can_be_materialized(self) -> bool:
+         """Indicate if the DataFrame can be materialized in df_cache"""
+         return self._can_be_materialized
+
+     def without_materialization(self):
+         """Prevent the DataFrame from being materialized in df_cache"""
+         self._can_be_materialized = False
+         return self
+
      @property
      def dataframe(self) -> snowpark.DataFrame:
          """Get the underlying Snowpark DataFrame."""
@@ -224,9 +281,10 @@
          spark_column_names: list[str],
          snowpark_column_names: list[str],
          column_metadata: dict | None = None,
-         column_qualifiers: list[list[str]] | None = None,
+         column_qualifiers: list[set[ColumnQualifier]] | None = None,
          parent_column_name_map: ColumnNameMap | None = None,
-         hidden_columns: set[HiddenColumn] | None = None,
+         equivalent_snowpark_names: list[set[str]] | None = None,
+         column_is_hidden: list[bool] | None = None,
      ) -> ColumnNameMap:
          """Create a ColumnNameMap with the provided configuration."""
          from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
@@ -237,7 +295,8 @@
              column_metadata=column_metadata,
              column_qualifiers=column_qualifiers,
              parent_column_name_map=parent_column_name_map,
-             hidden_columns=hidden_columns,
+             equivalent_snowpark_names=equivalent_snowpark_names,
+             column_is_hidden=column_is_hidden,
          )

      @staticmethod
@@ -262,3 +321,38 @@
              )
          ]
      )
+
+     def without_hidden_columns(self) -> DataFrameContainer:
+         from snowflake.snowpark_connect.column_name_handler import ColumnNameMap
+
+         if not any(c.is_hidden for c in self._column_map.columns):
+             return self
+
+         hidden_column_names = [
+             c.snowpark_name for c in self._column_map.columns if c.is_hidden
+         ]
+         visible_columns = [c for c in self._column_map.columns if not c.is_hidden]
+
+         filtered_df = self._dataframe.drop(hidden_column_names)
+         filtered_column_map = ColumnNameMap(
+             spark_column_names=[c.spark_name for c in visible_columns],
+             snowpark_column_names=[c.snowpark_name for c in visible_columns],
+             column_metadata=self._column_map.column_metadata,
+             column_qualifiers=[c.qualifiers for c in visible_columns],
+             parent_column_name_map=self._column_map._parent_column_name_map,
+         )
+
+         return DataFrameContainer(
+             dataframe=filtered_df,
+             column_map=filtered_column_map,
+             table_name=self._table_name,
+             alias=self._alias,
+             cached_schema_getter=lambda: StructType(
+                 [
+                     field
+                     for field in self._dataframe.schema.fields
+                     if field.name not in hidden_column_names
+                 ]
+             ),
+             partition_hint=self._partition_hint,
+         )
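without_hidden_columns drops the columns whose mapping entries are flagged hidden and rebuilds the container around the visible ones, replacing the removed hidden_column.py module (entry 123 in the file list) with per-column flags. Stripped of the Snowpark specifics, the filtering step amounts to this standalone sketch; the Column tuple and the sample names are invented stand-ins for the entries of ColumnNameMap.columns:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class Column:
        spark_name: str
        snowpark_name: str
        is_hidden: bool = False

    columns = [
        Column("id", '"ID"'),
        Column("file_row_number", '"METADATA$FILE_ROW_NUMBER"', is_hidden=True),
        Column("amount", '"AMOUNT"'),
    ]

    hidden_column_names = [c.snowpark_name for c in columns if c.is_hidden]
    visible_columns = [c for c in columns if not c.is_hidden]

    # The real method drops hidden_column_names from the Snowpark DataFrame and
    # rebuilds the ColumnNameMap from visible_columns; here we only show the split.
    assert hidden_column_names == ['"METADATA$FILE_ROW_NUMBER"']
    assert [c.spark_name for c in visible_columns] == ["id", "amount"]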