snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +717 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +309 -26
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/error_utils.py +28 -0
  23. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  24. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  25. snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
  26. snowflake/snowpark_connect/expression/literal.py +37 -13
  27. snowflake/snowpark_connect/expression/map_cast.py +224 -15
  28. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  29. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  30. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  31. snowflake/snowpark_connect/expression/map_udf.py +86 -20
  32. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  33. snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
  34. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  35. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  36. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  37. snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  39. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
  43. snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
  44. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  45. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  46. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  47. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  48. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  49. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  50. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  51. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  52. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  53. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  54. snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
  55. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  56. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  57. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  58. snowflake/snowpark_connect/relation/map_join.py +683 -442
  59. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  60. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  61. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  62. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  63. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  64. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  65. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  66. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  67. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  68. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  69. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  70. snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
  71. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
  72. snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
  73. snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
  74. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  75. snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
  76. snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
  77. snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
  78. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  79. snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
  80. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  81. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  82. snowflake/snowpark_connect/relation/utils.py +128 -5
  83. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  84. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  85. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  86. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  87. snowflake/snowpark_connect/resources_initializer.py +171 -48
  88. snowflake/snowpark_connect/server.py +528 -473
  89. snowflake/snowpark_connect/server_common/__init__.py +503 -0
  90. snowflake/snowpark_connect/snowflake_session.py +65 -0
  91. snowflake/snowpark_connect/start_server.py +53 -5
  92. snowflake/snowpark_connect/type_mapping.py +349 -27
  93. snowflake/snowpark_connect/type_support.py +130 -0
  94. snowflake/snowpark_connect/typed_column.py +9 -7
  95. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  96. snowflake/snowpark_connect/utils/cache.py +49 -27
  97. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  98. snowflake/snowpark_connect/utils/context.py +195 -37
  99. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  100. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  101. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  102. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  103. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  104. snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
  105. snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
  106. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  107. snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
  108. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  109. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  110. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  111. snowflake/snowpark_connect/utils/profiling.py +25 -8
  112. snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
  113. snowflake/snowpark_connect/utils/sequence.py +21 -0
  114. snowflake/snowpark_connect/utils/session.py +64 -28
  115. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  116. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  117. snowflake/snowpark_connect/utils/telemetry.py +192 -40
  118. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  119. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  120. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  121. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  122. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  123. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  124. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  125. snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
  126. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  127. snowflake/snowpark_connect/version.py +1 -1
  128. snowflake/snowpark_decoder/dp_session.py +6 -2
  129. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  130. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
  131. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
  132. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
  133. snowflake/snowpark_connect/hidden_column.py +0 -39
  134. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  186. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  187. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  188. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  189. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  190. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  191. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  192. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  193. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  194. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
  195. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
  196. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
  197. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
  198. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  199. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
  200. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
@@ -6,20 +6,28 @@ import hashlib
6
6
  import inspect
7
7
  import random
8
8
  import re
9
- import threading
10
9
  import time
11
10
  from typing import Any
12
11
 
13
12
  from snowflake import snowpark
14
13
  from snowflake.connector.cursor import ResultMetadataV2
15
14
  from snowflake.snowpark._internal.server_connection import ServerConnection
15
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
16
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
17
+ from snowflake.snowpark_connect.utils.concurrent import (
18
+ SynchronizedDict,
19
+ SynchronizedList,
20
+ )
16
21
  from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
17
22
  from snowflake.snowpark_connect.utils.telemetry import telemetry
18
23
 
19
- DESCRIBE_CACHE_TTL_SECONDS = 15
20
24
  USE_DESCRIBE_QUERY_CACHE = True
21
25
 
22
- DDL_DETECTION_PATTERN = re.compile(r"^\s*(CREATE|ALTER|DROP|RENAME)\b", re.IGNORECASE)
26
+ DDL_DETECTION_PATTERN = re.compile(r"\s*(CREATE|ALTER|DROP)\b", re.IGNORECASE)
27
+ PLAIN_CREATE_PATTERN = re.compile(
28
+ r"\s*CREATE\s+((LOCAL|GLOBAL)\s+)?(TRANSIENT\s+)?TABLE\b", re.IGNORECASE
29
+ )
30
+
23
31
  # Pattern for simple constant queries like: SELECT 3 :: INT AS "3-80000030-0" FROM ( SELECT $1 AS "__DUMMY" FROM VALUES (NULL :: STRING))
24
32
  # Using exact spacing pattern from generated SQL for deterministic matching
25
33
  # Column ID format: {original_name}-{8_digit_hex_plan_id}-{column_index}
@@ -32,8 +40,7 @@ SIMPLE_CONSTANT_PATTERN = re.compile(
32
40
 
33
41
  class DescribeQueryCache:
34
42
  def __init__(self) -> None:
35
- self._cache = {}
36
- self._lock = threading.Lock()
43
+ self._cache = SynchronizedDict()
37
44
 
38
45
  @staticmethod
39
46
  def _hash_query(sql_query: str) -> str:
@@ -48,49 +55,53 @@ class DescribeQueryCache:
48
55
  return sql_query
49
56
 
50
57
  def get(self, sql_query: str) -> list[ResultMetadataV2] | None:
58
+ from snowflake.snowpark_connect.config import get_describe_cache_ttl_seconds
59
+
60
+ telemetry.report_describe_query_cache_lookup()
61
+
51
62
  cache_key = self._get_cache_key(sql_query)
52
63
  key = self._hash_query(cache_key)
53
64
  current_time = time.monotonic()
54
65
 
55
- # TODO: maybe too much locking, we could use read-write lock also. Or a thread safe dictionary.
56
- with self._lock:
57
- if key in self._cache:
58
- result, timestamp = self._cache[key]
59
- if current_time < timestamp + DESCRIBE_CACHE_TTL_SECONDS:
60
- logger.debug(
61
- f"Returning query result from cache for query: {sql_query[:20]}"
62
- )
63
-
64
- # If this is a constant query, we need to transform the result metadata
65
- # to match the actual query's column name
66
- if (
67
- cache_key != sql_query
68
- ): # Only transform if we normalized the key
69
- match = SIMPLE_CONSTANT_PATTERN.match(sql_query)
70
- if match:
71
- number, column_id = match.groups()
72
- expected_column_name = column_id
73
-
74
- # Transform the cached result to match this query's column name
75
- # There should only be one column in these constant queries
76
- metadata = result[0]
77
- new_metadata = ResultMetadataV2(
78
- name=expected_column_name,
79
- type_code=metadata.type_code,
80
- display_size=metadata.display_size,
81
- internal_size=metadata.internal_size,
82
- precision=metadata.precision,
83
- scale=metadata.scale,
84
- is_nullable=metadata.is_nullable,
85
- )
86
- return [new_metadata]
87
-
88
- return result
89
- else:
90
- logger.debug(
91
- f"Had a cached entry, but it expired for query: {sql_query[:20]}"
92
- )
93
- del self._cache[key]
66
+ if key in self._cache:
67
+ result, timestamp = self._cache[key]
68
+
69
+ expired_by = current_time - (timestamp + get_describe_cache_ttl_seconds())
70
+ if expired_by < 0:
71
+ logger.debug(
72
+ f"Returning query result from cache for query: {sql_query[:20]}"
73
+ )
74
+ self._cache[key] = (result, current_time)
75
+
76
+ # If this is a constant query, we need to transform the result metadata
77
+ # to match the actual query's column name
78
+ if cache_key != sql_query: # Only transform if we normalized the key
79
+ match = SIMPLE_CONSTANT_PATTERN.match(sql_query)
80
+ if match:
81
+ number, column_id = match.groups()
82
+ expected_column_name = column_id
83
+
84
+ # Transform the cached result to match this query's column name
85
+ # There should only be one column in these constant queries
86
+ metadata = result[0]
87
+ new_metadata = ResultMetadataV2(
88
+ name=expected_column_name,
89
+ type_code=metadata.type_code,
90
+ display_size=metadata.display_size,
91
+ internal_size=metadata.internal_size,
92
+ precision=metadata.precision,
93
+ scale=metadata.scale,
94
+ is_nullable=metadata.is_nullable,
95
+ )
96
+
97
+ telemetry.report_describe_query_cache_hit()
98
+ return [new_metadata]
99
+
100
+ telemetry.report_describe_query_cache_hit()
101
+ return result
102
+ else:
103
+ telemetry.report_describe_query_cache_expired(expired_by)
104
+ del self._cache[key]
94
105
  return None
95
106
 
96
107
  def put(self, sql_query: str, result: list[ResultMetadataV2] | None) -> None:
@@ -102,12 +113,18 @@ class DescribeQueryCache:
102
113
 
103
114
  logger.debug(f"Putting query into cache: {sql_query[:50]}...")
104
115
 
105
- with self._lock:
106
- self._cache[key] = (result, time.monotonic())
116
+ self._cache[key] = (result, time.monotonic())
107
117
 
108
118
  def clear(self) -> None:
109
- with self._lock:
110
- self._cache.clear()
119
+ self._cache.clear()
120
+
121
+ def update_cache_for_query(self, query: str) -> None:
122
+ # Clear cache for DDL operations that modify existing objects (exclude CREATE TABLE)
123
+ if DDL_DETECTION_PATTERN.search(query) and not PLAIN_CREATE_PATTERN.search(
124
+ query
125
+ ):
126
+ self.clear()
127
+ telemetry.report_describe_query_cache_clear()
111
128
 
112
129
 
113
130
  def instrument_session_for_describe_cache(session: snowpark.Session):
@@ -118,7 +135,7 @@ def instrument_session_for_describe_cache(session: snowpark.Session):
118
135
  return
119
136
 
120
137
  session._describe_query_cache = DescribeQueryCache()
121
- session._snowpark_api_describe_calls = []
138
+ session._snowpark_api_describe_calls = SynchronizedList()
122
139
 
123
140
  def update_cache_for_query(query: str):
124
141
  cache = None
@@ -126,10 +143,7 @@ def instrument_session_for_describe_cache(session: snowpark.Session):
126
143
  if isinstance(cache_instance, DescribeQueryCache):
127
144
  cache = cache_instance
128
145
 
129
- # TODO: This is very broad right now. We should be able to reduce the scope of clearing.
130
- if DDL_DETECTION_PATTERN.search(query):
131
- logger.debug(f"DDL detected, clearing describe query cache: '{query}'")
132
- cache.clear()
146
+ cache.update_cache_for_query(query)
133
147
 
134
148
  def wrap_execute(wrapped_fn):
135
149
  def fn(query: str, **kwargs):
@@ -139,6 +153,7 @@ def instrument_session_for_describe_cache(session: snowpark.Session):
139
153
  telemetry.report_query(result, **kwargs)
140
154
  except Exception as e:
141
155
  telemetry.report_query(e, **kwargs)
156
+ attach_custom_error_code(e, ErrorCodes.INTERNAL_ERROR)
142
157
  raise e
143
158
  return result
144
159
 
@@ -8,6 +8,8 @@ Environment variable utilities for Snowpark Connect.
8
8
 
9
9
  import os
10
10
 
11
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
12
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
11
13
  from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
12
14
 
13
15
 
@@ -37,9 +39,11 @@ def get_int_from_env(env_var: str, default: int) -> int:
37
39
  """
38
40
  # Validate that default is actually an integer
39
41
  if not isinstance(default, int):
40
- raise TypeError(
42
+ exception = TypeError(
41
43
  f"Default value must be an integer, got {type(default).__name__}: {default}"
42
44
  )
45
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
46
+ raise exception
43
47
 
44
48
  value = os.getenv(env_var)
45
49
  if value is None:
@@ -0,0 +1,172 @@
1
+ #
2
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
+ #
4
+
5
+ from snowflake.snowpark import Column, functions as snowpark_fn
6
+ from snowflake.snowpark._internal.analyzer.expression import (
7
+ CaseWhen,
8
+ Expression,
9
+ FunctionExpression,
10
+ SnowflakeUDF,
11
+ )
12
+
13
+ _SF_AGGREGATE_FUNCTIONS = [
14
+ "any_value",
15
+ "avg",
16
+ "corr",
17
+ "count",
18
+ "count_if",
19
+ "covar_pop",
20
+ "covar_samp",
21
+ "listagg",
22
+ "max",
23
+ "max_by",
24
+ "median",
25
+ "min",
26
+ "min_by",
27
+ "mode",
28
+ "percentile_cont",
29
+ "percentile_disc",
30
+ "stddev",
31
+ "stddev_samp",
32
+ "stddev_pop",
33
+ "sum",
34
+ "var_pop",
35
+ "var_samp",
36
+ "variance_pop",
37
+ "variance",
38
+ "variance_samp",
39
+ "bitand_agg",
40
+ "bitor_agg",
41
+ "bitxor_agg",
42
+ "booland_agg",
43
+ "boolor_agg",
44
+ "boolxor_agg",
45
+ "hash_agg",
46
+ "array_agg",
47
+ "object_agg",
48
+ "regr_avgx",
49
+ "regr_avgy",
50
+ "regr_count",
51
+ "regr_intercept",
52
+ "regr_r2",
53
+ "regr_slope",
54
+ "regr_sxx",
55
+ "regr_sxy",
56
+ "regr_syy",
57
+ "kurtosis",
58
+ "skew",
59
+ "array_union_agg",
60
+ "array_unique_agg",
61
+ "bitmap_bit_position",
62
+ "bitmap_bucket_number",
63
+ "bitmap_count",
64
+ "bitmap_construct_agg",
65
+ "bitmap_or_agg",
66
+ "approx_count_distinct",
67
+ "datasketches_hll",
68
+ "datasketches_hll_accumulate",
69
+ "datasketches_hll_combine",
70
+ "datasketches_hll_estimate",
71
+ "hll",
72
+ "hll_accumulate",
73
+ "hll_combine",
74
+ "hll_estimate",
75
+ "hll_export",
76
+ "hll_import",
77
+ "approximate_jaccard_index",
78
+ "approximate_similarity",
79
+ "minhash",
80
+ "minhash_combine",
81
+ "approx_top_k",
82
+ "approx_top_k_accumulate",
83
+ "approx_top_k_combine",
84
+ "approx_top_k_estimate",
85
+ "approx_percentile",
86
+ "approx_percentile_accumulate",
87
+ "approx_percentile_combine",
88
+ "approx_percentile_estimate",
89
+ "grouping",
90
+ "grouping_id",
91
+ "ai_agg",
92
+ "ai_summarize_agg",
93
+ ]
94
+
95
+
96
+ def _is_agg_function_expression(expression: Expression) -> bool:
97
+ if (
98
+ isinstance(expression, FunctionExpression)
99
+ and expression.pretty_name.lower() in _SF_AGGREGATE_FUNCTIONS
100
+ ):
101
+ return True
102
+
103
+ # For PySpark aggregate functions that were mapped using a UDAF, e.g. try_sum
104
+ if isinstance(expression, SnowflakeUDF) and expression.is_aggregate_function:
105
+ return True
106
+
107
+ return False
108
+
109
+
110
+ def _get_child_expressions(expression: Expression) -> list[Expression]:
111
+ if isinstance(expression, CaseWhen):
112
+ return expression._child_expressions
113
+
114
+ return expression.children or []
115
+
116
+
117
+ def inject_condition_to_all_agg_functions(
118
+ expression: Expression, condition: Column
119
+ ) -> None:
120
+ """
121
+ Recursively traverses an expression tree and wraps all aggregate function arguments with a CASE WHEN condition.
122
+
123
+ Args:
124
+ expression: The Snowpark expression tree to traverse and modify.
125
+ condition: The Column condition to inject into aggregate function arguments.
126
+ """
127
+
128
+ any_agg_function_found = _inject_condition_to_all_agg_functions(
129
+ expression, condition
130
+ )
131
+
132
+ if not any_agg_function_found:
133
+ raise ValueError(f"No aggregate functions found in: {expression.sql}")
134
+
135
+
136
+ def _inject_condition_to_all_agg_functions(
137
+ expression: Expression, condition: Column
138
+ ) -> bool:
139
+ any_agg_function_found = False
140
+
141
+ if _is_agg_function_expression(expression):
142
+ new_children = []
143
+ for child in _get_child_expressions(expression):
144
+ case_when = snowpark_fn.when(condition, Column(child))
145
+
146
+ new_children.append(case_when._expr1)
147
+
148
+ # Swap children
149
+ expression.children = new_children
150
+ if len(new_children) > 0:
151
+ expression.child = new_children[0]
152
+
153
+ return True
154
+
155
+ for child in _get_child_expressions(expression):
156
+ is_agg_function_in_child = _inject_condition_to_all_agg_functions(
157
+ child, condition
158
+ )
159
+
160
+ if is_agg_function_in_child:
161
+ any_agg_function_found = True
162
+
163
+ return any_agg_function_found
164
+
165
+
166
+ def is_child_agg_function_expression(exp: Expression) -> bool:
167
+ if _is_agg_function_expression(exp):
168
+ return True
169
+
170
+ return any(
171
+ is_child_agg_function_expression(child) for child in _get_child_expressions(exp)
172
+ )
@@ -2,6 +2,7 @@
2
2
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
3
  #
4
4
  import re
5
+ from typing import Any, TypeVar
5
6
 
6
7
  from pyspark.errors import AnalysisException
7
8
 
@@ -12,6 +13,8 @@ from snowflake.snowpark_connect.config import (
12
13
  auto_uppercase_column_identifiers,
13
14
  auto_uppercase_non_column_identifiers,
14
15
  )
16
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
17
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
15
18
 
16
19
  QUOTED_SPARK_IDENTIFIER = re.compile(r"^`[^`]*(?:``[^`]*)*`$")
17
20
  UNQUOTED_SPARK_IDENTIFIER = re.compile(r"^\w+$")
@@ -24,15 +27,23 @@ def unquote_spark_identifier_if_quoted(spark_name: str) -> str:
24
27
  if QUOTED_SPARK_IDENTIFIER.match(spark_name):
25
28
  return spark_name[1:-1].replace("``", "`")
26
29
 
27
- raise AnalysisException(f"Invalid name: {spark_name}")
30
+ exception = AnalysisException(f"Invalid name: {spark_name}")
31
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
32
+ raise exception
28
33
 
29
34
 
30
- def spark_to_sf_single_id_with_unquoting(name: str) -> str:
35
+ def spark_to_sf_single_id_with_unquoting(
36
+ name: str, use_auto_upper_case: bool = False
37
+ ) -> str:
31
38
  """
32
39
  Transforms a spark name to a valid snowflake name by quoting and potentially uppercasing it.
33
40
  Unquotes the spark name if necessary. Will raise an AnalysisException if given name is not valid.
34
41
  """
35
- return spark_to_sf_single_id(unquote_spark_identifier_if_quoted(name))
42
+ return (
43
+ spark_to_sf_single_id(unquote_spark_identifier_if_quoted(name))
44
+ if use_auto_upper_case
45
+ else quote_name_without_upper_casing(unquote_spark_identifier_if_quoted(name))
46
+ )
36
47
 
37
48
 
38
49
  def spark_to_sf_single_id(name: str, is_column: bool = False) -> str:
@@ -117,3 +128,126 @@ def split_fully_qualified_spark_name(qualified_name: str | None) -> list[str]:
117
128
  parts.append("".join(token_chars))
118
129
 
119
130
  return parts
131
+
132
+
133
+ # See https://docs.snowflake.com/en/sql-reference/identifiers-syntax for identifier syntax
134
+ UNQUOTED_IDENTIFIER_REGEX = r"([a-zA-Z_])([a-zA-Z0-9_$]{0,254})"
135
+ QUOTED_IDENTIFIER_REGEX = r'"((""|[^"]){0,255})"'
136
+ VALID_IDENTIFIER_REGEX = f"(?:{UNQUOTED_IDENTIFIER_REGEX}|{QUOTED_IDENTIFIER_REGEX})"
137
+
138
+
139
+ Self = TypeVar("Self", bound="FQN")
140
+
141
+
142
+ class FQN:
143
+ """Represents an object identifier, supporting fully qualified names.
144
+
145
+ The instance supports builder pattern that allows updating the identifier with database and
146
+ schema from different sources.
147
+
148
+ Examples
149
+ ________
150
+ >>> fqn = FQN.from_string("my_schema.object").using_connection(conn)
151
+
152
+ >>> fqn = FQN.from_string("my_name").set_database("db").set_schema("foo")
153
+ """
154
+
155
+ def __init__(
156
+ self,
157
+ database: str | None,
158
+ schema: str | None,
159
+ name: str,
160
+ signature: str | None = None,
161
+ ) -> None:
162
+ self._database = database
163
+ self._schema = schema
164
+ self._name = name
165
+ self.signature = signature
166
+
167
+ @property
168
+ def database(self) -> str | None:
169
+ return self._database
170
+
171
+ @property
172
+ def schema(self) -> str | None:
173
+ return self._schema
174
+
175
+ @property
176
+ def name(self) -> str:
177
+ return self._name
178
+
179
+ @property
180
+ def prefix(self) -> str:
181
+ if self.database:
182
+ return f"{self.database}.{self.schema if self.schema else 'PUBLIC'}"
183
+ if self.schema:
184
+ return f"{self.schema}"
185
+ return ""
186
+
187
+ @property
188
+ def identifier(self) -> str:
189
+ if self.prefix:
190
+ return f"{self.prefix}.{self.name}"
191
+ return self.name
192
+
193
+ def __str__(self) -> str:
194
+ return self.identifier
195
+
196
+ def __eq__(self, other: Any) -> bool:
197
+ if not isinstance(other, FQN):
198
+ exception = AnalysisException(f"{other} is not a valid FQN")
199
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
200
+ raise exception
201
+ return self.identifier == other.identifier
202
+
203
+ @classmethod
204
+ def from_string(cls, identifier: str) -> Self:
205
+ """Take in an object name in the form [[database.]schema.]name and return a new :class:`FQN` instance.
206
+
207
+ Raises:
208
+ InvalidIdentifierError: If the object identifier does not meet identifier requirements.
209
+ """
210
+ qualifier_pattern = (
211
+ rf"(?:(?P<first_qualifier>{VALID_IDENTIFIER_REGEX})\.)?"
212
+ rf"(?:(?P<second_qualifier>{VALID_IDENTIFIER_REGEX})\.)?"
213
+ rf"(?P<name>{VALID_IDENTIFIER_REGEX})(?P<signature>\(.*\))?"
214
+ )
215
+ result = re.fullmatch(qualifier_pattern, identifier)
216
+
217
+ if result is None:
218
+ exception = AnalysisException(f"{identifier} is not a valid identifier")
219
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
220
+ raise exception
221
+
222
+ unqualified_name = result.group("name")
223
+ if result.group("second_qualifier") is not None:
224
+ database = result.group("first_qualifier")
225
+ schema = result.group("second_qualifier")
226
+ else:
227
+ database = None
228
+ schema = result.group("first_qualifier")
229
+
230
+ signature = None
231
+ if result.group("signature"):
232
+ signature = result.group("signature")
233
+ return cls(
234
+ name=unqualified_name, schema=schema, database=database, signature=signature
235
+ )
236
+
237
+ def set_database(self, database: str | None) -> Self:
238
+ if database:
239
+ self._database = database
240
+ return self
241
+
242
+ def set_schema(self, schema: str | None) -> Self:
243
+ if schema:
244
+ self._schema = schema
245
+ return self
246
+
247
+ def set_name(self, name: str) -> Self:
248
+ self._name = name
249
+ return self
250
+
251
+ def to_dict(self) -> dict[str, str | None]:
252
+ """Return the dictionary representation of the instance."""
253
+ return {"name": self.name, "schema": self.schema, "database": self.database}
@@ -1,10 +1,47 @@
1
1
  #
2
2
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
3
  #
4
-
4
+ import contextlib
5
5
  import functools
6
+ import re
6
7
 
7
8
  from snowflake.snowpark import Session
9
+ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
10
+ create_file_format_statement,
11
+ )
12
+ from snowflake.snowpark_connect.utils.identifiers import FQN
13
+
14
+ _MINUS_AT_THE_BEGINNING_REGEX = re.compile(r"^-")
15
+
16
+
17
+ def cached_file_format(
18
+ session: Session, file_format: str, format_type_options: dict[str, str]
19
+ ) -> str:
20
+ """
21
+ Cache and return a file format name based on the given options.
22
+ """
23
+
24
+ function_name = _MINUS_AT_THE_BEGINNING_REGEX.sub(
25
+ "1", str(hash(frozenset(format_type_options.items())))
26
+ )
27
+ file_format_name = f"__SNOWPARK_CONNECT_FILE_FORMAT__{file_format}_{function_name}"
28
+ if file_format_name in session._file_formats:
29
+ return file_format_name
30
+
31
+ session.sql(
32
+ create_file_format_statement(
33
+ file_format_name,
34
+ file_format,
35
+ format_type_options,
36
+ temp=True,
37
+ if_not_exist=True,
38
+ use_scoped_temp_objects=False,
39
+ is_generated=True,
40
+ )
41
+ ).collect()
42
+
43
+ session._file_formats.add(file_format_name)
44
+ return file_format_name
8
45
 
9
46
 
10
47
  @functools.cache
@@ -33,3 +70,22 @@ def file_format(
33
70
  ).collect()
34
71
 
35
72
  return file_format_name
73
+
74
+
75
+ def get_table_type(
76
+ snowpark_table_name: str,
77
+ snowpark_session: Session,
78
+ ) -> str:
79
+ fqn = FQN.from_string(snowpark_table_name)
80
+ with contextlib.suppress(Exception):
81
+ if fqn.database is not None:
82
+ return snowpark_session.catalog.getTable(
83
+ table_name=fqn.name, schema=fqn.schema, database=fqn.database
84
+ ).table_type
85
+ elif fqn.schema is not None:
86
+ return snowpark_session.catalog.getTable(
87
+ table_name=fqn.name, schema=fqn.schema
88
+ ).table_type
89
+ else:
90
+ return snowpark_session.catalog.getTable(table_name=fqn.name).table_type
91
+ return "TABLE"