snowpark-connect 0.27.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (192)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +680 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +237 -23
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  23. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  24. snowflake/snowpark_connect/expression/literal.py +37 -13
  25. snowflake/snowpark_connect/expression/map_cast.py +123 -5
  26. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  27. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  28. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  29. snowflake/snowpark_connect/expression/map_udf.py +85 -20
  30. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  31. snowflake/snowpark_connect/expression/map_unresolved_function.py +2748 -746
  32. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  33. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  34. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  35. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  36. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  37. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  38. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  39. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  40. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  41. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  42. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  43. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  44. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  45. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  46. snowflake/snowpark_connect/relation/map_aggregate.py +196 -255
  47. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  48. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  49. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  50. snowflake/snowpark_connect/relation/map_join.py +683 -442
  51. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  52. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  53. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  54. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  55. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  56. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  57. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  58. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  59. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  60. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  61. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  62. snowflake/snowpark_connect/relation/read/map_read_csv.py +255 -45
  63. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  64. snowflake/snowpark_connect/relation/read/map_read_json.py +320 -85
  65. snowflake/snowpark_connect/relation/read/map_read_parquet.py +142 -27
  66. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  67. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  68. snowflake/snowpark_connect/relation/read/map_read_table.py +82 -5
  69. snowflake/snowpark_connect/relation/read/map_read_text.py +18 -3
  70. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  71. snowflake/snowpark_connect/relation/read/reader_config.py +36 -3
  72. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  73. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  74. snowflake/snowpark_connect/relation/utils.py +128 -5
  75. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  76. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  77. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  78. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  79. snowflake/snowpark_connect/resources_initializer.py +110 -48
  80. snowflake/snowpark_connect/server.py +546 -456
  81. snowflake/snowpark_connect/server_common/__init__.py +500 -0
  82. snowflake/snowpark_connect/snowflake_session.py +65 -0
  83. snowflake/snowpark_connect/start_server.py +53 -5
  84. snowflake/snowpark_connect/type_mapping.py +349 -27
  85. snowflake/snowpark_connect/typed_column.py +9 -7
  86. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  87. snowflake/snowpark_connect/utils/cache.py +49 -27
  88. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  89. snowflake/snowpark_connect/utils/context.py +187 -37
  90. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  91. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  92. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  93. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  94. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  95. snowflake/snowpark_connect/utils/java_stored_procedure.py +125 -0
  96. snowflake/snowpark_connect/utils/java_udaf_utils.py +303 -0
  97. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  98. snowflake/snowpark_connect/utils/jvm_udf_utils.py +248 -0
  99. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  100. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  101. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  102. snowflake/snowpark_connect/utils/profiling.py +25 -8
  103. snowflake/snowpark_connect/utils/scala_udf_utils.py +101 -332
  104. snowflake/snowpark_connect/utils/sequence.py +21 -0
  105. snowflake/snowpark_connect/utils/session.py +64 -28
  106. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  107. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  108. snowflake/snowpark_connect/utils/telemetry.py +163 -22
  109. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  110. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  111. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  112. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  113. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  114. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  115. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  116. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  117. snowflake/snowpark_connect/version.py +1 -1
  118. snowflake/snowpark_decoder/dp_session.py +6 -2
  119. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  120. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-submit +2 -2
  121. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/METADATA +14 -7
  122. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/RECORD +129 -167
  123. snowflake/snowpark_connect/hidden_column.py +0 -39
  124. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  125. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  126. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  127. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  128. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  129. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  130. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  131. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  132. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  133. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  134. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  186. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-connect +0 -0
  187. {snowpark_connect-0.27.0.data → snowpark_connect-1.6.0.data}/scripts/snowpark-session +0 -0
  188. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/WHEEL +0 -0
  189. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE-binary +0 -0
  190. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/LICENSE.txt +0 -0
  191. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/licenses/NOTICE-binary +0 -0
  192. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.6.0.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,8 @@ Environment variable utilities for Snowpark Connect.
8
8
 
9
9
  import os
10
10
 
11
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
12
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
11
13
  from snowflake.snowpark_connect.utils.snowpark_connect_logging import logger
12
14
 
13
15
 
@@ -37,9 +39,11 @@ def get_int_from_env(env_var: str, default: int) -> int:
37
39
  """
38
40
  # Validate that default is actually an integer
39
41
  if not isinstance(default, int):
40
- raise TypeError(
42
+ exception = TypeError(
41
43
  f"Default value must be an integer, got {type(default).__name__}: {default}"
42
44
  )
45
+ attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
46
+ raise exception
43
47
 
44
48
  value = os.getenv(env_var)
45
49
  if value is None:
@@ -0,0 +1,172 @@
1
+ #
2
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
+ #
4
+
5
+ from snowflake.snowpark import Column, functions as snowpark_fn
6
+ from snowflake.snowpark._internal.analyzer.expression import (
7
+ CaseWhen,
8
+ Expression,
9
+ FunctionExpression,
10
+ SnowflakeUDF,
11
+ )
12
+
13
+ _SF_AGGREGATE_FUNCTIONS = [
14
+ "any_value",
15
+ "avg",
16
+ "corr",
17
+ "count",
18
+ "count_if",
19
+ "covar_pop",
20
+ "covar_samp",
21
+ "listagg",
22
+ "max",
23
+ "max_by",
24
+ "median",
25
+ "min",
26
+ "min_by",
27
+ "mode",
28
+ "percentile_cont",
29
+ "percentile_disc",
30
+ "stddev",
31
+ "stddev_samp",
32
+ "stddev_pop",
33
+ "sum",
34
+ "var_pop",
35
+ "var_samp",
36
+ "variance_pop",
37
+ "variance",
38
+ "variance_samp",
39
+ "bitand_agg",
40
+ "bitor_agg",
41
+ "bitxor_agg",
42
+ "booland_agg",
43
+ "boolor_agg",
44
+ "boolxor_agg",
45
+ "hash_agg",
46
+ "array_agg",
47
+ "object_agg",
48
+ "regr_avgx",
49
+ "regr_avgy",
50
+ "regr_count",
51
+ "regr_intercept",
52
+ "regr_r2",
53
+ "regr_slope",
54
+ "regr_sxx",
55
+ "regr_sxy",
56
+ "regr_syy",
57
+ "kurtosis",
58
+ "skew",
59
+ "array_union_agg",
60
+ "array_unique_agg",
61
+ "bitmap_bit_position",
62
+ "bitmap_bucket_number",
63
+ "bitmap_count",
64
+ "bitmap_construct_agg",
65
+ "bitmap_or_agg",
66
+ "approx_count_distinct",
67
+ "datasketches_hll",
68
+ "datasketches_hll_accumulate",
69
+ "datasketches_hll_combine",
70
+ "datasketches_hll_estimate",
71
+ "hll",
72
+ "hll_accumulate",
73
+ "hll_combine",
74
+ "hll_estimate",
75
+ "hll_export",
76
+ "hll_import",
77
+ "approximate_jaccard_index",
78
+ "approximate_similarity",
79
+ "minhash",
80
+ "minhash_combine",
81
+ "approx_top_k",
82
+ "approx_top_k_accumulate",
83
+ "approx_top_k_combine",
84
+ "approx_top_k_estimate",
85
+ "approx_percentile",
86
+ "approx_percentile_accumulate",
87
+ "approx_percentile_combine",
88
+ "approx_percentile_estimate",
89
+ "grouping",
90
+ "grouping_id",
91
+ "ai_agg",
92
+ "ai_summarize_agg",
93
+ ]
94
+
95
+
96
+ def _is_agg_function_expression(expression: Expression) -> bool:
97
+ if (
98
+ isinstance(expression, FunctionExpression)
99
+ and expression.pretty_name.lower() in _SF_AGGREGATE_FUNCTIONS
100
+ ):
101
+ return True
102
+
103
+ # For PySpark aggregate functions that were mapped using a UDAF, e.g. try_sum
104
+ if isinstance(expression, SnowflakeUDF) and expression.is_aggregate_function:
105
+ return True
106
+
107
+ return False
108
+
109
+
110
+ def _get_child_expressions(expression: Expression) -> list[Expression]:
111
+ if isinstance(expression, CaseWhen):
112
+ return expression._child_expressions
113
+
114
+ return expression.children or []
115
+
116
+
117
+ def inject_condition_to_all_agg_functions(
118
+ expression: Expression, condition: Column
119
+ ) -> None:
120
+ """
121
+ Recursively traverses an expression tree and wraps all aggregate function arguments with a CASE WHEN condition.
122
+
123
+ Args:
124
+ expression: The Snowpark expression tree to traverse and modify.
125
+ condition: The Column condition to inject into aggregate function arguments.
126
+ """
127
+
128
+ any_agg_function_found = _inject_condition_to_all_agg_functions(
129
+ expression, condition
130
+ )
131
+
132
+ if not any_agg_function_found:
133
+ raise ValueError(f"No aggregate functions found in: {expression.sql}")
134
+
135
+
136
+ def _inject_condition_to_all_agg_functions(
137
+ expression: Expression, condition: Column
138
+ ) -> bool:
139
+ any_agg_function_found = False
140
+
141
+ if _is_agg_function_expression(expression):
142
+ new_children = []
143
+ for child in _get_child_expressions(expression):
144
+ case_when = snowpark_fn.when(condition, Column(child))
145
+
146
+ new_children.append(case_when._expr1)
147
+
148
+ # Swap children
149
+ expression.children = new_children
150
+ if len(new_children) > 0:
151
+ expression.child = new_children[0]
152
+
153
+ return True
154
+
155
+ for child in _get_child_expressions(expression):
156
+ is_agg_function_in_child = _inject_condition_to_all_agg_functions(
157
+ child, condition
158
+ )
159
+
160
+ if is_agg_function_in_child:
161
+ any_agg_function_found = True
162
+
163
+ return any_agg_function_found
164
+
165
+
166
+ def is_child_agg_function_expression(exp: Expression) -> bool:
167
+ if _is_agg_function_expression(exp):
168
+ return True
169
+
170
+ return any(
171
+ is_child_agg_function_expression(child) for child in _get_child_expressions(exp)
172
+ )
@@ -2,6 +2,7 @@
2
2
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
3
  #
4
4
  import re
5
+ from typing import Any, TypeVar
5
6
 
6
7
  from pyspark.errors import AnalysisException
7
8
 
@@ -12,6 +13,8 @@ from snowflake.snowpark_connect.config import (
12
13
  auto_uppercase_column_identifiers,
13
14
  auto_uppercase_non_column_identifiers,
14
15
  )
16
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
17
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
15
18
 
16
19
  QUOTED_SPARK_IDENTIFIER = re.compile(r"^`[^`]*(?:``[^`]*)*`$")
17
20
  UNQUOTED_SPARK_IDENTIFIER = re.compile(r"^\w+$")
@@ -24,15 +27,23 @@ def unquote_spark_identifier_if_quoted(spark_name: str) -> str:
24
27
  if QUOTED_SPARK_IDENTIFIER.match(spark_name):
25
28
  return spark_name[1:-1].replace("``", "`")
26
29
 
27
- raise AnalysisException(f"Invalid name: {spark_name}")
30
+ exception = AnalysisException(f"Invalid name: {spark_name}")
31
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
32
+ raise exception
28
33
 
29
34
 
30
- def spark_to_sf_single_id_with_unquoting(name: str) -> str:
35
+ def spark_to_sf_single_id_with_unquoting(
36
+ name: str, use_auto_upper_case: bool = False
37
+ ) -> str:
31
38
  """
32
39
  Transforms a spark name to a valid snowflake name by quoting and potentially uppercasing it.
33
40
  Unquotes the spark name if necessary. Will raise an AnalysisException if given name is not valid.
34
41
  """
35
- return spark_to_sf_single_id(unquote_spark_identifier_if_quoted(name))
42
+ return (
43
+ spark_to_sf_single_id(unquote_spark_identifier_if_quoted(name))
44
+ if use_auto_upper_case
45
+ else quote_name_without_upper_casing(unquote_spark_identifier_if_quoted(name))
46
+ )
36
47
 
37
48
 
38
49
  def spark_to_sf_single_id(name: str, is_column: bool = False) -> str:
@@ -117,3 +128,126 @@ def split_fully_qualified_spark_name(qualified_name: str | None) -> list[str]:
117
128
  parts.append("".join(token_chars))
118
129
 
119
130
  return parts
131
+
132
+
133
+ # See https://docs.snowflake.com/en/sql-reference/identifiers-syntax for identifier syntax
134
+ UNQUOTED_IDENTIFIER_REGEX = r"([a-zA-Z_])([a-zA-Z0-9_$]{0,254})"
135
+ QUOTED_IDENTIFIER_REGEX = r'"((""|[^"]){0,255})"'
136
+ VALID_IDENTIFIER_REGEX = f"(?:{UNQUOTED_IDENTIFIER_REGEX}|{QUOTED_IDENTIFIER_REGEX})"
137
+
138
+
139
+ Self = TypeVar("Self", bound="FQN")
140
+
141
+
142
+ class FQN:
143
+ """Represents an object identifier, supporting fully qualified names.
144
+
145
+ The instance supports builder pattern that allows updating the identifier with database and
146
+ schema from different sources.
147
+
148
+ Examples
149
+ ________
150
+ >>> fqn = FQN.from_string("my_schema.object").using_connection(conn)
151
+
152
+ >>> fqn = FQN.from_string("my_name").set_database("db").set_schema("foo")
153
+ """
154
+
155
+ def __init__(
156
+ self,
157
+ database: str | None,
158
+ schema: str | None,
159
+ name: str,
160
+ signature: str | None = None,
161
+ ) -> None:
162
+ self._database = database
163
+ self._schema = schema
164
+ self._name = name
165
+ self.signature = signature
166
+
167
+ @property
168
+ def database(self) -> str | None:
169
+ return self._database
170
+
171
+ @property
172
+ def schema(self) -> str | None:
173
+ return self._schema
174
+
175
+ @property
176
+ def name(self) -> str:
177
+ return self._name
178
+
179
+ @property
180
+ def prefix(self) -> str:
181
+ if self.database:
182
+ return f"{self.database}.{self.schema if self.schema else 'PUBLIC'}"
183
+ if self.schema:
184
+ return f"{self.schema}"
185
+ return ""
186
+
187
+ @property
188
+ def identifier(self) -> str:
189
+ if self.prefix:
190
+ return f"{self.prefix}.{self.name}"
191
+ return self.name
192
+
193
+ def __str__(self) -> str:
194
+ return self.identifier
195
+
196
+ def __eq__(self, other: Any) -> bool:
197
+ if not isinstance(other, FQN):
198
+ exception = AnalysisException(f"{other} is not a valid FQN")
199
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
200
+ raise exception
201
+ return self.identifier == other.identifier
202
+
203
+ @classmethod
204
+ def from_string(cls, identifier: str) -> Self:
205
+ """Take in an object name in the form [[database.]schema.]name and return a new :class:`FQN` instance.
206
+
207
+ Raises:
208
+ InvalidIdentifierError: If the object identifier does not meet identifier requirements.
209
+ """
210
+ qualifier_pattern = (
211
+ rf"(?:(?P<first_qualifier>{VALID_IDENTIFIER_REGEX})\.)?"
212
+ rf"(?:(?P<second_qualifier>{VALID_IDENTIFIER_REGEX})\.)?"
213
+ rf"(?P<name>{VALID_IDENTIFIER_REGEX})(?P<signature>\(.*\))?"
214
+ )
215
+ result = re.fullmatch(qualifier_pattern, identifier)
216
+
217
+ if result is None:
218
+ exception = AnalysisException(f"{identifier} is not a valid identifier")
219
+ attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
220
+ raise exception
221
+
222
+ unqualified_name = result.group("name")
223
+ if result.group("second_qualifier") is not None:
224
+ database = result.group("first_qualifier")
225
+ schema = result.group("second_qualifier")
226
+ else:
227
+ database = None
228
+ schema = result.group("first_qualifier")
229
+
230
+ signature = None
231
+ if result.group("signature"):
232
+ signature = result.group("signature")
233
+ return cls(
234
+ name=unqualified_name, schema=schema, database=database, signature=signature
235
+ )
236
+
237
+ def set_database(self, database: str | None) -> Self:
238
+ if database:
239
+ self._database = database
240
+ return self
241
+
242
+ def set_schema(self, schema: str | None) -> Self:
243
+ if schema:
244
+ self._schema = schema
245
+ return self
246
+
247
+ def set_name(self, name: str) -> Self:
248
+ self._name = name
249
+ return self
250
+
251
+ def to_dict(self) -> dict[str, str | None]:
252
+ """Return the dictionary representation of the instance."""
253
+ return {"name": self.name, "schema": self.schema, "database": self.database}
@@ -1,10 +1,47 @@
1
1
  #
2
2
  # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
3
  #
4
-
4
+ import contextlib
5
5
  import functools
6
+ import re
6
7
 
7
8
  from snowflake.snowpark import Session
9
+ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
10
+ create_file_format_statement,
11
+ )
12
+ from snowflake.snowpark_connect.utils.identifiers import FQN
13
+
14
+ _MINUS_AT_THE_BEGINNING_REGEX = re.compile(r"^-")
15
+
16
+
17
+ def cached_file_format(
18
+ session: Session, file_format: str, format_type_options: dict[str, str]
19
+ ) -> str:
20
+ """
21
+ Cache and return a file format name based on the given options.
22
+ """
23
+
24
+ function_name = _MINUS_AT_THE_BEGINNING_REGEX.sub(
25
+ "1", str(hash(frozenset(format_type_options.items())))
26
+ )
27
+ file_format_name = f"__SNOWPARK_CONNECT_FILE_FORMAT__{file_format}_{function_name}"
28
+ if file_format_name in session._file_formats:
29
+ return file_format_name
30
+
31
+ session.sql(
32
+ create_file_format_statement(
33
+ file_format_name,
34
+ file_format,
35
+ format_type_options,
36
+ temp=True,
37
+ if_not_exist=True,
38
+ use_scoped_temp_objects=False,
39
+ is_generated=True,
40
+ )
41
+ ).collect()
42
+
43
+ session._file_formats.add(file_format_name)
44
+ return file_format_name
8
45
 
9
46
 
10
47
  @functools.cache
@@ -33,3 +70,22 @@ def file_format(
33
70
  ).collect()
34
71
 
35
72
  return file_format_name
73
+
74
+
75
+ def get_table_type(
76
+ snowpark_table_name: str,
77
+ snowpark_session: Session,
78
+ ) -> str:
79
+ fqn = FQN.from_string(snowpark_table_name)
80
+ with contextlib.suppress(Exception):
81
+ if fqn.database is not None:
82
+ return snowpark_session.catalog.getTable(
83
+ table_name=fqn.name, schema=fqn.schema, database=fqn.database
84
+ ).table_type
85
+ elif fqn.schema is not None:
86
+ return snowpark_session.catalog.getTable(
87
+ table_name=fqn.name, schema=fqn.schema
88
+ ).table_type
89
+ else:
90
+ return snowpark_session.catalog.getTable(table_name=fqn.name).table_type
91
+ return "TABLE"
@@ -0,0 +1,125 @@
1
+ #
2
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
3
+ #
4
+
5
+ from pyspark.errors import AnalysisException
6
+
7
+ import snowflake.snowpark.types as snowpark_type
8
+ from snowflake.snowpark import Session
9
+ from snowflake.snowpark._internal.type_utils import type_string_to_type_object
10
+ from snowflake.snowpark_connect.resources_initializer import (
11
+ RESOURCE_PATH,
12
+ SPARK_COMMON_UTILS_JAR,
13
+ SPARK_CONNECT_CLIENT_JAR,
14
+ SPARK_SQL_JAR,
15
+ )
16
+ from snowflake.snowpark_connect.utils.upload_java_jar import upload_java_udf_jar
17
+
18
+ CREATE_JAVA_UDF_PREFIX = "__SC_JAVA_UDF_"
19
+ PROCEDURE_NAME = "__SC_JAVA_SP_CREATE_JAVA_UDF"
20
+ SP_TEMPLATE = """
21
+ CREATE OR REPLACE TEMPORARY PROCEDURE __SC_JAVA_SP_CREATE_JAVA_UDF(udf_name VARCHAR, udf_class VARCHAR, imports ARRAY(VARCHAR))
22
+ RETURNS VARCHAR
23
+ LANGUAGE JAVA
24
+ RUNTIME_VERSION = 17
25
+ PACKAGES = ('com.snowflake:snowpark:latest')
26
+ __snowflake_udf_imports__
27
+ HANDLER = 'com.snowflake.snowpark_connect.procedures.JavaUDFCreator.process'
28
+ EXECUTE AS CALLER
29
+ ;
30
+ """
31
+
32
+
33
+ _is_initialized = False
34
+
35
+
36
+ def is_initialized() -> bool:
37
+ global _is_initialized
38
+ return _is_initialized
39
+
40
+
41
+ def set_java_udf_creator_initialized_state(value: bool) -> None:
42
+ global _is_initialized
43
+ _is_initialized = value
44
+
45
+
46
+ class JavaUdf:
47
+ """
48
+ Reference class for Java UDFs, providing similar properties like Python UserDefinedFunction.
49
+
50
+ This class serves as a lightweight reference to a Java UDF that has been created
51
+ in Snowflake, storing the essential metadata needed for function calls.
52
+ """
53
+
54
+ def __init__(
55
+ self,
56
+ name: str,
57
+ input_types: list[snowpark_type.DataType],
58
+ return_type: snowpark_type.DataType,
59
+ ) -> None:
60
+ """
61
+ Initialize a Java UDF reference.
62
+
63
+ Args:
64
+ name: The name of the UDF in Snowflake
65
+ input_types: List of input parameter types
66
+ return_type: The return type of the UDF
67
+ """
68
+ self.name = name
69
+ self._input_types = input_types
70
+ self._return_type = return_type
71
+
72
+
73
+ def get_quoted_imports(session: Session) -> str:
74
+ stage_resource_path = session.get_session_stage() + RESOURCE_PATH
75
+ spark_imports = {
76
+ f"{stage_resource_path}/{SPARK_CONNECT_CLIENT_JAR}",
77
+ f"{stage_resource_path}/{SPARK_COMMON_UTILS_JAR}",
78
+ f"{stage_resource_path}/{SPARK_SQL_JAR}",
79
+ f"{stage_resource_path}/java_udfs-1.0-SNAPSHOT.jar",
80
+ }
81
+
82
+ def quote_single(s: str) -> str:
83
+ """Helper function to wrap strings in single quotes for SQL."""
84
+ return "'" + s + "'"
85
+
86
+ return ", ".join(quote_single(x) for x in session._artifact_jars | spark_imports)
87
+
88
+
89
+ def create_snowflake_imports(session: Session) -> str:
90
+ from snowflake.snowpark_connect.resources_initializer import (
91
+ ensure_scala_udf_jars_uploaded,
92
+ )
93
+
94
+ # Make sure that the resource initializer thread is completed before creating Java UDFs since we depend on the jars
95
+ # uploaded by it.
96
+ ensure_scala_udf_jars_uploaded()
97
+
98
+ return f"IMPORTS = ({get_quoted_imports(session)})"
99
+
100
+
101
+ def create_java_udf(session: Session, function_name: str, java_class: str):
102
+ if not is_initialized():
103
+ upload_java_udf_jar(session)
104
+ session.sql(
105
+ SP_TEMPLATE.replace(
106
+ "__snowflake_udf_imports__", create_snowflake_imports(session)
107
+ )
108
+ ).collect()
109
+ set_java_udf_creator_initialized_state(True)
110
+ name = CREATE_JAVA_UDF_PREFIX + function_name
111
+ result = session.sql(
112
+ f"CALL {PROCEDURE_NAME}('{name}', '{java_class}', ARRAY_CONSTRUCT({get_quoted_imports(session)})::ARRAY(VARCHAR))"
113
+ ).collect()
114
+ result_value = result[0][0]
115
+ if not result_value:
116
+ raise AnalysisException(f"Can not load class {java_class}")
117
+ types = result_value.split(";")
118
+ input_types = [type_string_to_type_object(t) for t in types[:-1]]
119
+ output_type = types[-1]
120
+
121
+ return JavaUdf(
122
+ name,
123
+ input_types,
124
+ type_string_to_type_object(output_type),
125
+ )