snowpark-connect 0.27.0-py3-none-any.whl → 1.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +717 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +309 -26
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/error_utils.py +28 -0
  23. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  24. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  25. snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
  26. snowflake/snowpark_connect/expression/literal.py +37 -13
  27. snowflake/snowpark_connect/expression/map_cast.py +224 -15
  28. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  29. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  30. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  31. snowflake/snowpark_connect/expression/map_udf.py +86 -20
  32. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  33. snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
  34. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  35. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  36. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  37. snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  39. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
  43. snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
  44. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  45. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  46. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  47. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  48. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  49. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  50. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  51. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  52. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  53. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  54. snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
  55. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  56. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  57. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  58. snowflake/snowpark_connect/relation/map_join.py +683 -442
  59. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  60. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  61. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  62. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  63. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  64. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  65. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  66. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  67. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  68. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  69. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  70. snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
  71. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
  72. snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
  73. snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
  74. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  75. snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
  76. snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
  77. snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
  78. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  79. snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
  80. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  81. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  82. snowflake/snowpark_connect/relation/utils.py +128 -5
  83. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  84. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  85. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  86. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  87. snowflake/snowpark_connect/resources_initializer.py +171 -48
  88. snowflake/snowpark_connect/server.py +528 -473
  89. snowflake/snowpark_connect/server_common/__init__.py +503 -0
  90. snowflake/snowpark_connect/snowflake_session.py +65 -0
  91. snowflake/snowpark_connect/start_server.py +53 -5
  92. snowflake/snowpark_connect/type_mapping.py +349 -27
  93. snowflake/snowpark_connect/type_support.py +130 -0
  94. snowflake/snowpark_connect/typed_column.py +9 -7
  95. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  96. snowflake/snowpark_connect/utils/cache.py +49 -27
  97. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  98. snowflake/snowpark_connect/utils/context.py +195 -37
  99. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  100. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  101. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  102. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  103. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  104. snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
  105. snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
  106. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  107. snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
  108. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  109. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  110. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  111. snowflake/snowpark_connect/utils/profiling.py +25 -8
  112. snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
  113. snowflake/snowpark_connect/utils/sequence.py +21 -0
  114. snowflake/snowpark_connect/utils/session.py +64 -28
  115. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  116. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  117. snowflake/snowpark_connect/utils/telemetry.py +192 -40
  118. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  119. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  120. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  121. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  122. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  123. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  124. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  125. snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
  126. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  127. snowflake/snowpark_connect/version.py +1 -1
  128. snowflake/snowpark_decoder/dp_session.py +6 -2
  129. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  130. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
  131. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
  132. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
  133. snowflake/snowpark_connect/hidden_column.py +0 -39
  134. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  186. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  187. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  188. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  189. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  190. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  191. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  192. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  193. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  194. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
  195. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
  196. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
  197. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
  198. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  199. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
  200. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/utils/temporary_view_helper.py (new file)
@@ -0,0 +1,334 @@
+ #
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+ #
+ import re
+ import uuid
+ from collections import Counter
+ from typing import Optional, Tuple
+
+ from pyspark.errors import AnalysisException
+ from pyspark.errors.exceptions.base import TempTableAlreadyExistsException
+
+ from snowflake.snowpark import DataFrame, Session
+ from snowflake.snowpark._internal.analyzer.analyzer_utils import unquote_if_quoted
+ from snowflake.snowpark.exceptions import SnowparkSQLException
+ from snowflake.snowpark.types import StructField, StructType
+ from snowflake.snowpark_connect.column_name_handler import ColumnNameMap, ColumnNames
+ from snowflake.snowpark_connect.config import (
+     global_config,
+     sessions_config,
+     should_create_temporary_view_in_snowflake,
+ )
+ from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+ from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
+ from snowflake.snowpark_connect.utils.context import get_spark_session_id
+ from snowflake.snowpark_connect.utils.identifiers import (
+     spark_to_sf_single_id,
+     spark_to_sf_single_id_with_unquoting,
+ )
+
+ _INTERNAL_VIEW_PREFIX = "__SC_RENAMED_V_"
+
+ _CREATE_VIEW_PATTERN = re.compile(r"create\s+or\s+replace\s+view", re.IGNORECASE)
+
+ _temp_views = SynchronizedDict[Tuple[str, str], DataFrameContainer]()
+
+
+ def register_temp_view(name: str, df: DataFrameContainer, replace: bool) -> None:
+     normalized_name = _normalize(name)
+     current_session_id = get_spark_session_id()
+     for key in list(_temp_views.keys()):
+         if _normalize(key[0]) == normalized_name and key[1] == current_session_id:
+             if replace:
+                 _temp_views.remove(key)
+                 break
+             else:
+                 raise TempTableAlreadyExistsException(
+                     f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+                 )
+
+     _temp_views[(name, current_session_id)] = df
+
+
+ def unregister_temp_view(name: str) -> bool:
+     normalized_name = _normalize(name)
+
+     for key in _temp_views.keys():
+         normalized_key = _normalize(key[0])
+         if normalized_name == normalized_key and key[1] == get_spark_session_id():
+             pop_result = _temp_views.remove(key)
+             return pop_result is not None
+     return False
+
+
+ def get_temp_view(name: str) -> Optional[DataFrameContainer]:
+     normalized_name = _normalize(name)
+     for key in _temp_views.keys():
+         normalized_key = _normalize(key[0])
+         if normalized_name == normalized_key and key[1] == get_spark_session_id():
+             return _temp_views.get(key)
+     return None
+
+
+ def get_temp_view_normalized_names() -> list[str]:
+     return [
+         _normalize(key[0])
+         for key in _temp_views.keys()
+         if key[1] == get_spark_session_id()
+     ]
+
+
+ def _normalize(name: str) -> str:
+     return name if global_config.spark_sql_caseSensitive else name.lower()
+
+
+ def assert_snowflake_view_does_not_exist_in_cache(name: str, replace: bool):
+     temp_view = get_temp_view(name)
+     if temp_view is not None and not replace:
+         raise AnalysisException(
+             f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+         )
+
+
+ def assert_cached_view_does_not_exist_in_snowflake(
+     snowflake_view_name: list[str], replace: bool
+ ):
+     if len(snowflake_view_name) == 1:
+         name = unquote_if_quoted(snowflake_view_name[0])
+         sql_statement = f"SHOW VIEWS LIKE '{name}'"
+     else:
+         name = unquote_if_quoted(snowflake_view_name[1])
+         sql_statement = f"SHOW VIEWS LIKE '{name}' IN SCHEMA {snowflake_view_name[0]}"
+     if (
+         not replace
+         and len(Session.get_active_session().sql(sql_statement).collect()) > 0
+     ):
+         raise AnalysisException(
+             f"[TEMP_TABLE_OR_VIEW_ALREADY_EXISTS] Cannot create the temporary view `{name}` because it already exists."
+         )
+
+
+ def create_temporary_view_from_dataframe(
+     input_df_container: DataFrameContainer,
+     request_view_name: str,
+     is_global: bool,
+     replace: bool,
+ ) -> None:
+     input_df = input_df_container.dataframe
+
+     if is_global:
+         view_name = [global_config.spark_sql_globalTempDatabase, request_view_name]
+     else:
+         view_name = [request_view_name]
+     case_sensitive_view_name = ".".join(
+         [spark_to_sf_single_id_with_unquoting(part) for part in view_name]
+     )
+     snowflake_view_name = [
+         spark_to_sf_single_id_with_unquoting(part, True) for part in view_name
+     ]
+
+     if should_create_temporary_view_in_snowflake():
+         _create_snowflake_temporary_view(
+             input_df_container, snowflake_view_name, case_sensitive_view_name, replace
+         )
+     else:
+         store_temporary_view_as_dataframe(
+             input_df,
+             input_df_container.column_map,
+             input_df_container.column_map.get_spark_columns(),
+             input_df_container.column_map.get_snowpark_columns(),
+             case_sensitive_view_name,
+             snowflake_view_name,
+             replace,
+         )
+
+
+ def _create_snowflake_temporary_view(
+     input_df_container: DataFrameContainer,
+     snowflake_view_name: list[str],
+     stored_view_name: str,
+     replace: bool,
+ ):
+     column_map = input_df_container.column_map
+     input_df = input_df_container.dataframe
+
+     session_config = sessions_config[get_spark_session_id()]
+     duplicate_column_names_handling_mode = session_config[
+         "snowpark.connect.views.duplicate_column_names_handling_mode"
+     ]
+
+     # rename columns to match spark names
+     if duplicate_column_names_handling_mode == "rename":
+         # deduplicate column names by appending _DEDUP_1, _DEDUP_2, etc.
+         rename_map = _create_column_rename_map(column_map.columns, True)
+         input_df = input_df.rename(rename_map)
+     elif duplicate_column_names_handling_mode == "drop":
+         # Drop duplicate column names by removing all but the first occurrence.
+         duplicated_columns, remaining_columns = _find_duplicated_columns(
+             column_map.columns
+         )
+         rename_map = _create_column_rename_map(remaining_columns, False)
+         if len(duplicated_columns) > 0:
+             input_df = input_df.drop(*duplicated_columns)
+         input_df = input_df.rename(rename_map)
+     else:
+         # rename columns without deduplication
+         rename_map = _create_column_rename_map(column_map.columns, False)
+         input_df = input_df.rename(rename_map)
+
+     try:
+         create_snowflake_temporary_view(
+             input_df, snowflake_view_name, stored_view_name, replace
+         )
+     except SnowparkSQLException as exc:
+         if _is_error_caused_by_view_referencing_itself(exc) and replace:
+             # This error is caused by a statement with a self reference, like `CREATE VIEW A AS SELECT X FROM A`.
+             _create_chained_view(input_df, snowflake_view_name)
+         else:
+             raise
+
+
+ def _create_column_rename_map(
+     columns: list[ColumnNames], rename_duplicated: bool
+ ) -> dict:
+     if rename_duplicated is False:
+         # if we are not renaming duplicated columns, we can just return the original names
+         return {
+             col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
+             for col in columns
+         }
+
+     column_counts = Counter()
+     not_renamed_cols = []
+     renamed_cols = []
+
+     for col in columns:
+         new_column_name = col.spark_name
+         normalized_name = new_column_name.lower()
+         column_counts[normalized_name] += 1
+
+         if column_counts[normalized_name] > 1:
+             new_column_name = (
+                 f"{new_column_name}_DEDUP_{column_counts[normalized_name] - 1}"
+             )
+             renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
+         else:
+             not_renamed_cols.append(ColumnNames(new_column_name, col.snowpark_name, []))
+
+     if len(renamed_cols) == 0:
+         return {
+             col.snowpark_name: spark_to_sf_single_id(col.spark_name, is_column=True)
+             for col in not_renamed_cols
+         }
+
+     # we need to make sure that we don't have duplicated names after renaming;
+     # columns that were not renamed in this iteration should have priority over renamed duplicates
+     return _create_column_rename_map(not_renamed_cols + renamed_cols, True)
+
+
+ def _find_duplicated_columns(
+     columns: list[ColumnNames],
+ ) -> (list[str], list[ColumnNames]):
+     duplicates = []
+     remaining_columns = []
+     seen = set()
+     for col in columns:
+         if col.spark_name in seen:
+             duplicates.append(col.snowpark_name)
+         else:
+             seen.add(col.spark_name)
+             remaining_columns.append(col)
+     return duplicates, remaining_columns
+
+
+ def _generate_random_builtin_view_name() -> str:
+     return _INTERNAL_VIEW_PREFIX + str(uuid.uuid4()).replace("-", "")
+
+
+ def _is_error_caused_by_view_referencing_itself(exc: Exception) -> bool:
+     return "view definition refers to view being defined" in str(exc).lower()
+
+
+ def _create_chained_view(input_df: DataFrame, view_name: list[str]) -> None:
+     """
+     To create a view that references itself, Spark would take the previous
+     definition of A and paste it in place of `FROM A`. Snowflake fails in that
+     case, so as a workaround we create a chain of internal views instead. This function:
+     1. Renames the previous definition of A to an internal name (instead of deleting it).
+     2. Adjusts the DDL of the new statement to reference the renamed internal view instead of itself.
+     """
+
+     session = Session.get_active_session()
+
+     view_name = ".".join(view_name)
+
+     tmp_name = _generate_random_builtin_view_name()
+     old_name_replacement = _generate_random_builtin_view_name()
+
+     input_df.create_or_replace_temp_view(tmp_name)
+
+     session.sql(f"ALTER VIEW {view_name} RENAME TO {old_name_replacement}").collect()
+
+     ddl: str = session.sql(f"SELECT GET_DDL('VIEW', '{tmp_name}')").collect()[0][0]
+
+     ddl = ddl.replace(view_name, old_name_replacement)
+
+     # GET_DDL result doesn't contain `TEMPORARY`; it's likely a bug.
+     ddl = _CREATE_VIEW_PATTERN.sub("create or replace temp view", ddl)
+
+     session.sql(ddl).collect()
+
+     session.sql(f"ALTER VIEW {tmp_name} RENAME TO {view_name}").collect()
+
+
+ def store_temporary_view_as_dataframe(
+     input_df: DataFrame,
+     parent_column_map: ColumnNameMap,
+     spark_columns: list[str],
+     snowpark_columns: list[str],
+     view_name: str,
+     snowflake_view_name: list[str],
+     replace: bool,
+ ):
+     assert_cached_view_does_not_exist_in_snowflake(snowflake_view_name, replace)
+     schema = StructType(
+         [StructField(field.name, field.datatype) for field in input_df.schema.fields]
+     )
+     input_df_container = DataFrameContainer.create_with_column_mapping(
+         dataframe=input_df,
+         spark_column_names=spark_columns,
+         snowpark_column_names=snowpark_columns,
+         parent_column_name_map=parent_column_map,
+         cached_schema_getter=lambda: schema,
+     )
+
+     if replace:
+         try:
+             Session.get_active_session().sql(
+                 "DROP VIEW IF EXISTS " + ".".join(snowflake_view_name)
+             ).collect()
+         except SnowparkSQLException as e:
+             # Spark allows both a table and a temporary view to exist with the same name.
+             # Snowflake throws an exception if we try to drop a view that doesn't exist while a table with the same name exists.
+             if (
+                 "SQL compilation error: Object found is of type 'TABLE', not specified type 'VIEW'"
+                 not in str(e)
+             ):
+                 raise
+
+     register_temp_view(view_name, input_df_container, replace)
+
+
+ def create_snowflake_temporary_view(
+     input_df: DataFrame,
+     snowflake_view_name: list[str],
+     stored_view_name: str,
+     replace: bool,
+     comment: Optional[str] = None,
+ ) -> None:
+     assert_snowflake_view_does_not_exist_in_cache(stored_view_name, replace)
+     if replace:
+         unregister_temp_view(stored_view_name)
+         input_df.create_or_replace_temp_view(snowflake_view_name, comment=comment)
+     else:
+         input_df.create_temp_view(snowflake_view_name, comment=comment)
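For reference, here is a minimal PySpark sketch of the temp-view semantics this new helper reproduces (the Spark Connect endpoint and sample data are assumed placeholders, not part of this package): createOrReplaceTempView registers or silently replaces a view, createTempView raises TempTableAlreadyExistsException on a name collision, and dropTempView unregisters it.

# Hypothetical client-side usage; endpoint and data are illustrative only.
from pyspark.sql import SparkSession
from pyspark.errors.exceptions.base import TempTableAlreadyExistsException

spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "val"])

df.createOrReplaceTempView("people")   # maps to register_temp_view(..., replace=True)
try:
    df.createTempView("people")        # replace=False -> name collision
except TempTableAlreadyExistsException:
    print("TEMP_TABLE_OR_VIEW_ALREADY_EXISTS, as register_temp_view raises")

spark.catalog.dropTempView("people")   # maps to unregister_temp_view("people")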
snowflake/snowpark_connect/utils/udf_cache.py
@@ -3,13 +3,9 @@
  #
  
  import functools
- import importlib.resources
- import tempfile
  import threading
  import typing
- import zipfile
  from collections.abc import Callable
- from pathlib import Path
  from types import ModuleType
  from typing import List, Optional, Tuple, Union
  
@@ -19,13 +15,15 @@ from snowflake.snowpark.functions import call_udf, udaf, udf, udtf
  from snowflake.snowpark.types import DataType, StructType
  from snowflake.snowpark_connect import tcm
  from snowflake.snowpark_connect.utils.telemetry import telemetry
+ from snowflake.snowpark_connect.utils.upload_java_jar import (
+     JAVA_UDFS_JAR_NAME,
+     upload_java_udf_jar,
+ )
  
  _lock = threading.RLock()
  
  _BUILTIN_UDF_PREFIX = "__SC_BUILTIN_"
  
- _JAVA_UDFS_JAR_NAME = "java_udfs-1.0-SNAPSHOT.jar"
-
  
  def init_builtin_udf_cache(session: Session) -> None:
      with _lock:
@@ -34,6 +32,7 @@ def init_builtin_udf_cache(session: Session) -> None:
          session._cached_udtfs = {}
          session._cached_java_udfs = {}
          session._cached_sql_udfs = {}
+         session._cached_sprocs = {}
  
  
  def _hash_types(types: list) -> str:
@@ -98,7 +97,11 @@ def cached_udaf(
      # Register the function outside the lock to avoid contention
      wrapped_func = udaf(
          udaf_type,
-         name=name,
+         name=[
+             Session.get_active_session().get_current_database(),
+             Session.get_active_session().get_current_schema(),
+             name,
+         ],
          return_type=return_type,
          input_types=input_types,
          imports=imports,
@@ -114,7 +117,7 @@
  
      if class_type is None:
          raise ValueError(
-             "Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
+             "[snowpark_connect::internal_error] Type must be provided for cached_udaf. UDAF contains multiple functions hence it has to be represented by a type. Functions are not supported."
          )
      else:
          # return udaf
@@ -155,7 +158,11 @@ def cached_udf(
      # but this will not cause any issues.
      wrapped_func = udf(
          _null_safe_wrapper,
-         name=name,
+         name=[
+             Session.get_active_session().get_current_database(),
+             Session.get_active_session().get_current_schema(),
+             name,
+         ],
          return_type=return_type,
          input_types=input_types,
          imports=imports,
@@ -205,7 +212,11 @@ def cached_udtf(
      # Register the function outside the lock to avoid contention
      wrapped_func = udtf(
          func,
-         name=name,
+         name=[
+             Session.get_active_session().get_current_database(),
+             Session.get_active_session().get_current_schema(),
+             name,
+         ],
          output_schema=output_schema,
          input_types=input_types,
          imports=imports,
@@ -306,11 +317,20 @@ def register_cached_sql_udf(
          )
  
          with _lock:
-             cache[function_name] = True
+             function_identifier = ".".join(
+                 [
+                     Session.get_active_session().get_current_database(),
+                     Session.get_active_session().get_current_schema(),
+                     function_name,
+                 ]
+             )
+             cache[function_name] = function_identifier
+     else:
+         function_identifier = cache[function_name]
  
      return functools.partial(
          call_udf,
-         function_name,
+         function_identifier,
      )
  
  
@@ -343,32 +363,7 @@
  
      if len(cache) == 0:
          # This is the first Java UDF being registered, so we need to upload the JAR with UDF definitions first
-         jar_path = ""
-         try:
-             jar_path = importlib.resources.files(
-                 "snowflake.snowpark_connect.resources"
-             ).joinpath(_JAVA_UDFS_JAR_NAME)
-         except NotADirectoryError:
-             # importlib.resource doesn't work in Stage Package method
-             zip_path = Path(__file__).parent.parent.parent.parent
-             jar_path_in_zip = (
-                 f"snowflake/snowpark_connect/resources/{_JAVA_UDFS_JAR_NAME}"
-             )
-             temp_dir = tempfile.gettempdir()
-
-             with zipfile.ZipFile(zip_path, "r") as zip_ref:
-                 if jar_path_in_zip not in zip_ref.namelist():
-                     raise FileNotFoundError(f"{jar_path_in_zip} not found")
-                 zip_ref.extract(jar_path_in_zip, temp_dir)
-
-             jar_path = f"{temp_dir}/{jar_path_in_zip}"
-
-         upload_result = session.file.put(str(jar_path), stage, overwrite=True)
-
-         if upload_result[0].status != "UPLOADED":
-             raise RuntimeError(
-                 f"Failed to upload JAR with UDF definitions to stage: {upload_result[0].message}"
-             )
+         upload_java_udf_jar(session)
  
      udf_is_cached = function_name in cache
  
@@ -378,15 +373,96 @@
              function_name,
              input_types,
              return_type,
-             [f"{stage}/{_JAVA_UDFS_JAR_NAME}"],
+             [f"{stage}/snowflake/snowpark_connect/resources/{JAVA_UDFS_JAR_NAME}"],
              java_handler,
              packages,
          )
  
          with _lock:
-             cache[function_name] = True
+             function_identifier = ".".join(
+                 [
+                     Session.get_active_session().get_current_database(),
+                     Session.get_active_session().get_current_schema(),
+                     function_name,
+                 ]
+             )
+             cache[function_name] = function_identifier
+     else:
+         function_identifier = cache[function_name]
  
      return functools.partial(
          call_udf,
-         function_name,
+         function_identifier,
+     )
+
+
+ def register_cached_sproc(
+     sproc_body: str,
+     handler_name: str,
+     input_arg_types: list[str],
+     return_type: str = "STRING",
+     runtime_version: str = "3.11",
+     packages: list[str] | None = None,
+ ) -> str:
+     """
+     Register a cached stored procedure that persists across schema/database changes.
+
+     Args:
+         sproc_body: The Python code for the stored procedure
+         handler_name: Name of the handler function in the sproc_body
+         input_arg_types: List of SQL types for input arguments (e.g. ['STRING', 'STRING'])
+         return_type: SQL return type (default: 'STRING')
+         runtime_version: Python runtime version (default: '3.11')
+         packages: List of Python packages to include
+
+     Returns:
+         Fully qualified stored procedure name for calling
+     """
+     if packages is None:
+         packages = ["snowflake-snowpark-python"]
+
+     # Create a unique hash based on the procedure content and signature
+     content_hash = _hash_types(
+         [sproc_body, handler_name, return_type, runtime_version]
+         + input_arg_types
+         + packages
      )
+
+     # Generate unique procedure name with hash
+     sproc_name = f"{_BUILTIN_UDF_PREFIX}SPROC_{content_hash}"
+
+     with _lock:
+         session = Session.get_active_session()
+         cache = session._cached_sprocs
+
+         # Create fully qualified name with current database and schema
+         fully_qualified_name = ".".join(
+             [session.get_current_database(), session.get_current_schema(), sproc_name]
+         )
+
+         if sproc_name in cache:
+             return cache[sproc_name]
+
+     args_str = ",".join(
+         f"arg{idx} {type_}" for idx, type_ in enumerate(input_arg_types)
+     )
+     packages_str = ",".join(f"'{pkg}'" for pkg in packages)
+
+     session.sql(
+         f"""
+         CREATE OR REPLACE TEMPORARY PROCEDURE {sproc_name}({args_str})
+         RETURNS {return_type}
+         LANGUAGE PYTHON
+         RUNTIME_VERSION = '{runtime_version}'
+         PACKAGES = ({packages_str})
+         HANDLER = '{handler_name}'
+         AS $$
+         {sproc_body}
+         $$
+         """
+     ).collect()
+
+     with _lock:
+         cache[sproc_name] = fully_qualified_name
+
+     return fully_qualified_name
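The recurring name=[database, schema, name] change above is the core of this revision: cached UDFs, UDAFs, UDTFs, and stored procedures are now registered under fully qualified identifiers, so a cached object still resolves after the session later switches schema or database. Here is a minimal Snowpark sketch of the same pattern, assuming an already-created session; the function name and the helper are illustrative, not part of this package.

# Hypothetical sketch of the fully-qualified caching pattern; only Session,
# session.udf.register, and call_udf come from snowflake-snowpark-python.
from snowflake.snowpark import Session
from snowflake.snowpark.functions import call_udf
from snowflake.snowpark.types import IntegerType

def register_plus_one(session: Session) -> str:
    # Qualify with the database/schema that are current *now*, so the returned
    # identifier stays valid after later USE SCHEMA / USE DATABASE statements.
    fq_name = [
        session.get_current_database(),
        session.get_current_schema(),
        "PLUS_ONE_EXAMPLE",  # hypothetical UDF name
    ]
    session.udf.register(
        lambda x: x + 1,
        return_type=IntegerType(),
        input_types=[IntegerType()],
        name=fq_name,
        replace=True,
    )
    return ".".join(fq_name)

# identifier = register_plus_one(session)
# session.sql("USE SCHEMA OTHER_SCHEMA").collect()   # cached identifier still valid
# df.select(call_udf(identifier, df["n"]))           # resolves via the qualified name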