snowpark-connect 0.27.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. snowflake/snowpark_connect/__init__.py +1 -0
  2. snowflake/snowpark_connect/analyze_plan/map_tree_string.py +8 -4
  3. snowflake/snowpark_connect/client/__init__.py +15 -0
  4. snowflake/snowpark_connect/client/error_utils.py +30 -0
  5. snowflake/snowpark_connect/client/exceptions.py +36 -0
  6. snowflake/snowpark_connect/client/query_results.py +90 -0
  7. snowflake/snowpark_connect/client/server.py +717 -0
  8. snowflake/snowpark_connect/client/utils/__init__.py +10 -0
  9. snowflake/snowpark_connect/client/utils/session.py +85 -0
  10. snowflake/snowpark_connect/column_name_handler.py +404 -243
  11. snowflake/snowpark_connect/column_qualifier.py +43 -0
  12. snowflake/snowpark_connect/config.py +309 -26
  13. snowflake/snowpark_connect/constants.py +2 -0
  14. snowflake/snowpark_connect/dataframe_container.py +102 -8
  15. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  16. snowflake/snowpark_connect/error/error_codes.py +50 -0
  17. snowflake/snowpark_connect/error/error_utils.py +172 -23
  18. snowflake/snowpark_connect/error/exceptions.py +13 -4
  19. snowflake/snowpark_connect/execute_plan/map_execution_command.py +15 -160
  20. snowflake/snowpark_connect/execute_plan/map_execution_root.py +26 -20
  21. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  22. snowflake/snowpark_connect/expression/error_utils.py +28 -0
  23. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  24. snowflake/snowpark_connect/expression/hybrid_column_map.py +53 -5
  25. snowflake/snowpark_connect/expression/integral_types_support.py +219 -0
  26. snowflake/snowpark_connect/expression/literal.py +37 -13
  27. snowflake/snowpark_connect/expression/map_cast.py +224 -15
  28. snowflake/snowpark_connect/expression/map_expression.py +80 -27
  29. snowflake/snowpark_connect/expression/map_extension.py +322 -12
  30. snowflake/snowpark_connect/expression/map_sql_expression.py +316 -81
  31. snowflake/snowpark_connect/expression/map_udf.py +86 -20
  32. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +451 -173
  33. snowflake/snowpark_connect/expression/map_unresolved_function.py +2964 -829
  34. snowflake/snowpark_connect/expression/map_unresolved_star.py +87 -23
  35. snowflake/snowpark_connect/expression/map_update_fields.py +70 -18
  36. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  37. snowflake/snowpark_connect/includes/jars/json4s-ast_2.13-3.7.0-M11.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/{scala-library-2.12.18.jar → sas-scala-udf_2.12-0.2.0.jar} +0 -0
  39. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.13-0.2.0.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/scala-reflect-2.13.16.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.13-3.5.6.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/{spark-connect-client-jvm_2.12-3.5.6.jar → spark-connect-client-jvm_2.13-3.5.6.jar} +0 -0
  43. snowflake/snowpark_connect/includes/jars/{spark-sql_2.12-3.5.6.jar → spark-sql_2.13-3.5.6.jar} +0 -0
  44. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +1 -1
  45. snowflake/snowpark_connect/includes/python/pyspark/sql/connect/streaming/worker/listener_worker.py +1 -1
  46. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.py +12 -10
  47. snowflake/snowpark_connect/proto/snowflake_expression_ext_pb2.pyi +14 -2
  48. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.py +10 -8
  49. snowflake/snowpark_connect/proto/snowflake_relation_ext_pb2.pyi +13 -6
  50. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  51. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +297 -49
  52. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  53. snowflake/snowpark_connect/relation/io_utils.py +110 -10
  54. snowflake/snowpark_connect/relation/map_aggregate.py +239 -256
  55. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  56. snowflake/snowpark_connect/relation/map_column_ops.py +264 -96
  57. snowflake/snowpark_connect/relation/map_extension.py +263 -29
  58. snowflake/snowpark_connect/relation/map_join.py +683 -442
  59. snowflake/snowpark_connect/relation/map_local_relation.py +28 -1
  60. snowflake/snowpark_connect/relation/map_map_partitions.py +83 -8
  61. snowflake/snowpark_connect/relation/map_relation.py +48 -19
  62. snowflake/snowpark_connect/relation/map_row_ops.py +310 -91
  63. snowflake/snowpark_connect/relation/map_show_string.py +13 -6
  64. snowflake/snowpark_connect/relation/map_sql.py +1233 -222
  65. snowflake/snowpark_connect/relation/map_stats.py +48 -9
  66. snowflake/snowpark_connect/relation/map_subquery_alias.py +11 -2
  67. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  68. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +53 -14
  69. snowflake/snowpark_connect/relation/read/map_read.py +134 -43
  70. snowflake/snowpark_connect/relation/read/map_read_csv.py +326 -47
  71. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +21 -6
  72. snowflake/snowpark_connect/relation/read/map_read_json.py +324 -86
  73. snowflake/snowpark_connect/relation/read/map_read_parquet.py +146 -28
  74. snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py +142 -0
  75. snowflake/snowpark_connect/relation/read/map_read_socket.py +15 -3
  76. snowflake/snowpark_connect/relation/read/map_read_table.py +86 -6
  77. snowflake/snowpark_connect/relation/read/map_read_text.py +22 -4
  78. snowflake/snowpark_connect/relation/read/metadata_utils.py +170 -0
  79. snowflake/snowpark_connect/relation/read/reader_config.py +42 -3
  80. snowflake/snowpark_connect/relation/read/utils.py +50 -5
  81. snowflake/snowpark_connect/relation/stage_locator.py +91 -55
  82. snowflake/snowpark_connect/relation/utils.py +128 -5
  83. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  84. snowflake/snowpark_connect/relation/write/map_write.py +929 -319
  85. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  86. snowflake/snowpark_connect/resources/java_udfs-1.0-SNAPSHOT.jar +0 -0
  87. snowflake/snowpark_connect/resources_initializer.py +171 -48
  88. snowflake/snowpark_connect/server.py +528 -473
  89. snowflake/snowpark_connect/server_common/__init__.py +503 -0
  90. snowflake/snowpark_connect/snowflake_session.py +65 -0
  91. snowflake/snowpark_connect/start_server.py +53 -5
  92. snowflake/snowpark_connect/type_mapping.py +349 -27
  93. snowflake/snowpark_connect/type_support.py +130 -0
  94. snowflake/snowpark_connect/typed_column.py +9 -7
  95. snowflake/snowpark_connect/utils/artifacts.py +9 -8
  96. snowflake/snowpark_connect/utils/cache.py +49 -27
  97. snowflake/snowpark_connect/utils/concurrent.py +36 -1
  98. snowflake/snowpark_connect/utils/context.py +195 -37
  99. snowflake/snowpark_connect/utils/describe_query_cache.py +68 -53
  100. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  101. snowflake/snowpark_connect/utils/expression_transformer.py +172 -0
  102. snowflake/snowpark_connect/utils/identifiers.py +137 -3
  103. snowflake/snowpark_connect/utils/io_utils.py +57 -1
  104. snowflake/snowpark_connect/utils/java_stored_procedure.py +151 -0
  105. snowflake/snowpark_connect/utils/java_udaf_utils.py +321 -0
  106. snowflake/snowpark_connect/utils/java_udtf_utils.py +239 -0
  107. snowflake/snowpark_connect/utils/jvm_udf_utils.py +281 -0
  108. snowflake/snowpark_connect/utils/open_telemetry.py +516 -0
  109. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  110. snowflake/snowpark_connect/utils/patch_spark_line_number.py +181 -0
  111. snowflake/snowpark_connect/utils/profiling.py +25 -8
  112. snowflake/snowpark_connect/utils/scala_udf_utils.py +185 -340
  113. snowflake/snowpark_connect/utils/sequence.py +21 -0
  114. snowflake/snowpark_connect/utils/session.py +64 -28
  115. snowflake/snowpark_connect/utils/snowpark_connect_logging.py +51 -9
  116. snowflake/snowpark_connect/utils/spcs_logger.py +290 -0
  117. snowflake/snowpark_connect/utils/telemetry.py +192 -40
  118. snowflake/snowpark_connect/utils/temporary_view_cache.py +67 -0
  119. snowflake/snowpark_connect/utils/temporary_view_helper.py +334 -0
  120. snowflake/snowpark_connect/utils/udf_cache.py +117 -41
  121. snowflake/snowpark_connect/utils/udf_helper.py +39 -37
  122. snowflake/snowpark_connect/utils/udf_utils.py +133 -14
  123. snowflake/snowpark_connect/utils/udtf_helper.py +8 -1
  124. snowflake/snowpark_connect/utils/udtf_utils.py +46 -31
  125. snowflake/snowpark_connect/utils/udxf_import_utils.py +9 -2
  126. snowflake/snowpark_connect/utils/upload_java_jar.py +57 -0
  127. snowflake/snowpark_connect/version.py +1 -1
  128. snowflake/snowpark_decoder/dp_session.py +6 -2
  129. snowflake/snowpark_decoder/spark_decoder.py +12 -0
  130. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-submit +14 -4
  131. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/METADATA +16 -7
  132. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/RECORD +139 -168
  133. snowflake/snowpark_connect/hidden_column.py +0 -39
  134. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  135. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  136. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  137. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  138. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  139. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  140. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  141. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  142. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  143. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  144. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  145. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  146. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  147. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  148. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  149. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  150. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  151. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  152. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  153. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  154. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  155. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  156. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  157. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  158. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  159. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  160. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  161. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  162. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  163. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  164. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  165. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  166. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  167. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  168. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  169. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  170. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  171. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  172. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  173. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  174. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  175. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  176. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  177. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  178. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  179. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  180. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  181. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  182. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  183. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  184. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  185. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  186. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  187. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  188. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  189. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  190. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  191. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  192. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  193. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  194. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-connect +0 -0
  195. {snowpark_connect-0.27.0.data → snowpark_connect-1.7.0.data}/scripts/snowpark-session +0 -0
  196. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/WHEEL +0 -0
  197. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE-binary +0 -0
  198. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/LICENSE.txt +0 -0
  199. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/licenses/NOTICE-binary +0 -0
  200. {snowpark_connect-0.27.0.dist-info → snowpark_connect-1.7.0.dist-info}/top_level.txt +0 -0
snowflake/snowpark_connect/relation/read/map_read_parquet.py
@@ -5,6 +5,7 @@
 import collections
 import re
 from collections.abc import Callable
+from typing import Any
 
 import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
@@ -20,12 +21,31 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     quote_name_without_upper_casing,
 )
 from snowflake.snowpark.column import METADATA_FILENAME
-from snowflake.snowpark.types import DataType, DoubleType, IntegerType, StringType
+from snowflake.snowpark.types import (
+    DataType,
+    DoubleType,
+    IntegerType,
+    StringType,
+    StructType,
+)
+from snowflake.snowpark_connect.config import external_table_location
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.relation.read.map_read_partitioned_parquet import (
+    read_partitioned_parquet_from_external_table,
+    use_external_table,
+)
+from snowflake.snowpark_connect.relation.read.metadata_utils import (
+    add_filename_metadata_to_reader,
+)
 from snowflake.snowpark_connect.relation.read.reader_config import ReaderWriterConfig
 from snowflake.snowpark_connect.relation.read.utils import (
+    apply_metadata_exclusion_pattern,
     rename_columns_as_snowflake_standard,
 )
+from snowflake.snowpark_connect.type_support import emulate_integral_types
+from snowflake.snowpark_connect.utils.io_utils import cached_file_format
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
@@ -33,7 +53,7 @@ from snowflake.snowpark_connect.utils.telemetry import (
 
 def map_read_parquet(
     rel: relation_proto.Relation,
-    schema: str | None,
+    schema: StructType | None,
     session: snowpark.Session,
     paths: list[str],
     options: ReaderWriterConfig,
@@ -41,28 +61,62 @@ def map_read_parquet(
     """Read a Parquet file into a Snowpark DataFrame."""
 
     if rel.read.is_streaming is True:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Streaming is not supported for Parquet files."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
-    snowpark_options = options.convert_to_snowpark_args()
-    assert schema is None, "Read PARQUET does not support user schema"
+    converted_snowpark_options = options.convert_to_snowpark_args()
+    file_format_options = _parse_parquet_snowpark_options(converted_snowpark_options)
+    raw_options = rel.read.data_source.options
     assert len(paths) > 0, "Read PARQUET expects at least one path"
 
-    reader = session.read.options(snowpark_options)
+    snowpark_options = {
+        # Setting these two options prevents a significant number of additional CREATE TEMPORARY
+        # FILE FORMAT and DROP FILE FORMAT queries. If FORMAT_NAME is not set, the Snowpark DF reader
+        # will eagerly issue a CREATE TEMPORARY FILE FORMAT when inferring the schema of the result;
+        # if ENFORCE_EXISTING_FILE_FORMAT is not set, an additional CREATE ... command will be
+        # issued when the lazy DF is materialized by a cache_result call.
+        "FORMAT_NAME": converted_snowpark_options.get(
+            "FORMAT_NAME",
+            cached_file_format(session, "parquet", file_format_options),
+        ),
+        "ENFORCE_EXISTING_FILE_FORMAT": True,
+    }
+
+    if "PATTERN" in converted_snowpark_options:
+        snowpark_options["PATTERN"] = converted_snowpark_options.get("PATTERN")
+
+    apply_metadata_exclusion_pattern(snowpark_options)
+
+    reader = add_filename_metadata_to_reader(
+        session.read.options(snowpark_options), raw_options
+    )
 
     if len(paths) == 1:
-        df = _read_parquet_with_partitions(session, reader, paths[0])
+        df, read_using_external_table = _read_parquet_with_partitions(
+            session, reader, paths[0], schema, snowpark_options
+        )
+        can_be_cached = not read_using_external_table
     else:
         is_merge_schema = options.config.get("mergeschema")
-        df = _read_parquet_with_partitions(session, reader, paths[0])
+        df, read_using_external_table = _read_parquet_with_partitions(
+            session, reader, paths[0], schema, snowpark_options
+        )
+        can_be_cached = not read_using_external_table
         schema_cols = df.columns
         for p in paths[1:]:
             reader._user_schema = None
+            partition_df, read_using_external_table = _read_parquet_with_partitions(
+                session, reader, p, schema, snowpark_options
+            )
             df = df.union_all_by_name(
-                _read_parquet_with_partitions(session, reader, p),
+                partition_df,
                 allow_missing_columns=True,
             )
+            can_be_cached = can_be_cached and not read_using_external_table
+
         if not is_merge_schema:
             df = df.select(*schema_cols)
 
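From the client's side none of this changes the Spark API surface; a read like the sketch below still lands in map_read_parquet, with the named FILE FORMAT, PATTERN handling, and metadata exclusion applied server-side. The stage path and option value are invented for illustration; only standard PySpark reader calls are used.

    # Hypothetical Spark Connect client usage against a made-up stage path.
    df = (
        spark.read
        .option("mergeSchema", "true")   # surfaces as options.config.get("mergeschema") above
        .parquet("@my_stage/events/")    # invented stage path
    )
    df.show()
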
@@ -73,34 +127,92 @@
         dataframe=renamed_df,
         spark_column_names=[analyzer_utils.unquote_if_quoted(c) for c in df.columns],
         snowpark_column_names=snowpark_column_names,
-        snowpark_column_types=[f.datatype for f in df.schema.fields],
+        snowpark_column_types=[
+            emulate_integral_types(f.datatype) for f in df.schema.fields
+        ],
+        can_be_cached=can_be_cached,
     )
 
 
 def _read_parquet_with_partitions(
-    session: Session, reader: DataFrameReader, path: str
-) -> DataFrame:
-    """Reads parquet files and adds partition columns from subdirectories."""
+    session: Session,
+    reader: DataFrameReader,
+    path: str,
+    schema: StructType | None,
+    snowpark_options: dict[str, Any],
+) -> tuple[DataFrame, bool]:
+    """
+    Reads parquet files and adds partition columns from subdirectories.
+    Returns a tuple of read DataFrame and a boolean indicating if DataFrame was read from external table.
+    """
 
     partition_columns, inferred_types = _discover_partition_columns(session, path)
-    df = reader.with_metadata(METADATA_FILENAME).parquet(path)
 
-    if not partition_columns:
-        return df.drop(METADATA_FILENAME)
+    def _get_df() -> DataFrame:
+        if not partition_columns:
+            return reader.parquet(path)
+        else:
+            # In case of too big overhead we can always optimize by using option: MAX_FILE_COUNT and allow user to define how many files should be scanned
+            df = reader.with_metadata(METADATA_FILENAME).parquet(path)
+
+            for col_name in partition_columns:
+                quoted_col_name = quote_name_without_upper_casing(col_name)
+                escaped_col_name = re.escape(col_name)
+                regex_pattern = rf"{escaped_col_name}=([^/]+)"
+
+                raw_value = snowpark_fn.regexp_extract(
+                    METADATA_FILENAME, regex_pattern, 1
+                )
+                value_or_null = snowpark_fn.when(raw_value == "", None).otherwise(
+                    raw_value
+                )
+
+                df = df.with_column(
+                    quoted_col_name,
+                    snowpark_fn.cast(value_or_null, inferred_types[col_name]),
+                )
+            return df.drop(METADATA_FILENAME)
+
+    if use_external_table(session, path):
+        if schema is None:
+            schema = _get_df().schema
+        return (
+            read_partitioned_parquet_from_external_table(
+                session,
+                schema,
+                external_table_location(),
+                path[1:-1],
+                partition_columns,
+                inferred_types,
+                snowpark_options,
+            ),
+            True,
+        )
+    else:
+        # TODO: SNOW-2736756 support user schema
+        assert schema is None, "Read PARQUET does not support user schema"
+        return _get_df(), False
 
-    for col_name in partition_columns:
-        quoted_col_name = quote_name_without_upper_casing(col_name)
-        escaped_col_name = re.escape(col_name)
-        regex_pattern = rf"{escaped_col_name}=([^/]+)"
 
-        raw_value = snowpark_fn.regexp_extract(METADATA_FILENAME, regex_pattern, 1)
-        value_or_null = snowpark_fn.when(raw_value == "", None).otherwise(raw_value)
+_parquet_file_format_allowed_options = {
+    "COMPRESSION",
+    "SNAPPY_COMPRESSION",
+    "BINARY_AS_TEXT",
+    "TRIM_SPACE",
+    "USE_LOGICAL_TYPE",
+    "USE_VECTORIZED_SCANNER",
+    "REPLACE_INVALID_CHARACTERS",
+    "NULL_IF",
+}
 
-        df = df.with_column(
-            quoted_col_name, snowpark_fn.cast(value_or_null, inferred_types[col_name])
-        )
 
-    return df.drop(METADATA_FILENAME)
+def _parse_parquet_snowpark_options(snowpark_options: dict[str, Any]) -> dict[str, Any]:
+    file_format_options = dict()
+    for key, value in snowpark_options.items():
+        upper_key = key.upper()
+        if upper_key in _parquet_file_format_allowed_options:
+            file_format_options[upper_key] = value
+    return file_format_options
 
 
 def _extract_partitions_from_path(path: str) -> dict[str, str]:
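The partition columns added inside _get_df come straight from Hive-style column=value segments in METADATA$FILENAME. A standalone sketch of the same regex extraction, using only the standard library (the file name is invented):

    import re

    filename = "events/year=2024/month=07/part-0001.parquet"  # invented example path

    for col_name in ("year", "month"):
        pattern = rf"{re.escape(col_name)}=([^/]+)"
        match = re.search(pattern, filename)
        # Mirrors the empty-string-to-NULL handling done with snowpark_fn.when above.
        value = match.group(1) if match and match.group(1) != "" else None
        print(col_name, value)  # year 2024, then month 07
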
@@ -149,10 +261,14 @@ def _discover_partition_columns(
             if i not in dir_level_to_column_name:
                 dir_level_to_column_name[i] = key
             elif dir_level_to_column_name[i] != key:
-                raise ValueError(
+                exception = ValueError(
                     f"Conflicting partition column names detected: '{dir_level_to_column_name[i]}' and '{key}' "
                     f"at the same directory level"
                 )
+                attach_custom_error_code(
+                    exception, ErrorCodes.INVALID_OPERATION
+                )
+                raise exception
 
             partition_columns_values[key].add(value)
 
@@ -160,10 +276,12 @@
     for level in sorted(dir_level_to_column_name.keys()):
         col_name = dir_level_to_column_name[level]
         if col_name in seen_columns:
-            raise ValueError(
+            exception = ValueError(
                 f"Found partition column '{col_name}' at multiple directory levels. "
                 f"A partition column can only appear at a single level."
             )
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
         seen_columns.add(col_name)
 
     ordered_columns = [
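To make the two rejected layouts concrete, here is a small standalone validation sketch (not the package's implementation) that fails on the same inputs _discover_partition_columns now reports with INVALID_OPERATION error codes:

    def check_layout(paths: list[str]) -> None:
        """Illustration only: mirrors the two checks above."""
        level_to_column: dict[int, str] = {}
        for p in paths:
            cols = [seg.split("=", 1)[0] for seg in p.split("/") if "=" in seg]
            for level, col in enumerate(cols):
                if level_to_column.setdefault(level, col) != col:
                    raise ValueError("Conflicting partition column names at the same level")
        seen: set[str] = set()
        for level in sorted(level_to_column):
            if level_to_column[level] in seen:
                raise ValueError("Partition column appears at multiple directory levels")
            seen.add(level_to_column[level])

    check_layout(["data/year=2024/month=07/a.parquet"])  # passes
    # check_layout(["data/year=2024/a.parquet", "data/month=07/b.parquet"])  # conflicting names
    # check_layout(["data/year=2024/year=2025/a.parquet"])  # same column at two levels
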
snowflake/snowpark_connect/relation/read/map_read_partitioned_parquet.py (new file)
@@ -0,0 +1,142 @@
+#
+# Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+#
+
+import re
+from copy import deepcopy
+from typing import Any
+
+from snowflake import snowpark
+from snowflake.snowpark import Session
+from snowflake.snowpark._internal.analyzer.analyzer_utils import (
+    quote_name_without_upper_casing,
+    unquote_if_quoted,
+)
+from snowflake.snowpark.functions import col, lit
+from snowflake.snowpark.types import ArrayType, DataType, MapType, StructType
+from snowflake.snowpark_connect.config import external_table_location
+from snowflake.snowpark_connect.utils.context import (
+    get_spark_session_id,
+    register_request_external_table,
+)
+from snowflake.snowpark_connect.utils.io_utils import cached_file_format
+from snowflake.snowpark_connect.utils.scala_udf_utils import map_type_to_snowflake_type
+
+STRUCTURED_TYPE_PATTERN = re.compile(r"\([^)]*\)")
+
+
+def use_external_table(session: Session, path: str) -> bool:
+    external_table_path = external_table_location()
+    stripped_path = path[1:-1]
+
+    is_external_table_path_defined = external_table_path is not None
+    is_stage = stripped_path.startswith("@")
+
+    return (
+        is_external_table_path_defined
+        and is_stage
+        and _is_external_stage(session, stripped_path)
+    )
+
+
+def _is_external_stage(session: Session, path: str) -> bool:
+    try:
+        stage_description = (
+            session.sql(f"DESCRIBE STAGE {path.split('/')[0][1:]}")
+            .filter(col('"property"') == lit("URL"))
+            .collect()
+        )
+        return stage_description[0]["property_value"] != ""
+    except Exception:
+        return False
+
+
+def _get_count_of_non_partition_path_parts(path: str) -> int:
+    count = 0
+    # First element of a path is a stage identifier we need to ignore it to count relative path parts
+    for element in path.split("/")[1:]:
+        if "=" in element:
+            break
+        count += 1
+    return count
+
+
+def read_partitioned_parquet_from_external_table(
+    session: Session,
+    schema: StructType,
+    external_table_path: str,
+    path: str,
+    partition_columns: list[str],
+    inferred_types: dict[str, DataType],
+    snowpark_options: dict[str, Any],
+) -> snowpark.DataFrame:
+    skip_path_parts = _get_count_of_non_partition_path_parts(path)
+    snowpark_partition_columns = ", ".join(
+        [quote_name_without_upper_casing(col) for col in partition_columns]
+    )
+    snowpark_typed_partition_columns = ", ".join(
+        [
+            f"{quote_name_without_upper_casing(col)} {map_type_to_snowflake_type(inferred_types[col])} as (split_part(split_part(METADATA$FILENAME, '/', {i + skip_path_parts}), '=', 2)::{map_type_to_snowflake_type(inferred_types[col])})"
+            for col, i in zip(partition_columns, range(len(partition_columns)))
+        ]
+    )
+    snowpark_schema_columns = ",".join(
+        [
+            f"{field.name} {_map_snowpark_type_to_simplified_snowflake_type(field.datatype)} as (value:{field.name}::{_map_snowpark_type_to_simplified_snowflake_type(field.datatype)})"
+            for field in schema.fields
+            if unquote_if_quoted(field.name) not in snowpark_partition_columns
+        ]
+    )
+
+    table_name = f"{external_table_path}.{quote_name_without_upper_casing(path + get_spark_session_id())}"
+    snowpark_options_copy = deepcopy(snowpark_options)
+    # These options are only used in the Snowpark Python reader, but not the actual emitted SQL.
+    snowpark_options_copy.pop("PATTERN")
+    snowpark_options_copy.pop("FORMAT_NAME")
+    snowpark_options_copy.pop("ENFORCE_EXISTING_FILE_FORMAT")
+    file_format_name = cached_file_format(session, "parquet", snowpark_options_copy)
+    session.sql(
+        f"""
+        CREATE OR REPLACE EXTERNAL TABLE {table_name} (
+            {snowpark_typed_partition_columns},
+            {snowpark_schema_columns}
+        )
+        PARTITION BY ({snowpark_partition_columns})
+        WITH LOCATION = {path}
+        FILE_FORMAT = {file_format_name}
+        PATTERN = '{snowpark_options.get('PATTERN', '.*')}'
+        AUTO_REFRESH = false
+        """
+    ).collect()
+    register_request_external_table(table_name)
+    map_fields = ", ".join(
+        [
+            f"{field.name}::{_map_snowpark_type_to_snowflake(field.datatype)} as {field.name}"
+            if isinstance(field.datatype, (StructType, MapType, ArrayType))
+            else field.name
+            for field in schema.fields
+        ]
+    )
+    return session.sql(f"SELECT {map_fields} FROM {table_name}")
+
+
+def _map_snowpark_type_to_simplified_snowflake_type(datatype: DataType) -> str:
+    if isinstance(datatype, StructType):
+        return "OBJECT"
+    elif isinstance(datatype, MapType):
+        return "VARIANT"
+    else:
+        return STRUCTURED_TYPE_PATTERN.sub("", map_type_to_snowflake_type(datatype))
+
+
+def _map_snowpark_type_to_snowflake(datatype: DataType) -> str:
+    if isinstance(datatype, StructType):
+        object_fields = ", ".join(
+            [
+                f"{field.name} {_map_snowpark_type_to_snowflake(field.datatype)}"
+                for field in datatype.fields
+            ]
+        )
+        return f"OBJECT({object_fields})"
+    else:
+        return map_type_to_snowflake_type(datatype)
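The split_part index used for each generated partition column depends on how many path segments sit between the stage name and the first column=value segment. A standalone copy of that counting logic, with a made-up stage path:

    def count_non_partition_parts(path: str) -> int:
        """Standalone copy of _get_count_of_non_partition_path_parts for illustration."""
        count = 0
        # The first element is the stage identifier; skip it and count until the
        # first Hive-style "column=value" segment.
        for element in path.split("/")[1:]:
            if "=" in element:
                break
            count += 1
        return count

    # "@my_stage/events/year=2024/month=07" has one non-partition segment ("events"),
    # and that count offsets each partition column's split_part index.
    print(count_non_partition_parts("@my_stage/events/year=2024/month=07"))  # 1
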
snowflake/snowpark_connect/relation/read/map_read_socket.py
@@ -9,6 +9,9 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
 from snowflake import snowpark
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+from snowflake.snowpark_connect.type_support import emulate_integral_types
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
@@ -30,7 +33,9 @@ def map_read_socket(
         host = options.get("host", None)
         port = options.get("port", None)
         if not host or not port:
-            raise ValueError("Host and port must be provided in options.")
+            exception = ValueError("Host and port must be provided in options.")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+            raise exception
         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
             try:
                 s.connect((host, int(port)))
@@ -54,10 +59,17 @@ def map_read_socket(
                     dataframe=df,
                     spark_column_names=[spark_cname],
                     snowpark_column_names=[snowpark_cname],
+                    snowpark_column_types=[
+                        emulate_integral_types(f.datatype) for f in df.schema.fields
+                    ],
                 )
             except OSError as e:
-                raise Exception(f"Error connecting to {host}:{port} - {e}")
+                exception = Exception(f"Error connecting to {host}:{port} - {e}")
+                attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+                raise exception
     else:
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Socket reads are only supported in streaming mode."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
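The recurring pattern in these hunks is construct, tag, raise: build the exception, attach a machine-readable code, then raise it. The helper's internals are not part of this diff, so the sketch below only assumes it annotates the exception object in some retrievable way; the real attach_custom_error_code in error/error_utils.py may work differently.

    # Sketch only; hypothetical attribute name, not the package's actual mechanism.
    def attach_custom_error_code_sketch(exc: Exception, code: str) -> None:
        exc.custom_error_code = code

    try:
        exception = ValueError("Host and port must be provided in options.")
        attach_custom_error_code_sketch(exception, "INVALID_INPUT")
        raise exception
    except ValueError as e:
        print(getattr(e, "custom_error_code", None))  # INVALID_INPUT
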
snowflake/snowpark_connect/relation/read/map_read_table.py
@@ -11,11 +11,21 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
     unquote_if_quoted,
 )
 from snowflake.snowpark.exceptions import SnowparkSQLException
+from snowflake.snowpark.types import StructField, StructType
+from snowflake.snowpark_connect.column_name_handler import (
+    ColumnNameMap,
+    make_column_names_snowpark_compatible,
+)
+from snowflake.snowpark_connect.column_qualifier import ColumnQualifier
 from snowflake.snowpark_connect.config import auto_uppercase_non_column_identifiers
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.utils import (
     rename_columns_as_snowflake_standard,
 )
+from snowflake.snowpark_connect.type_support import emulate_integral_types
+from snowflake.snowpark_connect.utils.context import get_processed_views
 from snowflake.snowpark_connect.utils.identifiers import (
     split_fully_qualified_spark_name,
 )
@@ -23,6 +33,7 @@ from snowflake.snowpark_connect.utils.session import _get_current_snowpark_sessi
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
 )
+from snowflake.snowpark_connect.utils.temporary_view_helper import get_temp_view
 
 
 def post_process_df(
@@ -48,8 +59,10 @@ def post_process_df(
             dataframe=renamed_df,
             spark_column_names=true_names,
             snowpark_column_names=snowpark_column_names,
-            snowpark_column_types=[f.datatype for f in df.schema.fields],
-            column_qualifiers=[name_parts] * len(true_names)
+            snowpark_column_types=[
+                emulate_integral_types(f.datatype) for f in df.schema.fields
+            ],
+            column_qualifiers=[{ColumnQualifier(tuple(name_parts))} for _ in true_names]
             if source_table_name
             else None,
         )
@@ -57,22 +70,85 @@
         # Check if this is a table/view not found error
         # Snowflake error codes: 002003 (42S02) - Object does not exist or not authorized
         if hasattr(e, "sql_error_code") and e.sql_error_code == 2003:
-            raise AnalysisException(
+            exception = AnalysisException(
                 f"[TABLE_OR_VIEW_NOT_FOUND] The table or view cannot be found. {source_table_name}"
-            ) from None  # Suppress original exception to reduce message size
+            )
+            attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+            raise exception from None  # Suppress original exception to reduce message size
         # Re-raise if it's not a table not found error
         raise
 
 
+def _get_temporary_view(
+    temp_view: DataFrameContainer, table_name: str, plan_id: int
+) -> DataFrameContainer:
+    fields_names = [field.name for field in temp_view.dataframe.schema.fields]
+    fields_types = [field.datatype for field in temp_view.dataframe.schema.fields]
+
+    snowpark_column_names = make_column_names_snowpark_compatible(
+        temp_view.column_map.get_spark_columns(), plan_id
+    )
+    # Rename columns in dataframe to prevent conflicting names during joins
+    renamed_df = temp_view.dataframe.select(
+        *(
+            temp_view.dataframe.col(orig).alias(alias)
+            for orig, alias in zip(fields_names, snowpark_column_names)
+        )
+    )
+    # do not flatten initial rename when reading table
+    # TODO: remove once SNOW-2203826 is done
+    if renamed_df._select_statement is not None:
+        renamed_df._select_statement.flatten_disabled = True
+
+    new_column_map = ColumnNameMap(
+        spark_column_names=temp_view.column_map.get_spark_columns(),
+        snowpark_column_names=snowpark_column_names,
+        column_metadata=temp_view.column_map.column_metadata,
+        column_qualifiers=[
+            {ColumnQualifier(tuple(split_fully_qualified_spark_name(table_name)))}
+            for _ in range(len(temp_view.column_map.get_spark_columns()))
+        ],
+        parent_column_name_map=temp_view.column_map.get_parent_column_name_map(),
+    )
+
+    schema = StructType(
+        [
+            StructField(name, type, _is_column=False)
+            for name, type in zip(snowpark_column_names, fields_types)
+        ]
+    )
+    return DataFrameContainer(
+        dataframe=renamed_df,
+        column_map=new_column_map,
+        table_name=temp_view.table_name,
+        alias=temp_view.alias,
+        partition_hint=temp_view.partition_hint,
+        cached_schema_getter=lambda: schema,
+    )
+
+
 def get_table_from_name(
     table_name: str, session: snowpark.Session, plan_id: int
 ) -> DataFrameContainer:
     """Get table from name returning a container."""
+
+    # Verify if recursive view read is not attempted
+    if table_name in get_processed_views():
+        exception = AnalysisException(
+            f"[RECURSIVE_VIEW] Recursive view `{table_name}` detected (cycle: `{table_name}` -> `{table_name}`)"
+        )
+        attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+        raise exception
+
     snowpark_name = ".".join(
         quote_name_without_upper_casing(part)
         for part in split_fully_qualified_spark_name(table_name)
     )
 
+    temp_view = get_temp_view(snowpark_name)
+    if temp_view:
+        return _get_temporary_view(temp_view, table_name, plan_id)
+
     if auto_uppercase_non_column_identifiers():
         snowpark_name = snowpark_name.upper()
 
@@ -101,10 +177,14 @@ def map_read_table(
         and rel.read.data_source.format.lower() == "iceberg"
     ):
         if len(rel.read.data_source.paths) != 1:
-            raise SnowparkConnectNotImplementedError(
+            exception = SnowparkConnectNotImplementedError(
                 f"Unexpected paths: {rel.read.data_source.paths}"
            )
+            attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+            raise exception
         table_identifier = rel.read.data_source.paths[0]
     else:
-        raise ValueError("The relation must have a table identifier.")
+        exception = ValueError("The relation must have a table identifier.")
+        attach_custom_error_code(exception, ErrorCodes.INVALID_INPUT)
+        raise exception
     return get_table_from_name(table_identifier, session, rel.common.plan_id)
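The new guard in get_table_from_name is essentially a cycle check over the set of views currently being resolved. A standalone sketch of the idea follows; the context helpers (get_processed_views and its registration counterpart) are not shown in this diff, so their role here is an assumption.

    _processing: set[str] = set()  # stand-in for the session's processed-views context

    def resolve_view(name: str, reads: dict[str, list[str]]) -> None:
        """Illustration only: raise on a self-referential or cyclic view definition."""
        if name in _processing:
            raise ValueError(f"[RECURSIVE_VIEW] Recursive view `{name}` detected")
        _processing.add(name)
        try:
            for dependency in reads.get(name, []):
                resolve_view(dependency, reads)
        finally:
            _processing.discard(name)

    resolve_view("v1", {"v1": ["v2"], "v2": []})  # fine
    # resolve_view("v1", {"v1": ["v1"]})          # raises [RECURSIVE_VIEW]
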
snowflake/snowpark_connect/relation/read/map_read_text.py
@@ -8,10 +8,13 @@ import pyspark.sql.connect.proto.relations_pb2 as relation_proto
 
 from snowflake import snowpark
 from snowflake.snowpark_connect.dataframe_container import DataFrameContainer
+from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
 from snowflake.snowpark_connect.relation.read.utils import (
     get_spark_column_names_from_snowpark_columns,
     rename_columns_as_snowflake_standard,
 )
+from snowflake.snowpark_connect.type_support import emulate_integral_types
 from snowflake.snowpark_connect.utils.io_utils import file_format
 from snowflake.snowpark_connect.utils.telemetry import (
     SnowparkConnectNotImplementedError,
@@ -24,11 +27,17 @@ def get_file_paths_from_stage(
 ) -> typing.List[str]:
     files_paths = []
     for listed_path_row in session.sql(f"LIST {path}").collect():
+        # Skip _SUCCESS marker files
+        if listed_path_row[0].endswith("_SUCCESS"):
+            continue
+
         listed_path = listed_path_row[0].split("/")
         if listed_path_row[0].startswith("s3://") or listed_path_row[0].startswith(
             "s3a://"
         ):
             listed_path = listed_path[3:]
+        elif listed_path_row[0].startswith("azure://"):
+            listed_path = listed_path[4:]
         else:
             listed_path = listed_path[1:]
         files_paths.append("/".join(listed_path))
@@ -43,7 +52,12 @@ def read_text(
 ) -> snowpark.DataFrame:
     # TODO: handle stage name with double quotes
     files_paths = get_file_paths_from_stage(path, session)
-    stage_name = path.split("/")[0]
+    # Remove matching quotes from both ends of the path to get the stage name, if present.
+    if path and len(path) > 1 and path[0] == path[-1] and path[0] in ('"', "'"):
+        unquoted_path = path[1:-1]
+    else:
+        unquoted_path = path
+    stage_name = unquoted_path.split("/")[0]
     line_sep = options.get("lineSep") or "\n"
     column_name = (
         schema[0].name if schema is not None and len(schema.fields) > 0 else '"value"'
@@ -59,7 +73,7 @@ def read_text(
     )
     for fp in files_paths:
         content = session.sql(
-            f"SELECT T.$1 AS {default_column_name} FROM {stage_name}/{fp} (FILE_FORMAT => {text_file_format}) AS T"
+            f"SELECT T.$1 AS {default_column_name} FROM '{stage_name}/{fp}' (FILE_FORMAT => {text_file_format}) AS T"
        ).collect()
        for row in content:
            result.append(row[0])
@@ -77,9 +91,11 @@ def map_read_text(
     """
     if rel.read.is_streaming is True:
         # TODO: Structured streaming implementation.
-        raise SnowparkConnectNotImplementedError(
+        exception = SnowparkConnectNotImplementedError(
             "Streaming is not supported for CSV files."
         )
+        attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+        raise exception
 
     df = read_text(paths[0], schema, session, rel.read.data_source.options)
     if len(paths) > 1:
@@ -102,5 +118,7 @@ def map_read_text(
         dataframe=renamed_df,
         spark_column_names=spark_column_names,
         snowpark_column_names=snowpark_column_names,
-        snowpark_column_types=[f.datatype for f in df.schema.fields],
+        snowpark_column_types=[
+            emulate_integral_types(f.datatype) for f in df.schema.fields
+        ],
     )
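The stage-name fix in read_text is small but easy to illustrate standalone: quotes are stripped only when the same quote character wraps both ends of the path (example paths are invented):

    def strip_matching_quotes(path: str) -> str:
        """Standalone copy of the quote handling added to read_text."""
        if path and len(path) > 1 and path[0] == path[-1] and path[0] in ('"', "'"):
            return path[1:-1]
        return path

    print(strip_matching_quotes("'@my_stage/texts/notes.txt'"))  # @my_stage/texts/notes.txt
    print(strip_matching_quotes("@my_stage/texts/notes.txt"))    # unchanged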