snowpark-connect 0.32.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the changes between publicly available package versions as released to the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect might be problematic.

Files changed (106)
  1. snowflake/snowpark_connect/column_name_handler.py +91 -40
  2. snowflake/snowpark_connect/column_qualifier.py +0 -4
  3. snowflake/snowpark_connect/config.py +9 -0
  4. snowflake/snowpark_connect/expression/hybrid_column_map.py +5 -4
  5. snowflake/snowpark_connect/expression/literal.py +12 -12
  6. snowflake/snowpark_connect/expression/map_sql_expression.py +18 -4
  7. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +150 -29
  8. snowflake/snowpark_connect/expression/map_unresolved_function.py +93 -55
  9. snowflake/snowpark_connect/relation/map_aggregate.py +156 -257
  10. snowflake/snowpark_connect/relation/map_column_ops.py +19 -0
  11. snowflake/snowpark_connect/relation/map_join.py +454 -252
  12. snowflake/snowpark_connect/relation/map_row_ops.py +136 -54
  13. snowflake/snowpark_connect/relation/map_sql.py +335 -90
  14. snowflake/snowpark_connect/relation/read/map_read.py +9 -1
  15. snowflake/snowpark_connect/relation/read/map_read_csv.py +19 -2
  16. snowflake/snowpark_connect/relation/read/map_read_json.py +90 -2
  17. snowflake/snowpark_connect/relation/read/map_read_parquet.py +3 -0
  18. snowflake/snowpark_connect/relation/read/map_read_text.py +4 -0
  19. snowflake/snowpark_connect/relation/read/reader_config.py +10 -0
  20. snowflake/snowpark_connect/relation/read/utils.py +41 -0
  21. snowflake/snowpark_connect/relation/utils.py +50 -2
  22. snowflake/snowpark_connect/relation/write/map_write.py +251 -292
  23. snowflake/snowpark_connect/resources_initializer.py +25 -13
  24. snowflake/snowpark_connect/server.py +9 -24
  25. snowflake/snowpark_connect/type_mapping.py +2 -0
  26. snowflake/snowpark_connect/typed_column.py +2 -2
  27. snowflake/snowpark_connect/utils/context.py +0 -14
  28. snowflake/snowpark_connect/utils/expression_transformer.py +163 -0
  29. snowflake/snowpark_connect/utils/sequence.py +21 -0
  30. snowflake/snowpark_connect/utils/session.py +4 -1
  31. snowflake/snowpark_connect/utils/udf_helper.py +1 -0
  32. snowflake/snowpark_connect/utils/udtf_helper.py +3 -0
  33. snowflake/snowpark_connect/version.py +1 -1
  34. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/METADATA +4 -2
  35. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/RECORD +43 -104
  36. snowflake/snowpark_connect/includes/jars/antlr4-runtime-4.9.3.jar +0 -0
  37. snowflake/snowpark_connect/includes/jars/commons-cli-1.5.0.jar +0 -0
  38. snowflake/snowpark_connect/includes/jars/commons-codec-1.16.1.jar +0 -0
  39. snowflake/snowpark_connect/includes/jars/commons-collections-3.2.2.jar +0 -0
  40. snowflake/snowpark_connect/includes/jars/commons-collections4-4.4.jar +0 -0
  41. snowflake/snowpark_connect/includes/jars/commons-compiler-3.1.9.jar +0 -0
  42. snowflake/snowpark_connect/includes/jars/commons-compress-1.26.0.jar +0 -0
  43. snowflake/snowpark_connect/includes/jars/commons-crypto-1.1.0.jar +0 -0
  44. snowflake/snowpark_connect/includes/jars/commons-dbcp-1.4.jar +0 -0
  45. snowflake/snowpark_connect/includes/jars/commons-io-2.16.1.jar +0 -0
  46. snowflake/snowpark_connect/includes/jars/commons-lang-2.6.jar +0 -0
  47. snowflake/snowpark_connect/includes/jars/commons-lang3-3.12.0.jar +0 -0
  48. snowflake/snowpark_connect/includes/jars/commons-logging-1.1.3.jar +0 -0
  49. snowflake/snowpark_connect/includes/jars/commons-math3-3.6.1.jar +0 -0
  50. snowflake/snowpark_connect/includes/jars/commons-pool-1.5.4.jar +0 -0
  51. snowflake/snowpark_connect/includes/jars/commons-text-1.10.0.jar +0 -0
  52. snowflake/snowpark_connect/includes/jars/hadoop-client-api-trimmed-3.3.4.jar +0 -0
  53. snowflake/snowpark_connect/includes/jars/jackson-annotations-2.15.2.jar +0 -0
  54. snowflake/snowpark_connect/includes/jars/jackson-core-2.15.2.jar +0 -0
  55. snowflake/snowpark_connect/includes/jars/jackson-core-asl-1.9.13.jar +0 -0
  56. snowflake/snowpark_connect/includes/jars/jackson-databind-2.15.2.jar +0 -0
  57. snowflake/snowpark_connect/includes/jars/jackson-dataformat-yaml-2.15.2.jar +0 -0
  58. snowflake/snowpark_connect/includes/jars/jackson-datatype-jsr310-2.15.2.jar +0 -0
  59. snowflake/snowpark_connect/includes/jars/jackson-module-scala_2.12-2.15.2.jar +0 -0
  60. snowflake/snowpark_connect/includes/jars/json4s-ast_2.12-3.7.0-M11.jar +0 -0
  61. snowflake/snowpark_connect/includes/jars/json4s-core_2.12-3.7.0-M11.jar +0 -0
  62. snowflake/snowpark_connect/includes/jars/json4s-jackson_2.12-3.7.0-M11.jar +0 -0
  63. snowflake/snowpark_connect/includes/jars/json4s-native_2.12-3.7.0-M11.jar +0 -0
  64. snowflake/snowpark_connect/includes/jars/json4s-scalap_2.12-3.7.0-M11.jar +0 -0
  65. snowflake/snowpark_connect/includes/jars/kryo-shaded-4.0.2.jar +0 -0
  66. snowflake/snowpark_connect/includes/jars/log4j-1.2-api-2.20.0.jar +0 -0
  67. snowflake/snowpark_connect/includes/jars/log4j-api-2.20.0.jar +0 -0
  68. snowflake/snowpark_connect/includes/jars/log4j-core-2.20.0.jar +0 -0
  69. snowflake/snowpark_connect/includes/jars/log4j-slf4j2-impl-2.20.0.jar +0 -0
  70. snowflake/snowpark_connect/includes/jars/paranamer-2.8.3.jar +0 -0
  71. snowflake/snowpark_connect/includes/jars/paranamer-2.8.jar +0 -0
  72. snowflake/snowpark_connect/includes/jars/sas-scala-udf_2.12-0.1.0.jar +0 -0
  73. snowflake/snowpark_connect/includes/jars/scala-collection-compat_2.12-2.7.0.jar +0 -0
  74. snowflake/snowpark_connect/includes/jars/scala-library-2.12.18.jar +0 -0
  75. snowflake/snowpark_connect/includes/jars/scala-parser-combinators_2.12-2.3.0.jar +0 -0
  76. snowflake/snowpark_connect/includes/jars/scala-reflect-2.12.18.jar +0 -0
  77. snowflake/snowpark_connect/includes/jars/scala-xml_2.12-2.1.0.jar +0 -0
  78. snowflake/snowpark_connect/includes/jars/slf4j-api-2.0.7.jar +0 -0
  79. snowflake/snowpark_connect/includes/jars/spark-catalyst_2.12-3.5.6.jar +0 -0
  80. snowflake/snowpark_connect/includes/jars/spark-common-utils_2.12-3.5.6.jar +0 -0
  81. snowflake/snowpark_connect/includes/jars/spark-connect-client-jvm_2.12-3.5.6.jar +0 -0
  82. snowflake/snowpark_connect/includes/jars/spark-core_2.12-3.5.6.jar +0 -0
  83. snowflake/snowpark_connect/includes/jars/spark-graphx_2.12-3.5.6.jar +0 -0
  84. snowflake/snowpark_connect/includes/jars/spark-hive-thriftserver_2.12-3.5.6.jar +0 -0
  85. snowflake/snowpark_connect/includes/jars/spark-hive_2.12-3.5.6.jar +0 -0
  86. snowflake/snowpark_connect/includes/jars/spark-kvstore_2.12-3.5.6.jar +0 -0
  87. snowflake/snowpark_connect/includes/jars/spark-launcher_2.12-3.5.6.jar +0 -0
  88. snowflake/snowpark_connect/includes/jars/spark-mesos_2.12-3.5.6.jar +0 -0
  89. snowflake/snowpark_connect/includes/jars/spark-mllib-local_2.12-3.5.6.jar +0 -0
  90. snowflake/snowpark_connect/includes/jars/spark-network-common_2.12-3.5.6.jar +0 -0
  91. snowflake/snowpark_connect/includes/jars/spark-network-shuffle_2.12-3.5.6.jar +0 -0
  92. snowflake/snowpark_connect/includes/jars/spark-repl_2.12-3.5.6.jar +0 -0
  93. snowflake/snowpark_connect/includes/jars/spark-sketch_2.12-3.5.6.jar +0 -0
  94. snowflake/snowpark_connect/includes/jars/spark-sql-api_2.12-3.5.6.jar +0 -0
  95. snowflake/snowpark_connect/includes/jars/spark-sql_2.12-3.5.6.jar +0 -0
  96. snowflake/snowpark_connect/includes/jars/spark-tags_2.12-3.5.6.jar +0 -0
  97. snowflake/snowpark_connect/includes/jars/spark-unsafe_2.12-3.5.6.jar +0 -0
  98. snowflake/snowpark_connect/includes/jars/spark-yarn_2.12-3.5.6.jar +0 -0
  99. {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-connect +0 -0
  100. {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-session +0 -0
  101. {snowpark_connect-0.32.0.data → snowpark_connect-1.0.0.data}/scripts/snowpark-submit +0 -0
  102. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/WHEEL +0 -0
  103. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE-binary +0 -0
  104. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/LICENSE.txt +0 -0
  105. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/licenses/NOTICE-binary +0 -0
  106. {snowpark_connect-0.32.0.dist-info → snowpark_connect-1.0.0.dist-info}/top_level.txt +0 -0
@@ -45,6 +45,61 @@ from snowflake.snowpark_connect.utils.telemetry import (
 )
 
 
+def cast_columns(
+    df_container: DataFrameContainer,
+    df_dtypes: list[snowpark.types.DataType],
+    target_dtypes: list[snowpark.types.DataType],
+    column_map: ColumnNameMap,
+):
+    df: snowpark.DataFrame = df_container.dataframe
+    if df_dtypes == target_dtypes:
+        return df_container
+    # Use cached schema if available to avoid triggering extra queries
+    if (
+        hasattr(df_container, "cached_schema_getter")
+        and df_container.cached_schema_getter is not None
+    ):
+        df_schema = df_container.cached_schema_getter()
+    else:
+        df_schema = df.schema  # Get current schema
+    new_columns = []
+
+    for i, field in enumerate(df_schema.fields):
+        col_name = field.name
+        current_type = field.datatype
+        target_type = target_dtypes[i]
+
+        if current_type != target_type:
+            new_columns.append(df[col_name].cast(target_type).alias(col_name))
+        else:
+            new_columns.append(df[col_name])
+
+    new_df = df.select(new_columns)
+    return DataFrameContainer.create_with_column_mapping(
+        dataframe=new_df,
+        spark_column_names=column_map.get_spark_columns(),
+        snowpark_column_names=column_map.get_snowpark_columns(),
+        snowpark_column_types=target_dtypes,
+        column_metadata=column_map.column_metadata,
+        parent_column_name_map=column_map,
+    )
+
+
+def get_schema_from_result(
+    result: DataFrameContainer,
+) -> StructType:
+    """
+    Get schema from a DataFrameContainer, using cached schema if available to avoid extra queries.
+    """
+    if (
+        hasattr(result, "cached_schema_getter")
+        and result.cached_schema_getter is not None
+    ):
+        return result.cached_schema_getter()
+    else:
+        return result.dataframe.schema
+
+
 def map_deduplicate(
     rel: relation_proto.Relation,
 ) -> DataFrameContainer:
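
The two helpers above hoist the schema-caching fallback out of map_union so other set operations can reuse it. As a standalone illustration, the fallback pattern can be sketched without Snowpark at all; FakeContainer and the string schemas below are hypothetical stand-ins for DataFrameContainer and its StructType schema.

# A minimal, self-contained sketch of the cached-schema fallback, assuming a
# hypothetical FakeContainer in place of DataFrameContainer; strings stand in
# for snowpark StructType schemas.
from typing import Callable, Optional

class FakeContainer:
    def __init__(self, live_schema: str,
                 cached_schema_getter: Optional[Callable[[], str]] = None):
        self.live_schema = live_schema  # stands in for df.schema, which costs a query
        self.cached_schema_getter = cached_schema_getter

def get_schema(result: FakeContainer) -> str:
    # Prefer the cached getter so no query is issued; fall back to the live schema.
    if getattr(result, "cached_schema_getter", None) is not None:
        return result.cached_schema_getter()
    return result.live_schema

print(get_schema(FakeContainer("live", lambda: "cached")))  # -> cached
print(get_schema(FakeContainer("live")))                    # -> live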
@@ -205,21 +260,8 @@ def map_union(
 
     # workaround for unstructured type vs structured type
     # Use cached schema if available to avoid triggering extra queries
-    if (
-        hasattr(left_result, "cached_schema_getter")
-        and left_result.cached_schema_getter is not None
-    ):
-        left_schema = left_result.cached_schema_getter()
-    else:
-        left_schema = left_df.schema
-
-    if (
-        hasattr(right_result, "cached_schema_getter")
-        and right_result.cached_schema_getter is not None
-    ):
-        right_schema = right_result.cached_schema_getter()
-    else:
-        right_schema = right_df.schema
+    left_schema = get_schema_from_result(left_result)
+    right_schema = get_schema_from_result(right_result)
 
     left_dtypes = [field.datatype for field in left_schema.fields]
     right_dtypes = [field.datatype for field in right_schema.fields]
@@ -257,6 +299,29 @@ def map_union(
                     # Union of any type with null type is of the other type
                     target_left_dtypes.append(other_t)
                     target_right_dtypes.append(other_t)
+                case (snowpark.types.DecimalType(), snowpark.types.DecimalType()):
+                    # Widen decimal types to accommodate both sides
+                    # Calculate the maximum scale and maximum integer digits
+                    left_integer_digits = left_type.precision - left_type.scale
+                    right_integer_digits = right_type.precision - right_type.scale
+
+                    # The common type needs to accommodate:
+                    # - The maximum number of digits after the decimal point (scale)
+                    # - The maximum number of digits before the decimal point (integer digits)
+                    common_scale = max(left_type.scale, right_type.scale)
+                    common_integer_digits = max(
+                        left_integer_digits, right_integer_digits
+                    )
+                    common_precision = min(38, common_scale + common_integer_digits)
+
+                    # Ensure scale doesn't exceed precision
+                    common_scale = min(common_scale, common_precision)
+
+                    common_type = snowpark.types.DecimalType(
+                        common_precision, common_scale
+                    )
+                    target_left_dtypes.append(common_type)
+                    target_right_dtypes.append(common_type)
                 case (snowpark.types.BooleanType(), _) | (
                     _,
                     snowpark.types.BooleanType(),
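
The widening rule in this hunk can be checked with plain arithmetic. Below is a worked example using hypothetical (precision, scale) tuples in place of snowpark.types.DecimalType instances; only the computation mirrors the added case.

# A worked example of the decimal-widening rule above, with hypothetical
# (precision, scale) tuples standing in for DecimalType instances.
def widen_decimals(left: tuple[int, int], right: tuple[int, int]) -> tuple[int, int]:
    left_precision, left_scale = left
    right_precision, right_scale = right
    # Digits before the decimal point on each side.
    left_integer_digits = left_precision - left_scale
    right_integer_digits = right_precision - right_scale
    # Keep the larger scale and the larger integer-digit count.
    common_scale = max(left_scale, right_scale)
    common_integer_digits = max(left_integer_digits, right_integer_digits)
    # Snowflake caps decimal precision at 38.
    common_precision = min(38, common_scale + common_integer_digits)
    common_scale = min(common_scale, common_precision)
    return common_precision, common_scale

# DECIMAL(10, 2) unioned with DECIMAL(8, 4): 8 integer digits (left) plus
# 4 fractional digits (right) widen to DECIMAL(12, 4).
assert widen_decimals((10, 2), (8, 4)) == (12, 4)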
@@ -272,49 +337,24 @@ def map_union(
                         raise exception
                     target_left_dtypes.append(left_type)
                     target_right_dtypes.append(right_type)
+                case (
+                    snowpark.types.TimestampType()
+                    | snowpark.types.DateType()
+                    | snowpark.types._NumericType(),
+                    snowpark.types.StringType(),
+                ) | (
+                    snowpark.types.StringType(),
+                    snowpark.types.TimestampType()
+                    | snowpark.types.DateType()
+                    | snowpark.types._NumericType(),
+                ) if not spark_sql_ansi_enabled:
+                    common_type = snowpark.types.StringType()
+                    target_left_dtypes.append(common_type)
+                    target_right_dtypes.append(common_type)
                 case _:
                     target_left_dtypes.append(left_type)
                     target_right_dtypes.append(right_type)
 
-    def cast_columns(
-        df_container: DataFrameContainer,
-        df_dtypes: list[snowpark.types.DataType],
-        target_dtypes: list[snowpark.types.DataType],
-        column_map: ColumnNameMap,
-    ):
-        df: snowpark.DataFrame = df_container.dataframe
-        if df_dtypes == target_dtypes:
-            return df_container
-        # Use cached schema if available to avoid triggering extra queries
-        if (
-            hasattr(df_container, "cached_schema_getter")
-            and df_container.cached_schema_getter is not None
-        ):
-            df_schema = df_container.cached_schema_getter()
-        else:
-            df_schema = df.schema  # Get current schema
-        new_columns = []
-
-        for i, field in enumerate(df_schema.fields):
-            col_name = field.name
-            current_type = field.datatype
-            target_type = target_dtypes[i]
-
-            if current_type != target_type:
-                new_columns.append(df[col_name].cast(target_type).alias(col_name))
-            else:
-                new_columns.append(df[col_name])
-
-        new_df = df.select(new_columns)
-        return DataFrameContainer.create_with_column_mapping(
-            dataframe=new_df,
-            spark_column_names=column_map.get_spark_columns(),
-            snowpark_column_names=column_map.get_snowpark_columns(),
-            snowpark_column_types=target_dtypes,
-            column_metadata=column_map.column_metadata,
-            parent_column_name_map=column_map,
-        )
-
     left_result = cast_columns(
         left_result,
         left_dtypes,
@@ -527,6 +567,48 @@ def map_except(
     left_df = left_result.dataframe
     right_df = right_result.dataframe
 
+    # workaround for unstructured type vs structured type
+    # Use cached schema if available to avoid triggering extra queries
+    left_schema = get_schema_from_result(left_result)
+    right_schema = get_schema_from_result(right_result)
+
+    left_dtypes = [field.datatype for field in left_schema.fields]
+    right_dtypes = [field.datatype for field in right_schema.fields]
+
+    if left_dtypes != right_dtypes and not rel.set_op.by_name:
+        if len(left_dtypes) != len(right_dtypes):
+            exception = AnalysisException("UNION: the number of columns must match")
+            attach_custom_error_code(exception, ErrorCodes.INVALID_OPERATION)
+            raise exception
+        target_left_dtypes, target_right_dtypes = [], []
+        for left_type, right_type in zip(left_dtypes, right_dtypes):
+            match (left_type, right_type):
+                case (snowpark.types._NumericType(), snowpark.types.StringType()) | (
+                    snowpark.types.StringType(),
+                    snowpark.types._NumericType(),
+                ):
+                    common_type = snowpark.types.StringType()
+                    target_left_dtypes.append(common_type)
+                    target_right_dtypes.append(common_type)
+                case _:
+                    target_left_dtypes.append(left_type)
+                    target_right_dtypes.append(right_type)
+
+        left_result = cast_columns(
+            left_result,
+            left_dtypes,
+            target_left_dtypes,
+            left_result.column_map,
+        )
+        right_result = cast_columns(
+            right_result,
+            right_dtypes,
+            target_right_dtypes,
+            right_result.column_map,
+        )
+        left_df = left_result.dataframe
+        right_df = right_result.dataframe
+
 if rel.set_op.is_all:
     # Snowflake except removes all duplicated rows. In order to handle the case,
     # we add a partition row number column to the df to make duplicated rows unique to
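
Both map_union and map_except now drive these coercions through structural pattern matching over (left_type, right_type) pairs. The sketch below reproduces the two case shapes involved (an or-pattern of class patterns, and the same with an if guard) using hypothetical stand-in types; it illustrates the technique only and is not part of the module's API.

# A standalone sketch of the type-pair matching used in map_union/map_except.
# DateType/StringType/IntType are hypothetical stand-ins for snowpark.types.
class DateType: ...
class StringType: ...
class IntType: ...

def common_type(left, right, ansi_enabled: bool = False):
    match (left, right):
        case (IntType(), StringType()) | (StringType(), IntType()):
            return StringType()  # numeric/string mixes coerce to string
        case (DateType(), StringType()) | (StringType(), DateType()) if not ansi_enabled:
            return StringType()  # date/string mixes coerce only outside ANSI mode
        case _:
            return None  # no common cast; each side keeps its type

assert isinstance(common_type(IntType(), StringType()), StringType)
assert common_type(DateType(), StringType(), ansi_enabled=True) is None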