snowpark-connect 0.30.1__py3-none-any.whl → 0.31.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of snowpark-connect might be problematic.

Files changed (78)
  1. snowflake/snowpark_connect/column_name_handler.py +150 -25
  2. snowflake/snowpark_connect/config.py +51 -16
  3. snowflake/snowpark_connect/date_time_format_mapping.py +71 -13
  4. snowflake/snowpark_connect/error/error_codes.py +50 -0
  5. snowflake/snowpark_connect/error/error_utils.py +142 -22
  6. snowflake/snowpark_connect/error/exceptions.py +13 -4
  7. snowflake/snowpark_connect/execute_plan/map_execution_command.py +5 -1
  8. snowflake/snowpark_connect/execute_plan/map_execution_root.py +5 -1
  9. snowflake/snowpark_connect/execute_plan/utils.py +5 -1
  10. snowflake/snowpark_connect/expression/function_defaults.py +9 -2
  11. snowflake/snowpark_connect/expression/literal.py +7 -1
  12. snowflake/snowpark_connect/expression/map_cast.py +17 -5
  13. snowflake/snowpark_connect/expression/map_expression.py +48 -4
  14. snowflake/snowpark_connect/expression/map_extension.py +25 -5
  15. snowflake/snowpark_connect/expression/map_sql_expression.py +65 -30
  16. snowflake/snowpark_connect/expression/map_udf.py +10 -2
  17. snowflake/snowpark_connect/expression/map_unresolved_attribute.py +33 -9
  18. snowflake/snowpark_connect/expression/map_unresolved_function.py +627 -205
  19. snowflake/snowpark_connect/expression/map_unresolved_star.py +5 -1
  20. snowflake/snowpark_connect/expression/map_update_fields.py +14 -4
  21. snowflake/snowpark_connect/expression/map_window_function.py +18 -3
  22. snowflake/snowpark_connect/relation/catalogs/abstract_spark_catalog.py +65 -17
  23. snowflake/snowpark_connect/relation/catalogs/snowflake_catalog.py +34 -12
  24. snowflake/snowpark_connect/relation/catalogs/utils.py +12 -4
  25. snowflake/snowpark_connect/relation/io_utils.py +6 -1
  26. snowflake/snowpark_connect/relation/map_catalog.py +5 -1
  27. snowflake/snowpark_connect/relation/map_column_ops.py +88 -56
  28. snowflake/snowpark_connect/relation/map_extension.py +28 -8
  29. snowflake/snowpark_connect/relation/map_join.py +21 -10
  30. snowflake/snowpark_connect/relation/map_local_relation.py +5 -1
  31. snowflake/snowpark_connect/relation/map_relation.py +33 -7
  32. snowflake/snowpark_connect/relation/map_row_ops.py +23 -7
  33. snowflake/snowpark_connect/relation/map_sql.py +91 -24
  34. snowflake/snowpark_connect/relation/map_stats.py +5 -1
  35. snowflake/snowpark_connect/relation/map_udtf.py +14 -4
  36. snowflake/snowpark_connect/relation/read/jdbc_read_dbapi.py +49 -13
  37. snowflake/snowpark_connect/relation/read/map_read.py +15 -3
  38. snowflake/snowpark_connect/relation/read/map_read_csv.py +11 -3
  39. snowflake/snowpark_connect/relation/read/map_read_jdbc.py +17 -5
  40. snowflake/snowpark_connect/relation/read/map_read_json.py +8 -2
  41. snowflake/snowpark_connect/relation/read/map_read_parquet.py +13 -3
  42. snowflake/snowpark_connect/relation/read/map_read_socket.py +11 -3
  43. snowflake/snowpark_connect/relation/read/map_read_table.py +15 -5
  44. snowflake/snowpark_connect/relation/read/map_read_text.py +5 -1
  45. snowflake/snowpark_connect/relation/read/metadata_utils.py +5 -1
  46. snowflake/snowpark_connect/relation/stage_locator.py +5 -1
  47. snowflake/snowpark_connect/relation/write/jdbc_write_dbapi.py +19 -3
  48. snowflake/snowpark_connect/relation/write/map_write.py +131 -34
  49. snowflake/snowpark_connect/relation/write/map_write_jdbc.py +8 -2
  50. snowflake/snowpark_connect/resources_initializer.py +5 -1
  51. snowflake/snowpark_connect/server.py +72 -19
  52. snowflake/snowpark_connect/type_mapping.py +54 -17
  53. snowflake/snowpark_connect/utils/context.py +42 -1
  54. snowflake/snowpark_connect/utils/describe_query_cache.py +3 -0
  55. snowflake/snowpark_connect/utils/env_utils.py +5 -1
  56. snowflake/snowpark_connect/utils/identifiers.py +11 -3
  57. snowflake/snowpark_connect/utils/pandas_udtf_utils.py +8 -4
  58. snowflake/snowpark_connect/utils/profiling.py +25 -8
  59. snowflake/snowpark_connect/utils/scala_udf_utils.py +11 -3
  60. snowflake/snowpark_connect/utils/session.py +5 -1
  61. snowflake/snowpark_connect/utils/telemetry.py +6 -0
  62. snowflake/snowpark_connect/utils/temporary_view_cache.py +5 -1
  63. snowflake/snowpark_connect/utils/udf_cache.py +5 -3
  64. snowflake/snowpark_connect/utils/udf_helper.py +20 -6
  65. snowflake/snowpark_connect/utils/udf_utils.py +4 -4
  66. snowflake/snowpark_connect/utils/udtf_helper.py +5 -1
  67. snowflake/snowpark_connect/utils/udtf_utils.py +34 -26
  68. snowflake/snowpark_connect/version.py +1 -1
  69. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/METADATA +3 -2
  70. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/RECORD +78 -77
  71. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-connect +0 -0
  72. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-session +0 -0
  73. {snowpark_connect-0.30.1.data → snowpark_connect-0.31.0.data}/scripts/snowpark-submit +0 -0
  74. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/WHEEL +0 -0
  75. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE-binary +0 -0
  76. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/LICENSE.txt +0 -0
  77. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/licenses/NOTICE-binary +0 -0
  78. {snowpark_connect-0.30.1.dist-info → snowpark_connect-0.31.0.dist-info}/top_level.txt +0 -0

snowflake/snowpark_connect/column_name_handler.py

@@ -20,7 +20,12 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
  from snowflake.snowpark._internal.utils import quote_name
  from snowflake.snowpark.types import StructType
  from snowflake.snowpark_connect.config import global_config
- from snowflake.snowpark_connect.utils.context import get_current_operation_scope
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
+ from snowflake.snowpark_connect.utils.context import (
+     get_current_operation_scope,
+     get_is_processing_order_by,
+ )
  from snowflake.snowpark_connect.utils.identifiers import (
      split_fully_qualified_spark_name,
  )
@@ -142,7 +147,7 @@ class ColumnNameMap:
              parent_column_name_map: parent ColumnNameMap
          """
          self.columns: list[ColumnNames] = []
-         self.spark_to_col = defaultdict(list)
+         self.spark_to_col: defaultdict[str, list[ColumnNames]] = defaultdict(list)
          self.uppercase_spark_to_col = defaultdict(list)
          self.snowpark_to_col = defaultdict(list)
          self.is_case_sensitive = is_case_sensitive
@@ -353,18 +358,100 @@ class ColumnNameMap:

          snowpark_names_len = len(snowpark_names)
          if snowpark_names_len > 1:
-             raise AnalysisException(
-                 f"Ambiguous spark column name {spark_column_name}, potential snowpark column names {snowpark_names}"
-             )
+             # Check if this is a case where we have identical expressions that can be safely resolved to the first one
+             # This commonly happens with GROUP BY expressions that also appear in SELECT clauses
+             if (
+                 get_is_processing_order_by()
+                 and self._can_resolve_ambiguous_identical_expressions(
+                     resolved_name, snowpark_names
+                 )
+             ):
+                 # All the ambiguous columns represent the same expression, so we can safely use the first one
+                 return snowpark_names[0]
+             else:
+                 exception = AnalysisException(
+                     f"Ambiguous spark column name {spark_column_name}, potential snowpark column names {snowpark_names}"
+                 )
+                 attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+                 raise exception
          elif snowpark_names_len == 0:
              if allow_non_exists:
                  return None
              else:
-                 raise AnalysisException(
+                 exception = AnalysisException(
                      f"Spark column name {spark_column_name} does not exist"
                  )
+                 attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                 raise exception
          return snowpark_names[0]

+     def _can_resolve_ambiguous_identical_expressions(
+         self, spark_column_name: str, snowpark_names: list[str]
+     ) -> bool:
+         """
+         Determine if ambiguous columns represent identical expressions that can be safely resolved to the first one.
+
+         This handles the common case where the same expression (like a UDF call) appears multiple times
+         in a SELECT clause within a GROUP BY query. Since they're the same expression operating on the
+         same grouped data, they will have identical values, so we can safely resolve to any of them.
+
+         Args:
+             spark_column_name: The Spark column name that has multiple mappings; make sure it is resolved beforehand
+             snowpark_names: List of Snowpark column names that map to this Spark column name
+
+         Returns:
+             True if we can safely resolve to the first snowpark column, False otherwise
+         """
+         if spark_column_name not in self.spark_to_col:
+             return False
+
+         columns = self.spark_to_col[spark_column_name]
+
+         # If we don't have multiple columns, there's no ambiguity to resolve
+         if len(columns) <= 1:
+             return False
+
+         # Check if all the snowpark names correspond to columns that have identical underlying expressions
+         # We'll compare the actual column objects to see if they represent the same computation
+         first_column = columns[0]
+
+         for column in columns[1:]:
+             # Check snowpark_type attribute
+             # If one has the attribute but the other doesn't, they're different
+             if hasattr(first_column, "snowpark_type") != hasattr(
+                 column, "snowpark_type"
+             ):
+                 return False
+             # If both have the attribute and values differ, they're different expressions
+             if (
+                 hasattr(first_column, "snowpark_type")
+                 and hasattr(column, "snowpark_type")
+                 and first_column.snowpark_type != column.snowpark_type
+             ):
+                 return False
+
+             # Check qualifiers attribute
+             # If one has the attribute but the other doesn't, they're different
+             if hasattr(first_column, "qualifiers") != hasattr(column, "qualifiers"):
+                 return False
+             # If both have the attribute and values differ, they might be from different contexts
+             if (
+                 hasattr(first_column, "qualifiers")
+                 and hasattr(column, "qualifiers")
+                 and first_column.qualifiers != column.qualifiers
+             ):
+                 return False
+
+         # Additional safety check: ensure all snowpark names are actually in our mapping
+         for snowpark_name in snowpark_names:
+             if snowpark_name not in self.snowpark_to_col:
+                 return False
+
+         # If we reach here, the columns appear to be identical expressions from the same context
+         # This commonly happens in GROUP BY scenarios where the same expression appears in both
+         # the grouping clause and the select clause
+         return True
+
      def get_spark_column_names_from_snowpark_column_names(
          self,
          snowpark_column_names: list[str],
@@ -390,16 +477,20 @@ class ColumnNameMap:
          )
          spark_names_len = len(spark_names)
          if spark_names_len > 1:
-             raise AnalysisException(
+             exception = AnalysisException(
                  f"Ambiguous snowpark column name {snowpark_column_name}, potential spark column names {spark_names}"
              )
+             attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+             raise exception
          elif spark_names_len == 0:
              if allow_non_exists:
                  return None
              else:
-                 raise AnalysisException(
+                 exception = AnalysisException(
                      f"Snowpark column name {snowpark_column_name} does not exist"
                  )
+                 attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+                 raise exception
          return spark_names[0]

      def get_spark_column_name(self, idx: int) -> str:
@@ -604,14 +695,18 @@ class JoinColumnNameMap(ColumnNameMap):
          if allow_non_exists:
              return None
          else:
-             raise AnalysisException(
+             exception = AnalysisException(
                  f"Spark column name {spark_column_name} does not exist in either left or right DataFrame"
              )
+             attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
+             raise exception

          if (snowpark_column_name_in_right is not None) and (
              snowpark_column_name_in_left is not None
          ):
-             raise AnalysisException(f"Ambiguous column name {spark_column_name}")
+             exception = AnalysisException(f"Ambiguous column name {spark_column_name}")
+             attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+             raise exception

          snowpark_name = (
              snowpark_column_name_in_right
@@ -637,57 +732,85 @@ class JoinColumnNameMap(ColumnNameMap):
      def get_snowpark_column_names_from_spark_column_names(
          self, spark_column_names: list[str], return_first: bool = False
      ) -> list[str]:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def get_spark_column_names_from_snowpark_column_names(
          self,
          snowpark_column_names: list[str],
      ) -> list[str]:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def get_spark_column_name_from_snowpark_column_name(
          self, snowpark_column_name: str
      ) -> str:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def get_spark_columns(self) -> list[str]:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def get_snowpark_columns(self) -> list[str]:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def get_snowpark_columns_after_drop(self, cols_to_drop: list[str]) -> list[str]:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def get_renamed_nested_column_name(self, name) -> str | None:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def has_spark_column(self, spark_column_name: str) -> bool:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def snowpark_to_spark_map(self) -> dict[str, str]:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def spark_to_snowpark_for_pattern(self, pattern: str) -> list[tuple[str, str]]:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def with_columns(
          self, new_spark_columns: list[str], new_snowpark_columns: list[str]
      ) -> tuple[list[str], list[str], list[list[str]]]:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def get_qualifiers(self) -> list[list[str]]:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def get_qualifiers_for_columns_after_drop(
          self, cols_to_drop: list[str]
      ) -> list[list[str]]:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def get_spark_and_snowpark_columns_with_qualifier_for_qualifier(
          self, qualifiers_input: list[str]
      ) -> tuple[list[str], list[str], list[list[str]]]:
-         raise NotImplementedError("Method not implemented!")
+         exception = NotImplementedError("Method not implemented!")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      def get_qualifier_for_spark_column(self, spark_column_name: str) -> list[str]:

@@ -699,6 +822,8 @@ class JoinColumnNameMap(ColumnNameMap):
          )

          if (len(qualifier_left) > 0) and (len(qualifier_right) > 0):
-             raise AnalysisException(f"Ambiguous column name {spark_column_name}")
+             exception = AnalysisException(f"Ambiguous column name {spark_column_name}")
+             attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
+             raise exception

          return qualifier_right if len(qualifier_left) == 0 else qualifier_left
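
Every raise site in column_name_handler.py now follows the same three-step shape: build the exception, tag it with attach_custom_error_code, then raise it. A minimal sketch of that pattern, assuming a hypothetical resolve_column helper in place of the real ColumnNameMap methods:

from pyspark.errors import AnalysisException

from snowflake.snowpark_connect.error.error_codes import ErrorCodes
from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code


def resolve_column(spark_to_snowpark: dict[str, list[str]], spark_name: str) -> str:
    # Hypothetical lookup standing in for ColumnNameMap's internal mapping.
    candidates = spark_to_snowpark.get(spark_name, [])
    if len(candidates) > 1:
        exception = AnalysisException(
            f"Ambiguous spark column name {spark_name}, potential snowpark column names {candidates}"
        )
        attach_custom_error_code(exception, ErrorCodes.AMBIGUOUS_COLUMN_NAME)
        raise exception
    if not candidates:
        exception = AnalysisException(f"Spark column name {spark_name} does not exist")
        attach_custom_error_code(exception, ErrorCodes.COLUMN_NOT_FOUND)
        raise exception
    return candidates[0]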

snowflake/snowpark_connect/config.py

@@ -21,8 +21,13 @@ from snowflake.snowpark._internal.analyzer.analyzer_utils import (
  )
  from snowflake.snowpark.exceptions import SnowparkSQLException
  from snowflake.snowpark.types import TimestampTimeZone, TimestampType
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code
  from snowflake.snowpark_connect.utils.concurrent import SynchronizedDict
- from snowflake.snowpark_connect.utils.context import get_session_id
+ from snowflake.snowpark_connect.utils.context import (
+     get_jpype_jclass_lock,
+     get_session_id,
+ )
  from snowflake.snowpark_connect.utils.external_udxf_cache import (
      clear_external_udxf_cache,
  )
@@ -291,7 +296,6 @@ class SessionConfig:
      """This class contains the session configuration for the Spark Server."""

      default_session_config = {
-         "snowpark.connect.sql.identifiers.auto-uppercase": "all_except_columns",
          "snowpark.connect.sql.passthrough": "false",
          "snowpark.connect.cte.optimization_enabled": "false",
          "snowpark.connect.udtf.compatibility_mode": "false",
@@ -359,9 +363,11 @@ def route_config_proto(
              if not pair.HasField("value"):
                  from pyspark.errors import IllegalArgumentException

-                 raise IllegalArgumentException(
+                 exception = IllegalArgumentException(
                      f"Cannot set config '{pair.key}' to None"
                  )
+                 attach_custom_error_code(exception, ErrorCodes.INVALID_CONFIG_VALUE)
+                 raise exception

              set_config_param(
                  config.session_id, pair.key, pair.value, snowpark_session
@@ -444,7 +450,11 @@ def route_config_proto(
                  pair.value = str(global_config.is_modifiable(key)).lower()
              return res
          case _:
-             raise SnowparkConnectNotImplementedError(f"Unexpected request {config}")
+             exception = SnowparkConnectNotImplementedError(
+                 f"Unexpected request {config}"
+             )
+             attach_custom_error_code(exception, ErrorCodes.UNSUPPORTED_OPERATION)
+             raise exception


  def set_config_param(
@@ -484,19 +494,27 @@ def _verify_static_config_not_modified(key: str) -> None:
      # https://github.com/apache/spark/blob/v3.5.3/sql/core/src/main/scala/org/apache/spark/sql/RuntimeConfig.scala#L161
      # Spark does not allow to modify static configurations at runtime.
      if global_config.is_static_config(key) and global_config.is_set(key):
-         raise ValueError(f"Cannot modify the value of a static config: {key}")
+         exception = ValueError(f"Cannot modify the value of a static config: {key}")
+         attach_custom_error_code(exception, ErrorCodes.CONFIG_CHANGE_NOT_ALLOWED)
+         raise exception


  def _verify_is_valid_config_value(key: str, value: Any) -> None:
      if key in CONFIG_ALLOWED_VALUES and value not in CONFIG_ALLOWED_VALUES[key]:
-         raise ValueError(
+         exception = ValueError(
              f"Invalid value '{value}' for key '{key}'. Allowed values: {', '.join(CONFIG_ALLOWED_VALUES[key])}."
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_CONFIG_VALUE)
+         raise exception


  def _verify_is_not_readonly_config(key):
      if key in global_config.readonly_config_list:
-         raise ValueError(f"Config with key {key} is read-only and cannot be modified.")
+         exception = ValueError(
+             f"Config with key {key} is read-only and cannot be modified."
+         )
+         attach_custom_error_code(exception, ErrorCodes.CONFIG_CHANGE_NOT_ALLOWED)
+         raise exception


  def set_jvm_timezone(timezone_id: str):
@@ -513,10 +531,13 @@ def set_jvm_timezone(timezone_id: str):
          RuntimeError: If JVM is not started
      """
      if not jpype.isJVMStarted():
-         raise RuntimeError("JVM must be started before setting timezone")
+         exception = RuntimeError("JVM must be started before setting timezone")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      try:
-         TimeZone = jpype.JClass("java.util.TimeZone")
+         with get_jpype_jclass_lock():
+             TimeZone = jpype.JClass("java.util.TimeZone")
          new_timezone = TimeZone.getTimeZone(timezone_id)
          TimeZone.setDefault(new_timezone)

@@ -528,7 +549,9 @@ def reset_jvm_timezone_to_system_default():
  def reset_jvm_timezone_to_system_default():
      """Reset JVM timezone to the system's default timezone"""
      if not jpype.isJVMStarted():
-         raise RuntimeError("JVM must be started first")
+         exception = RuntimeError("JVM must be started first")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception

      try:
          TimeZone = jpype.JClass("java.util.TimeZone")
@@ -537,9 +560,13 @@ def reset_jvm_timezone_to_system_default():
              f"Reset JVM timezone to system default: {TimeZone.getDefault().getID()}"
          )
      except jpype.JException as e:
-         raise RuntimeError(f"Java exception while resetting timezone: {e}")
+         exception = RuntimeError(f"Java exception while resetting timezone: {e}")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception
      except Exception as e:
-         raise RuntimeError(f"Unexpected error resetting JVM timezone: {e}")
+         exception = RuntimeError(f"Unexpected error resetting JVM timezone: {e}")
+         attach_custom_error_code(exception, ErrorCodes.INTERNAL_ERROR)
+         raise exception


  def set_snowflake_parameters(
@@ -629,16 +656,24 @@ def get_describe_cache_ttl_seconds() -> int:

  def auto_uppercase_column_identifiers() -> bool:
      session_config = sessions_config[get_session_id()]
-     return session_config[
+     auto_upper_case_config = session_config[
          "snowpark.connect.sql.identifiers.auto-uppercase"
-     ].lower() in ("all", "only_columns")
+     ]
+     if auto_upper_case_config:
+         return auto_upper_case_config.lower() in ("all", "only_columns")
+
+     return not global_config.spark_sql_caseSensitive


  def auto_uppercase_non_column_identifiers() -> bool:
      session_config = sessions_config[get_session_id()]
-     return session_config[
+     auto_upper_case_config = session_config[
          "snowpark.connect.sql.identifiers.auto-uppercase"
-     ].lower() in ("all", "all_except_columns")
+     ]
+     if auto_upper_case_config:
+         return auto_upper_case_config.lower() in ("all", "all_except_columns")
+
+     return not global_config.spark_sql_caseSensitive


  def parse_imports(session: snowpark.Session, imports: str | None) -> None:
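
The hard-coded default for snowpark.connect.sql.identifiers.auto-uppercase is removed from default_session_config, and both auto-uppercase helpers now fall back to Spark's case-sensitivity flag when the key is unset. A hedged sketch of that decision flow; the real helpers read sessions_config[get_session_id()] and global_config.spark_sql_caseSensitive rather than taking parameters:

def auto_uppercase_non_column_identifiers(
    session_config: dict[str, str | None], spark_sql_case_sensitive: bool
) -> bool:
    value = session_config.get("snowpark.connect.sql.identifiers.auto-uppercase")
    if value:
        # An explicit session setting still wins, exactly as before.
        return value.lower() in ("all", "all_except_columns")
    # No explicit setting: mirror Spark's case-sensitivity switch instead of the
    # removed hard-coded default of "all_except_columns".
    return not spark_sql_case_sensitive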

snowflake/snowpark_connect/date_time_format_mapping.py

@@ -24,6 +24,8 @@
  from pyspark.errors.exceptions.base import DateTimeException

  from snowflake.snowpark.types import DataType, StringType
+ from snowflake.snowpark_connect.error.error_codes import ErrorCodes
+ from snowflake.snowpark_connect.error.error_utils import attach_custom_error_code


  # TODO: There are more patterns where spark may throw an error.
@@ -229,9 +231,11 @@ def convert_spark_format_to_snowflake(
      timestamp_input_type: DataType | None = None,
  ):
      if spark_format in {"Y", "w", "W"}:
-         raise DateTimeException(
+         exception = DateTimeException(
              f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter."
          )
+         attach_custom_error_code(exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT)
+         raise exception
      snowflake_format = ""
      i = 0
      n = len(spark_format)
@@ -299,28 +303,46 @@ def convert_spark_format_to_snowflake(
                  # Spark's 'a' would be at most 1 times
                  is_valid_a_pattern = spark_format[i : i + 2] != char * 2
                  if not is_valid_a_pattern:
-                     raise DateTimeException(
+                     exception = DateTimeException(
                          f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
                      )
+                     attach_custom_error_code(
+                         exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                     )
+                     raise exception
              case "h" | "K" | "k" | "H" | "m" | "s" | "d":
                  # Spark's characters would be at most 2 times
                  is_valid_2_patterns = spark_format[i : i + 3] != char * 3
                  if not is_valid_2_patterns:
-                     raise DateTimeException(
+                     exception = DateTimeException(
                          f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
                      )
+                     attach_custom_error_code(
+                         exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                     )
+                     raise exception
              case "D":
                  # Spark's 'D' would be at most 3 times
                  is_valid_D_patterns = spark_format[i : i + 4] != char * 4
                  if not is_valid_D_patterns:
-                     raise DateTimeException(
+                     exception = DateTimeException(
                          f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
                      )
+                     attach_custom_error_code(
+                         exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                     )
+                     raise exception
              case "V":
                  # Spark's 'V' for Zone ID requires 'VV'. A single 'V' is invalid.
                  is_valid_vv_pattern = spark_format[i : i + 2] == "VV"
                  if not is_valid_vv_pattern:
-                     raise DateTimeException("Pattern letter count must be 2: V")
+                     exception = DateTimeException(
+                         "Pattern letter count must be 2: V"
+                     )
+                     attach_custom_error_code(
+                         exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                     )
+                     raise exception
              case "O":
                  # Spark's 'O' would be either 1 or 4.
                  is_valid_o_or_oooo_pattern = spark_format[i : i + 2] != "OO" or (
@@ -328,28 +350,52 @@ def convert_spark_format_to_snowflake(
                      and spark_format[i : i + 5] != "OOOOO"
                  )
                  if not is_valid_o_or_oooo_pattern:
-                     raise DateTimeException(
+                     exception = DateTimeException(
                          "Pattern letter count must be 1 or 4: O"
                      )
+                     attach_custom_error_code(
+                         exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                     )
+                     raise exception
              case "q" | "Q" | "z" | "E":
                  # Spark's characters would be at most 4 times
                  is_valid_4_patterns = spark_format[i : i + 5] != char * 5
                  if not is_valid_4_patterns:
-                     raise DateTimeException(f"Too many pattern letters: {char}")
+                     exception = DateTimeException(
+                         f"Too many pattern letters: {char}"
+                     )
+                     attach_custom_error_code(
+                         exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                     )
+                     raise exception
              case "x" | "X" | "Z":
                  # Spark's 'x' or 'X' or 'z' or 'Z' would be at most 5 times
                  is_valid_xz_pattern = spark_format[i : i + 6] != char * 6
                  if not is_valid_xz_pattern:
-                     raise DateTimeException(f"Too many pattern letters: {char}")
+                     exception = DateTimeException(
+                         f"Too many pattern letters: {char}"
+                     )
+                     attach_custom_error_code(
+                         exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                     )
+                     raise exception
              case "y":
                  # Spark's 'y' would be at most 6 times
                  is_valid_y_pattern = spark_format[i : i + 7] != char * 7
                  if not is_valid_y_pattern:
-                     raise DateTimeException(
+                     exception = DateTimeException(
                          f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter"
                      )
+                     attach_custom_error_code(
+                         exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                     )
+                     raise exception
              case "C" | "I":
-                 raise DateTimeException(f"Unknown pattern letter: {char}")
+                 exception = DateTimeException(f"Unknown pattern letter: {char}")
+                 attach_custom_error_code(
+                     exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+                 )
+                 raise exception

          if (
              spark_format[i : i + 5] in {"M" * 5, "L" * 5}
@@ -362,9 +408,13 @@ def convert_spark_format_to_snowflake(
              or spark_format[i : i + 3] in {"kkk", "KKK"}
              or spark_format[i : i + 10] == "SSSSSSSSSS"
          ):
-             raise DateTimeException(
+             exception = DateTimeException(
                  f"Fail to recognize '{spark_format}' pattern in the DateTimeFormatter."
              )
+             attach_custom_error_code(
+                 exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+             )
+             raise exception

          matched_pattern = False

@@ -375,7 +425,11 @@ def convert_spark_format_to_snowflake(
                  spark_key
              ]
              if isinstance(snowflake_equivalent, _UnsupportedSparkFormatPattern):
-                 raise DateTimeException(snowflake_equivalent.message)
+                 exception = DateTimeException(snowflake_equivalent.message)
+                 attach_custom_error_code(
+                     exception, ErrorCodes.UNSUPPORTED_OPERATION
+                 )
+                 raise exception
              if snowflake_equivalent is not None:
                  snowflake_format += snowflake_equivalent
                  i += len(spark_key)
@@ -389,7 +443,11 @@ def convert_spark_format_to_snowflake(
              isinstance(timestamp_input_type, StringType)
              and char not in snowflake_time_format_separator
          ):
-             raise DateTimeException(f"Illegal pattern character: {char}")
+             exception = DateTimeException(f"Illegal pattern character: {char}")
+             attach_custom_error_code(
+                 exception, ErrorCodes.INVALID_FUNCTION_ARGUMENT
+             )
+             raise exception

          snowflake_format += f'"{char}"'
          i += 1
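
The repetition checks touched in this file all use one slicing idiom: a pattern letter may repeat at most N times exactly when the next N + 1 characters are not all that letter. A small stand-alone illustration; the helper name and limits below are illustrative, not part of the package:

def repeats_at_most(spark_format: str, i: int, char: str, max_repeat: int) -> bool:
    # True when the letter starting at position i occurs at most max_repeat times.
    return spark_format[i : i + max_repeat + 1] != char * (max_repeat + 1)


assert repeats_at_most("yyyy-MM-dd", 0, "y", 6)   # four 'y's, limit six: accepted
assert not repeats_at_most("EEEEE", 0, "E", 4)    # five 'E's, limit four: rejected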

snowflake/snowpark_connect/error/error_codes.py (new file)

@@ -0,0 +1,50 @@
+ #
+ # Copyright (c) 2012-2025 Snowflake Computing Inc. All rights reserved.
+ #
+
+ """
+ Error code constants for Snowpark Connect.
+
+ This module defines custom error codes that can be attached to exceptions
+ and included in gRPC error responses.
+ """
+
+
+ class ErrorCodes:
+     """Constants for Snowpark Connect custom error codes."""
+
+     # 1000-1999: Startup related errors
+     MISSING_DATABASE = 1001
+     MISSING_SCHEMA = 1002
+     RESOURCE_INITIALIZATION_FAILED = 1003
+     TCP_PORT_ALREADY_IN_USE = 1004
+     INVALID_SPARK_CONNECT_URL = 1005
+     INVALID_STARTUP_INPUT = 1006
+     INVALID_STARTUP_OPERATION = 1007
+     STARTUP_CONNECTION_FAILED = 1008
+
+     # 2000-2999: Configuration related errors
+     INVALID_CONFIG_VALUE = 2001
+     CONFIG_CHANGE_NOT_ALLOWED = 2002
+     CONFIG_NOT_ENABLED = 2003
+
+     # 3000-3999: User code errors
+     INVALID_SQL_SYNTAX = 3001
+     TYPE_MISMATCH = 3002
+     INVALID_CAST = 3003
+     INVALID_FUNCTION_ARGUMENT = 3004
+     ARRAY_INDEX_OUT_OF_BOUNDS = 3005
+     DIVISION_BY_ZERO = 3006
+     INVALID_INPUT = 3007
+     INVALID_OPERATION = 3008
+     INSUFFICIENT_INPUT = 3009
+
+     # 4000-4999: What we don't support
+     UNSUPPORTED_OPERATION = 4001
+     UNSUPPORTED_TYPE = 4002
+
+     # 5000-5999: Internal errors
+     INTERNAL_ERROR = 5001
+     TABLE_NOT_FOUND = 5002
+     COLUMN_NOT_FOUND = 5003
+     AMBIGUOUS_COLUMN_NAME = 5004
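
The new error_codes.py groups its constants by thousands band, as the comments spell out: 1xxx startup, 2xxx configuration, 3xxx user code, 4xxx unsupported features, 5xxx internal errors. A hypothetical helper, not part of the package, that maps a code back to its band:

from snowflake.snowpark_connect.error.error_codes import ErrorCodes


def error_code_band(code: int) -> str:
    # Bands follow the range comments in the ErrorCodes class.
    bands = {1: "startup", 2: "configuration", 3: "user code", 4: "unsupported", 5: "internal"}
    return bands.get(code // 1000, "unknown")


assert error_code_band(ErrorCodes.INVALID_CONFIG_VALUE) == "configuration"
assert error_code_band(ErrorCodes.AMBIGUOUS_COLUMN_NAME) == "internal"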