spark-utils 2.0.1__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {spark_utils-2.0.1 → spark_utils-2.0.2}/PKG-INFO +1 -1
  2. {spark_utils-2.0.1 → spark_utils-2.0.2}/pyproject.toml +1 -1
  3. spark_utils-2.0.2/spark_utils/_version.py +1 -0
  4. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/functions.py +8 -3
  5. spark_utils-2.0.1/spark_utils/_version.py +0 -1
  6. {spark_utils-2.0.1 → spark_utils-2.0.2}/LICENSE +0 -0
  7. {spark_utils-2.0.1 → spark_utils-2.0.2}/README.md +0 -0
  8. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/__init__.py +0 -0
  9. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/__init__.py +0 -0
  10. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/spark_job_args.py +0 -0
  11. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/spark_session_provider.py +0 -0
  12. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/spark_sql_utils.py +0 -0
  13. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/spark_udf.py +0 -0
  14. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/dataframes/__init__.py +0 -0
  15. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/dataframes/functions.py +0 -0
  16. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/dataframes/models.py +0 -0
  17. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/dataframes/sets/__init__.py +0 -0
  18. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/dataframes/sets/functions.py +0 -0
  19. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/delta_lake/__init__.py +0 -0
  20. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/delta_lake/delta_log.py +0 -0
  21. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/delta_lake/functions.py +0 -0
  22. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/__init__.py +0 -0
  23. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/delta_lake_config.py +0 -0
  24. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/hive_metastore_config.py +0 -0
  25. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/hive_table.py +0 -0
  26. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/iceberg_rest_config.py +0 -0
  27. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/job_socket.py +0 -0
  28. {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/k8s_config.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: spark-utils
3
- Version: 2.0.1
3
+ Version: 2.0.2
4
4
  Summary: Utility classes for comfy Spark job authoring.
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "spark-utils"
3
- version = "2.0.1"
3
+ version = "2.0.2"
4
4
  description = "Utility classes for comfy Spark job authoring."
5
5
  authors = ["ECCO Sneaks & Data <esdsupport@ecco.com>"]
6
6
  maintainers = ['GZU <gzu@ecco.com>', 'JRB <ext-jrb@ecco.com>']
@@ -0,0 +1 @@
1
+ __version__ = '2.0.2'
@@ -88,6 +88,7 @@ def write_to_socket(
88
88
  partition_by: Optional[List[str]] = None,
89
89
  partition_count: Optional[int] = None,
90
90
  write_options: Optional[Dict[str, str]] = None,
91
+ mode: str = "overwrite",
91
92
  ) -> None:
92
93
  """Writes data to socket
93
94
 
@@ -95,7 +96,8 @@ def write_to_socket(
95
96
  :param socket: Socket to write to
96
97
  :param partition_by: List of column names to partition by
97
98
  :param partition_count: Number of partitions to split result into.
98
- :param write_options: Write options passed to spark (e.g. Parquet options
99
+ :param write_options: Write options passed to spark (e.g. Parquet options)
100
+ :param mode: Write mode
99
101
  found here: https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#data-source-option)
100
102
  """
101
103
  write_options = write_options or {}
@@ -105,10 +107,13 @@ def write_to_socket(
105
107
 
106
108
  # ignore all external write options as Iceberg writer will take care of those
107
109
  if socket.data_format.startswith("iceberg"):
108
- data.writeTo(socket.data_path).createOrReplace()
110
+ if mode == "overwrite":
111
+ data.writeTo(socket.data_path).createOrReplace()
112
+ if mode == "append":
113
+ data.writeTo(socket.data_path).append()
109
114
  return
110
115
 
111
- writer = data.write.mode("overwrite").options(**write_options)
116
+ writer = data.write.mode(mode).options(**write_options)
112
117
 
113
118
  if partition_by:
114
119
  writer = writer.partitionBy(*partition_by)
@@ -1 +0,0 @@
1
- __version__ = '2.0.1'
File without changes
File without changes