spark-utils 2.0.1__tar.gz → 2.0.2__tar.gz
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {spark_utils-2.0.1 → spark_utils-2.0.2}/PKG-INFO +1 -1
- {spark_utils-2.0.1 → spark_utils-2.0.2}/pyproject.toml +1 -1
- spark_utils-2.0.2/spark_utils/_version.py +1 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/functions.py +8 -3
- spark_utils-2.0.1/spark_utils/_version.py +0 -1
- {spark_utils-2.0.1 → spark_utils-2.0.2}/LICENSE +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/README.md +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/__init__.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/__init__.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/spark_job_args.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/spark_session_provider.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/spark_sql_utils.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/common/spark_udf.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/dataframes/__init__.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/dataframes/functions.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/dataframes/models.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/dataframes/sets/__init__.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/dataframes/sets/functions.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/delta_lake/__init__.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/delta_lake/delta_log.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/delta_lake/functions.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/__init__.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/delta_lake_config.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/hive_metastore_config.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/hive_table.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/iceberg_rest_config.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/job_socket.py +0 -0
- {spark_utils-2.0.1 → spark_utils-2.0.2}/spark_utils/models/k8s_config.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = '2.0.2'
|
|
@@ -88,6 +88,7 @@ def write_to_socket(
|
|
|
88
88
|
partition_by: Optional[List[str]] = None,
|
|
89
89
|
partition_count: Optional[int] = None,
|
|
90
90
|
write_options: Optional[Dict[str, str]] = None,
|
|
91
|
+
mode: str = "overwrite",
|
|
91
92
|
) -> None:
|
|
92
93
|
"""Writes data to socket
|
|
93
94
|
|
|
@@ -95,7 +96,8 @@ def write_to_socket(
|
|
|
95
96
|
:param socket: Socket to write to
|
|
96
97
|
:param partition_by: List of column names to partition by
|
|
97
98
|
:param partition_count: Number of partitions to split result into.
|
|
98
|
-
:param write_options: Write options passed to spark (e.g. Parquet options
|
|
99
|
+
:param write_options: Write options passed to spark (e.g. Parquet options)
|
|
100
|
+
:param mode: Write mode
|
|
99
101
|
found here: https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#data-source-option)
|
|
100
102
|
"""
|
|
101
103
|
write_options = write_options or {}
|
|
@@ -105,10 +107,13 @@ def write_to_socket(
|
|
|
105
107
|
|
|
106
108
|
# ignore all external write options as Iceberg writer will take care of those
|
|
107
109
|
if socket.data_format.startswith("iceberg"):
|
|
108
|
-
|
|
110
|
+
if mode == "overwrite":
|
|
111
|
+
data.writeTo(socket.data_path).createOrReplace()
|
|
112
|
+
if mode == "append":
|
|
113
|
+
data.writeTo(socket.data_path).append()
|
|
109
114
|
return
|
|
110
115
|
|
|
111
|
-
writer = data.write.mode(
|
|
116
|
+
writer = data.write.mode(mode).options(**write_options)
|
|
112
117
|
|
|
113
118
|
if partition_by:
|
|
114
119
|
writer = writer.partitionBy(*partition_by)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = '2.0.1'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|