sws-spark-dissemination-helper 0.0.145__tar.gz → 0.0.147__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/PKG-INFO +1 -1
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/pyproject.toml +1 -1
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py +18 -2
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/.gitignore +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/LICENSE +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/README.md +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSDatatablesExportHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSPostgresSparkReader.py +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/__init__.py +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/constants.py +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/utils.py +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/tests/__init__.py +0 -0
- {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/tests/test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sws-spark-dissemination-helper
|
|
3
|
-
Version: 0.0.145
|
|
3
|
+
Version: 0.0.147
|
|
4
4
|
Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
|
|
5
5
|
Project-URL: Repository, https://github.com/un-fao/fao-sws-it-python-spark-dissemination-helper
|
|
6
6
|
Author-email: Daniele Mansillo <danielemansillo@gmail.com>
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from copy import copy
|
|
3
|
-
from typing import Dict, List, Tuple
|
|
3
|
+
from typing import Dict, List, Tuple, Union
|
|
4
4
|
|
|
5
5
|
import pyspark.sql.functions as F
|
|
6
6
|
from pyspark.sql import DataFrame, SparkSession
|
|
@@ -26,6 +26,7 @@ class SWSEasyIcebergSparkHelper:
|
|
|
26
26
|
dataset_tables: DatasetTables = None,
|
|
27
27
|
keep_history: bool = False,
|
|
28
28
|
write_csv: bool = True,
|
|
29
|
+
source_tag: Union[str, None] = None,
|
|
29
30
|
) -> None:
|
|
30
31
|
self.spark: SparkSession = spark
|
|
31
32
|
self.dataset_details: dict = dataset_details
|
|
@@ -37,6 +38,7 @@ class SWSEasyIcebergSparkHelper:
|
|
|
37
38
|
self.iceberg_tables: IcebergTables = iceberg_tables
|
|
38
39
|
self.keep_history: bool = keep_history
|
|
39
40
|
self.write_csv: bool = write_csv
|
|
41
|
+
self.source_tag: Union[str, None] = source_tag
|
|
40
42
|
|
|
41
43
|
if dataset_details is not None:
|
|
42
44
|
(
|
|
@@ -496,9 +498,23 @@ class SWSEasyIcebergSparkHelper:
|
|
|
496
498
|
.drop("m.observation_id")
|
|
497
499
|
)
|
|
498
500
|
|
|
499
|
-
def
|
|
501
|
+
def _gen_denormalied_data_sql_from_tag(self) -> DataFrame:
|
|
502
|
+
return (
|
|
503
|
+
self._gen_denormalized_observation_sql_from_tag()
|
|
504
|
+
.alias("o")
|
|
505
|
+
.join(
|
|
506
|
+
self._gen_grouped_metadata_sql().alias("m"),
|
|
507
|
+
col("o.id") == col("m.observation_id"),
|
|
508
|
+
"left",
|
|
509
|
+
)
|
|
510
|
+
.drop("m.observation_id")
|
|
511
|
+
)
|
|
512
|
+
|
|
513
|
+
def write_data_to_iceberg_and_csv(self, sql=False, from_tag=False) -> DataFrame:
|
|
500
514
|
if sql:
|
|
501
515
|
self.df_denorm = self._gen_denormalied_data_sql()
|
|
516
|
+
elif from_tag:
|
|
517
|
+
self.df_denorm = self._gen_denormalied_data_sql_from_tag()
|
|
502
518
|
else:
|
|
503
519
|
self.df_denorm = self._gen_denormalied_data()
|
|
504
520
|
|
{sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/.gitignore
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/tests/__init__.py
RENAMED
|
File without changes
|
{sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/tests/test.py
RENAMED
|
File without changes
|