sws-spark-dissemination-helper 0.0.145__tar.gz → 0.0.147__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (16):
  1. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/PKG-INFO +1 -1
  2. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/pyproject.toml +1 -1
  3. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py +18 -2
  4. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/.gitignore +0 -0
  5. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/LICENSE +0 -0
  6. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/README.md +0 -0
  7. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py +0 -0
  8. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSDatatablesExportHelper.py +0 -0
  9. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +0 -0
  10. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSPostgresSparkReader.py +0 -0
  11. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py +0 -0
  12. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/__init__.py +0 -0
  13. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/constants.py +0 -0
  14. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/src/sws_spark_dissemination_helper/utils.py +0 -0
  15. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/tests/__init__.py +0 -0
  16. {sws_spark_dissemination_helper-0.0.145 → sws_spark_dissemination_helper-0.0.147}/tests/test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sws-spark-dissemination-helper
3
- Version: 0.0.145
3
+ Version: 0.0.147
4
4
  Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
5
5
  Project-URL: Repository, https://github.com/un-fao/fao-sws-it-python-spark-dissemination-helper
6
6
  Author-email: Daniele Mansillo <danielemansillo@gmail.com>
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sws-spark-dissemination-helper"
7
- version = "0.0.145"
7
+ version = "0.0.147"
8
8
  dependencies = [
9
9
  "annotated-types==0.7.0",
10
10
  "boto3>=1.36.18",
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  from copy import copy
3
- from typing import Dict, List, Tuple
3
+ from typing import Dict, List, Tuple, Union
4
4
 
5
5
  import pyspark.sql.functions as F
6
6
  from pyspark.sql import DataFrame, SparkSession
@@ -26,6 +26,7 @@ class SWSEasyIcebergSparkHelper:
26
26
  dataset_tables: DatasetTables = None,
27
27
  keep_history: bool = False,
28
28
  write_csv: bool = True,
29
+ source_tag: Union[str, None] = None,
29
30
  ) -> None:
30
31
  self.spark: SparkSession = spark
31
32
  self.dataset_details: dict = dataset_details
@@ -37,6 +38,7 @@ class SWSEasyIcebergSparkHelper:
37
38
  self.iceberg_tables: IcebergTables = iceberg_tables
38
39
  self.keep_history: bool = keep_history
39
40
  self.write_csv: bool = write_csv
41
+ self.source_tag: Union[str, None] = source_tag
40
42
 
41
43
  if dataset_details is not None:
42
44
  (
@@ -496,9 +498,23 @@ class SWSEasyIcebergSparkHelper:
496
498
  .drop("m.observation_id")
497
499
  )
498
500
 
499
- def write_data_to_iceberg_and_csv(self, sql=False) -> DataFrame:
501
+ def _gen_denormalied_data_sql_from_tag(self) -> DataFrame:
502
+ return (
503
+ self._gen_denormalized_observation_sql_from_tag()
504
+ .alias("o")
505
+ .join(
506
+ self._gen_grouped_metadata_sql().alias("m"),
507
+ col("o.id") == col("m.observation_id"),
508
+ "left",
509
+ )
510
+ .drop("m.observation_id")
511
+ )
512
+
513
+ def write_data_to_iceberg_and_csv(self, sql=False, from_tag=False) -> DataFrame:
500
514
  if sql:
501
515
  self.df_denorm = self._gen_denormalied_data_sql()
516
+ elif from_tag:
517
+ self.df_denorm = self._gen_denormalied_data_sql_from_tag()
502
518
  else:
503
519
  self.df_denorm = self._gen_denormalied_data()
504
520