sws-spark-dissemination-helper 0.0.166__py3-none-any.whl → 0.0.167__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +78 -0
- {sws_spark_dissemination_helper-0.0.166.dist-info → sws_spark_dissemination_helper-0.0.167.dist-info}/METADATA +1 -1
- {sws_spark_dissemination_helper-0.0.166.dist-info → sws_spark_dissemination_helper-0.0.167.dist-info}/RECORD +5 -5
- {sws_spark_dissemination_helper-0.0.166.dist-info → sws_spark_dissemination_helper-0.0.167.dist-info}/WHEEL +0 -0
- {sws_spark_dissemination_helper-0.0.166.dist-info → sws_spark_dissemination_helper-0.0.167.dist-info}/licenses/LICENSE +0 -0
|
@@ -296,6 +296,35 @@ class SWSGoldIcebergSparkHelper:
|
|
|
296
296
|
|
|
297
297
|
return df
|
|
298
298
|
|
|
299
|
+
def write_gold_faostat_unfiltered_data_to_iceberg_and_csv(
    self, df: DataFrame
) -> DataFrame:
    """Write the unfiltered gold FAOSTAT data to Iceberg and to a cached CSV.

    The expected input to this function is the output of the sws
    disseminated function.

    Steps, in order:
      1. Create or replace the Iceberg table identified by
         ``self.iceberg_tables.GOLD_FAOSTAT_UNFILTERED.iceberg_id`` with
         the contents of ``df``.
      2. Create or replace the tag ``self.tag_name`` on that table.
      3. Coalesce ``df`` to a single partition and hand it to
         ``save_cache_csv`` together with ``self.bucket``, the table's
         ``csv_prefix`` and ``self.tag_name``.

    Args:
        df: DataFrame to persist.

    Returns:
        The input ``df``, unchanged, so calls can be chained.
    """
    unfiltered_table = self.iceberg_tables.GOLD_FAOSTAT_UNFILTERED

    df.writeTo(unfiltered_table.iceberg_id).createOrReplace()

    # Log the table that was actually written. This message previously
    # reported GOLD_FAOSTAT.iceberg_id, which is a different table.
    logging.info(
        f"Gold FAOSTAT unfiltered table written to {unfiltered_table.iceberg_id}"
    )

    self.spark.sql(
        f"ALTER TABLE {unfiltered_table.iceberg_id} CREATE OR REPLACE TAG `{self.tag_name}`"
    )

    logging.info(f"gold FAOSTAT unfiltered tag '{self.tag_name}' created")

    # Single partition before the CSV export; presumably so the export
    # produces one file -- confirm against save_cache_csv.
    df_1 = df.coalesce(1)

    save_cache_csv(
        df=df_1,
        bucket=self.bucket,
        prefix=unfiltered_table.csv_prefix,
        tag_name=self.tag_name,
    )

    return df
|
|
327
|
+
|
|
299
328
|
def write_gold_sws_validated_sws_dissemination_tag(
|
|
300
329
|
self, df: DataFrame, tags: Tags
|
|
301
330
|
) -> DataFrame:
|
|
@@ -589,3 +618,52 @@ class SWSGoldIcebergSparkHelper:
|
|
|
589
618
|
logging.debug(f"Tag with Added csv Table: {tag}")
|
|
590
619
|
|
|
591
620
|
return df
|
|
621
|
+
|
|
622
|
+
def write_gold_faostat_unfiltered_dissemination_tag(
    self, df: DataFrame, tags: Tags
) -> DataFrame:
    """Register the unfiltered gold FAOSTAT outputs on the dissemination tag.

    Gets or creates the tag for ``self.dataset_id`` / ``self.tag_name``,
    then upserts two private GOLD-layer table entries on it: one for the
    Iceberg table and one for the CSV export. Both entries carry the
    column structure taken from ``df.schema``.

    Args:
        df: DataFrame whose schema describes the disseminated columns.
        tags: Tags client passed through to ``get_or_create_tag`` and
            ``upsert_disseminated_table``.

    Returns:
        The input ``df``, unchanged.
    """
    unfiltered = self.iceberg_tables.GOLD_FAOSTAT_UNFILTERED

    # Get or create a new tag
    tag = get_or_create_tag(tags, self.dataset_id, self.tag_name, self.tag_name)
    logging.debug(f"Tag: {tag}")

    # Entry describing the Iceberg table.
    iceberg_entry = BaseDisseminatedTagTable(
        id=f"{self.domain_code.lower()}_gold_faostat_unfiltered_iceberg",
        name=f"{self.domain_code} gold FAOSTAT unfiltered Iceberg",
        description="Gold table containing all the tag data in FAOSTAT format",
        layer=TableLayer.GOLD,
        private=True,
        type=TableType.ICEBERG,
        database=IcebergDatabases.GOLD_DATABASE,
        table=unfiltered.table,
        path=unfiltered.path,
        structure={"columns": df.schema.jsonValue()["fields"]},
    )
    tag = upsert_disseminated_table(
        sws_tags=tags,
        tag=tag,
        dataset_id=self.dataset_id,
        tag_name=self.tag_name,
        table=iceberg_entry,
    )
    logging.debug(f"Tag with Added Iceberg Table: {tag}")

    # Entry describing the CSV export.
    csv_entry = BaseDisseminatedTagTable(
        id=f"{self.domain_code.lower()}_gold_faostat_unfiltered_csv",
        name=f"{self.domain_code} gold FAOSTAT unfiltered csv",
        description="Gold table containing the tag data in FAOSTAT format in csv",
        layer=TableLayer.GOLD,
        private=True,
        type=TableType.CSV,
        path=unfiltered.csv_path,
        structure={"columns": df.schema.jsonValue()["fields"]},
    )
    tag = upsert_disseminated_table(
        sws_tags=tags,
        tag=tag,
        dataset_id=self.dataset_id,
        tag_name=self.tag_name,
        table=csv_entry,
    )
    logging.debug(f"Tag with Added csv Table: {tag}")

    return df
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sws-spark-dissemination-helper
|
|
3
|
-
Version: 0.0.166
|
|
3
|
+
Version: 0.0.167
|
|
4
4
|
Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
|
|
5
5
|
Project-URL: Repository, https://github.com/un-fao/fao-sws-it-python-spark-dissemination-helper
|
|
6
6
|
Author-email: Daniele Mansillo <danielemansillo@gmail.com>
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=ocuau0WtpyRwui0qwdQ_Rxh4nYPOyZoHpGKaWRa6B3Q,28868
|
|
2
2
|
sws_spark_dissemination_helper/SWSDatatablesExportHelper.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py,sha256=csqKyYglBkJSBvEkEa1_keHarZZAIJHaV0d64gGJy98,26379
|
|
4
|
-
sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=
|
|
4
|
+
sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=0dxbVkrhdaASapEffF5PFcgKwAMyJoWBxzgymjZ4JyY,25049
|
|
5
5
|
sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=KpG8gp8Ai9pHDiKhUOTcXWxxmFGeKEE3XKlI_Y-SveU,18453
|
|
6
6
|
sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=qioLv3SlJEfk0LzTiwfXRtZXVImPOJUeh9k1XwHC-pA,26225
|
|
7
7
|
sws_spark_dissemination_helper/__init__.py,sha256=42TPbk7KxAud_qY3Sr_F4F7VjyofUlxEJkUXAFQsjRo,327
|
|
8
8
|
sws_spark_dissemination_helper/constants.py,sha256=zviO6huxWTWonHv4v2M8zKr7HXCDMBGqjHx-eTfGT2A,13487
|
|
9
9
|
sws_spark_dissemination_helper/utils.py,sha256=G7lQqNRrvqZpgm9WmddD7fWsI8IVn09x1p3cV3458EA,21963
|
|
10
|
-
sws_spark_dissemination_helper-0.0.
|
|
11
|
-
sws_spark_dissemination_helper-0.0.
|
|
12
|
-
sws_spark_dissemination_helper-0.0.
|
|
13
|
-
sws_spark_dissemination_helper-0.0.
|
|
10
|
+
sws_spark_dissemination_helper-0.0.167.dist-info/METADATA,sha256=h27GpuoB4elORRQybwqiKoKkF59JOPk6HzkQ4uDsSjo,2824
|
|
11
|
+
sws_spark_dissemination_helper-0.0.167.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
12
|
+
sws_spark_dissemination_helper-0.0.167.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
|
|
13
|
+
sws_spark_dissemination_helper-0.0.167.dist-info/RECORD,,
|
|
File without changes
|