sws-spark-dissemination-helper 0.0.86__py3-none-any.whl → 0.0.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py +75 -0
- sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +7 -7
- sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py +4 -0
- {sws_spark_dissemination_helper-0.0.86.dist-info → sws_spark_dissemination_helper-0.0.88.dist-info}/METADATA +1 -1
- {sws_spark_dissemination_helper-0.0.86.dist-info → sws_spark_dissemination_helper-0.0.88.dist-info}/RECORD +7 -7
- {sws_spark_dissemination_helper-0.0.86.dist-info → sws_spark_dissemination_helper-0.0.88.dist-info}/WHEEL +0 -0
- {sws_spark_dissemination_helper-0.0.86.dist-info → sws_spark_dissemination_helper-0.0.88.dist-info}/licenses/LICENSE +0 -0
|
@@ -443,3 +443,78 @@ class SWSBronzeIcebergSparkHelper:
|
|
|
443
443
|
logging.debug(f"Tag with Added csv Table: {tag}")
|
|
444
444
|
|
|
445
445
|
logging.info("Bronze Dissemination tags successfully written")
|
|
446
|
+
|
|
447
|
+
def write_bronze_disseminated_tag_data_to_iceberg_and_csv(
    self, dimensions: Dict[str, List[str]]
) -> DataFrame:
    """Build the filtered dissemination branch of the bronze table and cache it as CSV.

    Steps:
      1. Create (or replace) the branch ``diss_tag_<tag_name>`` on the bronze
         Iceberg table.
      2. For every dimension with a non-empty code list, delete from that
         branch the rows whose dimension value is NOT in the list.
      3. Read the branch back, serialise the ``metadata`` column to JSON,
         coalesce to a single partition and hand it to ``save_cache_csv``.

    Args:
        dimensions: Mapping of dimension column name to the codes to keep.
            Dimensions mapped to an empty list are left unfiltered.

    Returns:
        The DataFrame read from the filtered branch (single partition,
        ``metadata`` as a JSON string). It is also stored on
        ``self.disseminated_tag_df``.
    """
    bronze_table_id = self.iceberg_tables.BRONZE.iceberg_id
    branch_name = f"diss_tag_{self.tag_name}"

    # NOTE(review): the table is addressed through the `tag_<tag_name>`
    # selector; the original carried a commented-out
    # "AS OF VERSION `{tag_name}`" clause here. Confirm the branch really
    # starts from the tag's snapshot.
    self.spark.sql(
        f"ALTER TABLE {bronze_table_id}.`tag_{self.tag_name}` "
        f"CREATE OR REPLACE BRANCH `{branch_name}`"
    )

    for dimension_name, codes in dimensions.items():
        if not codes:
            # No selection for this dimension: keep every code.
            continue
        # NOTE(review): codes and the dimension name are interpolated into
        # the SQL text as-is; a code containing a single quote would break
        # the statement. Assumes both come from trusted configuration.
        not_in_codes = ",".join(f"'{code}'" for code in codes)
        self.spark.sql(
            f"DELETE FROM {bronze_table_id}.`branch_{branch_name}` "
            f"WHERE {dimension_name} NOT IN ({not_in_codes})"
        )

    # Read back the branch that was just filtered. The previous code passed
    # option("branch", self.tag_name), which does not name the branch created
    # above, so the deletions were never reflected in the returned data/CSV.
    disseminated_tag_df = (
        self.spark.read.option("branch", branch_name)
        .table(bronze_table_id)
        .withColumn("metadata", F.to_json(col("metadata")))
        .coalesce(1)
    )

    save_cache_csv(
        df=disseminated_tag_df,
        bucket=self.bucket,
        prefix=f"{self.iceberg_tables.BRONZE.csv_prefix}_disseminated_tag",
        tag_name=self.tag_name,
    )

    # write_bronze_sws_filtered_disseminated_tag reads
    # self.disseminated_tag_df for the table schema; keep it available.
    self.disseminated_tag_df = disseminated_tag_df

    return disseminated_tag_df
|
|
476
|
+
|
|
477
|
+
def write_bronze_sws_filtered_disseminated_tag(self, tags: Tags):
    """Attach the filtered bronze dissemination tables to the dataset tag.

    Fetches (or creates) the tag identified by ``self.dataset_id`` and
    ``self.tag_name``, then adds two dissemination table entries to it: one
    of type ICEBERG and one of type CSV. Both entries take their column
    structure from the schema of ``self.disseminated_tag_df``.

    Args:
        tags: Tags client passed to ``get_or_create_tag`` and used to add
            the dissemination tables.
    """
    # Get or create a new tag
    tag = get_or_create_tag(tags, self.dataset_id, self.tag_name, self.tag_name)
    logging.debug(f"Tag: {tag}")

    # Entry for the Iceberg table.
    iceberg_structure = {
        "columns": self.disseminated_tag_df.schema.jsonValue()["fields"]
    }
    iceberg_entry = BaseDisseminatedTagTable(
        id=f"{self.domain_code.lower()}_bronze_disseminated_tag_iceberg",
        name=f"{self.domain_code} bronze disseminated tag Iceberg",
        description="Bronze table containing the raw data imported from the SWS, denormalized and filtered per dimension",
        layer=TableLayer.BRONZE,
        private=True,
        type=TableType.ICEBERG,
        database=IcebergDatabases.BRONZE_DATABASE,
        table=self.iceberg_tables.BRONZE.table,
        path=self.iceberg_tables.BRONZE.path,
        structure=iceberg_structure,
    )
    tag = tags.add_dissemination_table(self.dataset_id, self.tag_name, iceberg_entry)
    logging.debug(f"Tag with Added Iceberg Table: {tag}")

    # Entry for the CSV cache.
    csv_structure = {
        "columns": self.disseminated_tag_df.schema.jsonValue()["fields"]
    }
    csv_entry = BaseDisseminatedTagTable(
        id=f"{self.domain_code.lower()}_bronze_disseminated_tag_csv",
        name=f"{self.domain_code} bronze disseminated tag csv",
        description="Bronze table containing the raw data imported from the SWS, denormalized and filtered per dimension cached in csv",
        layer=TableLayer.BRONZE,
        private=True,
        type=TableType.CSV,
        # TODO Correct the path in the origin library
        path=self.iceberg_tables.BRONZE.csv_path,
        structure=csv_structure,
    )
    tag = tags.add_dissemination_table(self.dataset_id, self.tag_name, csv_entry)
    logging.debug(f"Tag with Added csv Table: {tag}")

    logging.info("Bronze Disseminated tag with selection successfully written")
|
|
@@ -89,20 +89,20 @@ class SWSGoldIcebergSparkHelper:
|
|
|
89
89
|
def keep_dim_val_attr_columns(self, df: DataFrame):
    """Project ``df`` onto the columns listed in ``self.cols_to_keep_sws``."""
    columns_to_keep = self.cols_to_keep_sws
    return df.select(*columns_to_keep)
|
|
91
91
|
|
|
92
|
+
def read_silver_data(self) -> DataFrame:
    """Read the silver Iceberg table with the ``tag`` read option set to ``self.tag_name``."""
    silver_table_id = self.iceberg_tables.SILVER.iceberg_id
    tagged_reader = self.spark.read.option("tag", self.tag_name)
    return tagged_reader.table(silver_table_id)
|
|
96
|
+
|
|
92
97
|
def gen_gold_sws_disseminated_data(self) -> DataFrame:
|
|
93
98
|
return (
|
|
94
|
-
self.spark.read.option("tag", self.tag_name)
|
|
95
|
-
.table(self.iceberg_tables.SILVER.iceberg_id)
|
|
99
|
+
self.read_silver_data()
|
|
96
100
|
.transform(self.apply_diss_flag_filter)
|
|
97
101
|
.transform(self.keep_dim_val_attr_columns)
|
|
98
102
|
)
|
|
99
103
|
|
|
100
104
|
def gen_gold_sws_validated_data(self) -> DataFrame:
|
|
101
|
-
return (
|
|
102
|
-
self.spark.read.option("tag", self.tag_name)
|
|
103
|
-
.table(self.iceberg_tables.BRONZE.iceberg_id)
|
|
104
|
-
.transform(self.keep_dim_val_attr_columns)
|
|
105
|
-
)
|
|
105
|
+
return self.read_silver_data().transform(self.keep_dim_val_attr_columns)
|
|
106
106
|
|
|
107
107
|
def write_gold_sws_validated_data_to_iceberg_and_csv(
|
|
108
108
|
self, df: DataFrame
|
|
@@ -110,6 +110,10 @@ class SWSSilverIcebergSparkHelper:
|
|
|
110
110
|
return self.spark.read.option("tag", self.tag_name).table(
|
|
111
111
|
self.iceberg_tables.BRONZE.iceberg_id
|
|
112
112
|
)
|
|
113
|
+
def read_bronze_diss_tag_data(self) -> DataFrame:
    """Read the bronze Iceberg table from the ``diss_tag_<tag_name>`` branch."""
    branch_name = f"diss_tag_{self.tag_name}"
    branch_reader = self.spark.read.option("branch", branch_name)
    return branch_reader.table(self.iceberg_tables.BRONZE.iceberg_id)
|
|
113
117
|
|
|
114
118
|
def _get_dim_time_flag_columns(self) -> Tuple[List[str], List[str], str, List[str]]:
|
|
115
119
|
"""Extract the dimension columns with time, without time, the time column and the flag columns names."""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sws-spark-dissemination-helper
|
|
3
|
-
Version: 0.0.86
|
|
3
|
+
Version: 0.0.88
|
|
4
4
|
Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
|
|
5
5
|
Project-URL: Repository, https://bitbucket.org/cioapps/sws-it-python-spark-dissemination-helper
|
|
6
6
|
Author-email: Daniele Mansillo <danielemansillo@gmail.com>
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=
|
|
2
|
-
sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=
|
|
1
|
+
sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=5z3uaVgRjtvZFO8C8LG9k3GO2dO21Ht5l7MXPY5Hb5M,19673
|
|
2
|
+
sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=ZC7hxkppo6qmfCc2z5vm2Y2iH1901F-rx9Er9cxuzP4,16037
|
|
3
3
|
sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=ja7AbOfbmC_EXHCJk7UMDzzbA-LRxzPkaaUmuvcihJ8,17449
|
|
4
|
-
sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=
|
|
4
|
+
sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=zEppNq5shiHZH2yt5faWGsb5QEmpAQS0ToIrG6fmv6o,22231
|
|
5
5
|
sws_spark_dissemination_helper/__init__.py,sha256=Efjoe9V4vGXWVp-DY5P6NbRwIUr_zkZJkDmMi-lf5Bc,262
|
|
6
6
|
sws_spark_dissemination_helper/constants.py,sha256=hpHHlbojShMWRfyIelXz6c5BqFzO48Oap1zmztlMMrs,11349
|
|
7
7
|
sws_spark_dissemination_helper/utils.py,sha256=6SzrXX0xhvynRyv-vRFDbc6V4UNe_RzKKETZAtefnhg,21341
|
|
8
|
-
sws_spark_dissemination_helper-0.0.
|
|
9
|
-
sws_spark_dissemination_helper-0.0.
|
|
10
|
-
sws_spark_dissemination_helper-0.0.
|
|
11
|
-
sws_spark_dissemination_helper-0.0.
|
|
8
|
+
sws_spark_dissemination_helper-0.0.88.dist-info/METADATA,sha256=ta-N8JQzmir7jdw5Sm6k5dHKg1gkHkt-yCu25z0HbUY,2823
|
|
9
|
+
sws_spark_dissemination_helper-0.0.88.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
+
sws_spark_dissemination_helper-0.0.88.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
|
|
11
|
+
sws_spark_dissemination_helper-0.0.88.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|