sws-spark-dissemination-helper 0.0.99__tar.gz → 0.0.101__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/PKG-INFO +1 -1
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/pyproject.toml +1 -1
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/src/sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py +15 -51
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/src/sws_spark_dissemination_helper/constants.py +1 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/.gitignore +0 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/LICENSE +0 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/README.md +0 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/old_requirements.txt +0 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/requirements.txt +0 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/src/sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/src/sws_spark_dissemination_helper/SWSPostgresSparkReader.py +0 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/src/sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/src/sws_spark_dissemination_helper/__init__.py +0 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/src/sws_spark_dissemination_helper/utils.py +0 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/tests/__init__.py +0 -0
- {sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/tests/test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sws-spark-dissemination-helper
|
|
3
|
-
Version: 0.0.99
|
|
3
|
+
Version: 0.0.101
|
|
4
4
|
Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
|
|
5
5
|
Project-URL: Repository, https://bitbucket.org/cioapps/sws-it-python-spark-dissemination-helper
|
|
6
6
|
Author-email: Daniele Mansillo <danielemansillo@gmail.com>
|
|
@@ -458,63 +458,27 @@ class SWSBronzeIcebergSparkHelper:
|
|
|
458
458
|
self, dimensions: Dict[str, List[str]]
|
|
459
459
|
) -> DataFrame:
|
|
460
460
|
|
|
461
|
-
|
|
462
|
-
f"SELECT * FROM {self.iceberg_tables.BRONZE.iceberg_id}.refs"
|
|
463
|
-
).collect()
|
|
464
|
-
logging.info(f"bronze refs: {refs}")
|
|
461
|
+
self.disseminated_tag_df = self.df_bronze
|
|
465
462
|
|
|
466
|
-
create_branch_query = f"ALTER TABLE {self.iceberg_tables.BRONZE.iceberg_id}.`tag_{self.tag_name}` CREATE OR REPLACE BRANCH `diss_tag_{self.tag_name}`" # AS OF VERSION `{tag_name}`
|
|
467
|
-
create_branch_query = f"ALTER TABLE {self.iceberg_tables.BRONZE.iceberg_id} CREATE OR REPLACE BRANCH `diss_tag_{self.tag_name}`"
|
|
468
|
-
logging.info(f"create_branch_query: {create_branch_query}")
|
|
469
|
-
create_branch_query_result = self.spark.sql(create_branch_query).collect()
|
|
470
|
-
|
|
471
|
-
while (
|
|
472
|
-
self.spark.sql(
|
|
473
|
-
f"SELECT * FROM {self.iceberg_tables.BRONZE.iceberg_id}.refs"
|
|
474
|
-
)
|
|
475
|
-
.filter(
|
|
476
|
-
(col("type") == lit("BRANCH"))
|
|
477
|
-
& (col("name") == lit(f"diss_tag_{self.tag_name}"))
|
|
478
|
-
)
|
|
479
|
-
.count()
|
|
480
|
-
) == 0:
|
|
481
|
-
logging.info(
|
|
482
|
-
f"Waiting for the branch {self.tag_name} diss_tag_{self.tag_name} to be created"
|
|
483
|
-
)
|
|
484
|
-
time.sleep(2)
|
|
485
|
-
|
|
486
|
-
logging.info(f"result of create_branch_query: {create_branch_query_result}")
|
|
487
|
-
|
|
488
|
-
self.disseminated_tag_df = self.spark.read.option(
|
|
489
|
-
"branch", f"`diss_tag_{self.tag_name}`"
|
|
490
|
-
).table(self.iceberg_tables.BRONZE.iceberg_id)
|
|
491
|
-
|
|
492
|
-
logging.info(f"dimensions: {dimensions}")
|
|
493
463
|
for dimension_name, codes in dimensions.items():
|
|
494
464
|
logging.info(f"dimension_name: {dimension_name}")
|
|
495
465
|
logging.info(f"codes: {codes}")
|
|
496
466
|
if len(codes) != 0:
|
|
497
|
-
# not_in_codes = ",".join([f"'{code}'" for code in codes])
|
|
498
|
-
# delete_from_branch_query = f"DELETE FROM {self.iceberg_tables.BRONZE.iceberg_id}.`branch_diss_tag_{self.tag_name}` WHERE {dimension_name} NOT IN ({not_in_codes})"
|
|
499
|
-
# logging.info(f"delete_from_branch_query: {delete_from_branch_query}")
|
|
500
|
-
# delete_from_branch_query_result = self.spark.sql(
|
|
501
|
-
# delete_from_branch_query
|
|
502
|
-
# ).collect()
|
|
503
|
-
|
|
504
|
-
# logging.info(
|
|
505
|
-
# f"result of delete_from_branch_query: {delete_from_branch_query_result}"
|
|
506
|
-
# )
|
|
507
467
|
self.disseminated_tag_df = self.disseminated_tag_df.filter(
|
|
508
468
|
col(dimension_name).isin(codes)
|
|
509
469
|
)
|
|
510
470
|
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
471
|
+
self.df_bronze.writeTo(
|
|
472
|
+
self.iceberg_tables.BRONZE_DISS_TAG.iceberg_id
|
|
473
|
+
).createOrReplace()
|
|
474
|
+
|
|
475
|
+
logging.info(
|
|
476
|
+
f"Bronze disseminated tag table written to {self.iceberg_tables.BRONZE_DISS_TAG.iceberg_id}"
|
|
477
|
+
)
|
|
514
478
|
|
|
515
|
-
self.
|
|
516
|
-
f"{self.iceberg_tables.
|
|
517
|
-
)
|
|
479
|
+
self.spark.sql(
|
|
480
|
+
f"ALTER TABLE {self.iceberg_tables.BRONZE_DISS_TAG.iceberg_id} CREATE TAG `{self.tag_name}`"
|
|
481
|
+
)
|
|
518
482
|
|
|
519
483
|
disseminated_tag_df = self.disseminated_tag_df.withColumn(
|
|
520
484
|
"metadata", F.to_json(col("metadata"))
|
|
@@ -523,7 +487,7 @@ class SWSBronzeIcebergSparkHelper:
|
|
|
523
487
|
save_cache_csv(
|
|
524
488
|
df=disseminated_tag_df,
|
|
525
489
|
bucket=self.bucket,
|
|
526
|
-
prefix=f"{self.iceberg_tables.
|
|
490
|
+
prefix=f"{self.iceberg_tables.BRONZE_DISS_TAG.csv_prefix}",
|
|
527
491
|
tag_name=self.tag_name,
|
|
528
492
|
)
|
|
529
493
|
|
|
@@ -542,8 +506,8 @@ class SWSBronzeIcebergSparkHelper:
|
|
|
542
506
|
private=True,
|
|
543
507
|
type=TableType.ICEBERG,
|
|
544
508
|
database=IcebergDatabases.BRONZE_DATABASE,
|
|
545
|
-
table=self.iceberg_tables.
|
|
546
|
-
path=self.iceberg_tables.
|
|
509
|
+
table=self.iceberg_tables.BRONZE_DISS_TAG.table,
|
|
510
|
+
path=self.iceberg_tables.BRONZE_DISS_TAG.path,
|
|
547
511
|
structure={
|
|
548
512
|
"columns": self.disseminated_tag_df.schema.jsonValue()["fields"]
|
|
549
513
|
},
|
|
@@ -561,7 +525,7 @@ class SWSBronzeIcebergSparkHelper:
|
|
|
561
525
|
private=True,
|
|
562
526
|
type=TableType.CSV,
|
|
563
527
|
# TODO Correct the path in the origin library
|
|
564
|
-
path=self.iceberg_tables.
|
|
528
|
+
path=self.iceberg_tables.BRONZE_DISS_TAG.csv_path,
|
|
565
529
|
structure={
|
|
566
530
|
"columns": self.disseminated_tag_df.schema.jsonValue()["fields"]
|
|
567
531
|
},
|
|
@@ -219,6 +219,7 @@ class IcebergTables:
|
|
|
219
219
|
self.__tag_name = tag_name
|
|
220
220
|
|
|
221
221
|
self.BRONZE = self._create_iceberg_table("BRONZE")
|
|
222
|
+
self.BRONZE_DISS_TAG = self._create_iceberg_table("BRONZE", suffix="diss_tag")
|
|
222
223
|
self.SILVER = self._create_iceberg_table("SILVER", prefix=domain)
|
|
223
224
|
|
|
224
225
|
# GOLD tables with specific suffixes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/requirements.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/tests/__init__.py
RENAMED
|
File without changes
|
{sws_spark_dissemination_helper-0.0.99 → sws_spark_dissemination_helper-0.0.101}/tests/test.py
RENAMED
|
File without changes
|