sws-spark-dissemination-helper 0.0.92__py3-none-any.whl → 0.0.94__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py +28 -9
- {sws_spark_dissemination_helper-0.0.92.dist-info → sws_spark_dissemination_helper-0.0.94.dist-info}/METADATA +1 -1
- {sws_spark_dissemination_helper-0.0.92.dist-info → sws_spark_dissemination_helper-0.0.94.dist-info}/RECORD +5 -5
- {sws_spark_dissemination_helper-0.0.92.dist-info → sws_spark_dissemination_helper-0.0.94.dist-info}/WHEEL +0 -0
- {sws_spark_dissemination_helper-0.0.92.dist-info → sws_spark_dissemination_helper-0.0.94.dist-info}/licenses/LICENSE +0 -0
|
@@ -450,21 +450,40 @@ class SWSBronzeIcebergSparkHelper:
|
|
|
450
450
|
|
|
451
451
|
create_branch_query = f"ALTER TABLE {self.iceberg_tables.BRONZE.iceberg_id}.`tag_{self.tag_name}` CREATE OR REPLACE BRANCH `diss_tag_{self.tag_name}`" # AS OF VERSION `{tag_name}`
|
|
452
452
|
logging.info(f"create_branch_query: {create_branch_query}")
|
|
453
|
-
self.spark.sql(create_branch_query)
|
|
454
|
-
|
|
453
|
+
create_branch_query_result = self.spark.sql(create_branch_query).collect()
|
|
454
|
+
|
|
455
|
+
logging.info(f"result of create_branch_query: {create_branch_query_result}")
|
|
455
456
|
|
|
457
|
+
self.disseminated_tag_df = self.spark.read.option(
|
|
458
|
+
"branch", f"`diss_tag_{self.tag_name}`"
|
|
459
|
+
).table(self.iceberg_tables.BRONZE.iceberg_id)
|
|
460
|
+
|
|
461
|
+
logging.info(f"dimensions: {dimensions}")
|
|
456
462
|
for dimension_name, codes in dimensions.items():
|
|
457
463
|
logging.info(f"dimension_name: {dimension_name}")
|
|
458
464
|
logging.info(f"codes: {codes}")
|
|
459
465
|
if len(codes) != 0:
|
|
460
|
-
not_in_codes = ",".join([f"'{code}'" for code in codes])
|
|
461
|
-
delete_from_branch_query = f"DELETE FROM {self.iceberg_tables.BRONZE.iceberg_id}.`branch_diss_tag_{self.tag_name}` WHERE {dimension_name} NOT IN ({not_in_codes})"
|
|
462
|
-
logging.info(f"delete_from_branch_query: {delete_from_branch_query}")
|
|
463
|
-
self.spark.sql(delete_from_branch_query)
|
|
466
|
+
# not_in_codes = ",".join([f"'{code}'" for code in codes])
|
|
467
|
+
# delete_from_branch_query = f"DELETE FROM {self.iceberg_tables.BRONZE.iceberg_id}.`branch_diss_tag_{self.tag_name}` WHERE {dimension_name} NOT IN ({not_in_codes})"
|
|
468
|
+
# logging.info(f"delete_from_branch_query: {delete_from_branch_query}")
|
|
469
|
+
# delete_from_branch_query_result = self.spark.sql(
|
|
470
|
+
# delete_from_branch_query
|
|
471
|
+
# ).collect()
|
|
472
|
+
|
|
473
|
+
# logging.info(
|
|
474
|
+
# f"result of delete_from_branch_query: {delete_from_branch_query_result}"
|
|
475
|
+
# )
|
|
476
|
+
self.disseminated_tag_df = self.disseminated_tag_df.filter(
|
|
477
|
+
col(dimension_name).isin(codes)
|
|
478
|
+
)
|
|
464
479
|
|
|
465
|
-
self.disseminated_tag_df = self.spark.read.option(
|
|
466
|
-
"branch", f"`diss_tag_{self.tag_name}`"
|
467
|
-
).table(self.iceberg_tables.BRONZE.iceberg_id)
|
|
480
|
+
# self.disseminated_tag_df = self.spark.read.option(
|
|
481
|
+
# "branch", f"`diss_tag_{self.tag_name}`"
|
|
482
|
+
# ).table(self.iceberg_tables.BRONZE.iceberg_id)
|
|
483
|
+
|
|
484
|
+
self.disseminated_tag_df.write.format("iceberg").mode("overwrite").save(
|
|
485
|
+
f"{self.iceberg_tables.BRONZE.iceberg_id}.`diss_tag_{self.tag_name}`"
|
|
486
|
+
)
|
|
468
487
|
|
|
469
488
|
disseminated_tag_df = self.disseminated_tag_df.withColumn(
|
|
470
489
|
"metadata", F.to_json(col("metadata"))
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sws-spark-dissemination-helper
|
|
3
|
-
Version: 0.0.92
|
|
3
|
+
Version: 0.0.94
|
|
4
4
|
Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
|
|
5
5
|
Project-URL: Repository, https://bitbucket.org/cioapps/sws-it-python-spark-dissemination-helper
|
|
6
6
|
Author-email: Daniele Mansillo <danielemansillo@gmail.com>
|
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256
|
|
1
|
+
sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=-Tyd_hffHV3bQxNE8ZJ3ZO-lQHJ_zlD5V7cPP4F1p7s,20929
|
|
2
2
|
sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=ZC7hxkppo6qmfCc2z5vm2Y2iH1901F-rx9Er9cxuzP4,16037
|
|
3
3
|
sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=ja7AbOfbmC_EXHCJk7UMDzzbA-LRxzPkaaUmuvcihJ8,17449
|
|
4
4
|
sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=zEppNq5shiHZH2yt5faWGsb5QEmpAQS0ToIrG6fmv6o,22231
|
|
5
5
|
sws_spark_dissemination_helper/__init__.py,sha256=Efjoe9V4vGXWVp-DY5P6NbRwIUr_zkZJkDmMi-lf5Bc,262
|
|
6
6
|
sws_spark_dissemination_helper/constants.py,sha256=hpHHlbojShMWRfyIelXz6c5BqFzO48Oap1zmztlMMrs,11349
|
|
7
7
|
sws_spark_dissemination_helper/utils.py,sha256=6SzrXX0xhvynRyv-vRFDbc6V4UNe_RzKKETZAtefnhg,21341
|
|
8
|
-
sws_spark_dissemination_helper-0.0.
|
|
9
|
-
sws_spark_dissemination_helper-0.0.92.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
-
sws_spark_dissemination_helper-0.0.92.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
|
|
11
|
-
sws_spark_dissemination_helper-0.0.92.dist-info/RECORD,,
|
|
8
|
+
sws_spark_dissemination_helper-0.0.94.dist-info/METADATA,sha256=aCtDfs_PFu2JhMw7yL-SThGmluQrTxQmqrgRG0Q1wXo,2823
|
|
9
|
+
sws_spark_dissemination_helper-0.0.94.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
10
|
+
sws_spark_dissemination_helper-0.0.94.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
|
|
11
|
+
sws_spark_dissemination_helper-0.0.94.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|