sws-spark-dissemination-helper 0.0.92__tar.gz → 0.0.94__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16)
  1. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/PKG-INFO +1 -1
  2. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/pyproject.toml +1 -1
  3. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/src/sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py +28 -9
  4. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/.gitignore +0 -0
  5. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/LICENSE +0 -0
  6. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/README.md +0 -0
  7. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/old_requirements.txt +0 -0
  8. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/requirements.txt +0 -0
  9. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/src/sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +0 -0
  10. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/src/sws_spark_dissemination_helper/SWSPostgresSparkReader.py +0 -0
  11. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/src/sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py +0 -0
  12. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/src/sws_spark_dissemination_helper/__init__.py +0 -0
  13. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/src/sws_spark_dissemination_helper/constants.py +0 -0
  14. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/src/sws_spark_dissemination_helper/utils.py +0 -0
  15. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/tests/__init__.py +0 -0
  16. {sws_spark_dissemination_helper-0.0.92 → sws_spark_dissemination_helper-0.0.94}/tests/test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sws-spark-dissemination-helper
3
- Version: 0.0.92
3
+ Version: 0.0.94
4
4
  Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
5
5
  Project-URL: Repository, https://bitbucket.org/cioapps/sws-it-python-spark-dissemination-helper
6
6
  Author-email: Daniele Mansillo <danielemansillo@gmail.com>
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sws-spark-dissemination-helper"
7
- version = "0.0.92"
7
+ version = "0.0.94"
8
8
  dependencies = [
9
9
  "annotated-types==0.7.0",
10
10
  "boto3==1.36.18",
@@ -450,21 +450,40 @@ class SWSBronzeIcebergSparkHelper:
450
450
 
451
451
  create_branch_query = f"ALTER TABLE {self.iceberg_tables.BRONZE.iceberg_id}.`tag_{self.tag_name}` CREATE OR REPLACE BRANCH `diss_tag_{self.tag_name}`" # AS OF VERSION `{tag_name}`
452
452
  logging.info(f"create_branch_query: {create_branch_query}")
453
- self.spark.sql(create_branch_query)
454
- logging.info(f"dimensions: {dimensions}")
453
+ create_branch_query_result = self.spark.sql(create_branch_query).collect()
454
+
455
+ logging.info(f"result of create_branch_query: {create_branch_query_result}")
455
456
 
457
+ self.disseminated_tag_df = self.spark.read.option(
458
+ "branch", f"`diss_tag_{self.tag_name}`"
459
+ ).table(self.iceberg_tables.BRONZE.iceberg_id)
460
+
461
+ logging.info(f"dimensions: {dimensions}")
456
462
  for dimension_name, codes in dimensions.items():
457
463
  logging.info(f"dimension_name: {dimension_name}")
458
464
  logging.info(f"codes: {codes}")
459
465
  if len(codes) != 0:
460
- not_in_codes = ",".join([f"'{code}'" for code in codes])
461
- delete_from_branch_query = f"DELETE FROM {self.iceberg_tables.BRONZE.iceberg_id}.`branch_diss_tag_{self.tag_name}` WHERE {dimension_name} NOT IN ({not_in_codes})"
462
- logging.info(f"delete_from_branch_query: {delete_from_branch_query}")
463
- self.spark.sql(delete_from_branch_query)
466
+ # not_in_codes = ",".join([f"'{code}'" for code in codes])
467
+ # delete_from_branch_query = f"DELETE FROM {self.iceberg_tables.BRONZE.iceberg_id}.`branch_diss_tag_{self.tag_name}` WHERE {dimension_name} NOT IN ({not_in_codes})"
468
+ # logging.info(f"delete_from_branch_query: {delete_from_branch_query}")
469
+ # delete_from_branch_query_result = self.spark.sql(
470
+ # delete_from_branch_query
471
+ # ).collect()
472
+
473
+ # logging.info(
474
+ # f"result of delete_from_branch_query: {delete_from_branch_query_result}"
475
+ # )
476
+ self.disseminated_tag_df = self.disseminated_tag_df.filter(
477
+ col(dimension_name).isin(codes)
478
+ )
464
479
 
465
- self.disseminated_tag_df = self.spark.read.option(
466
- "branch", f"`diss_tag_{self.tag_name}`"
467
- ).table(self.iceberg_tables.BRONZE.iceberg_id)
480
+ # self.disseminated_tag_df = self.spark.read.option(
481
+ # "branch", f"`diss_tag_{self.tag_name}`"
482
+ # ).table(self.iceberg_tables.BRONZE.iceberg_id)
483
+
484
+ self.disseminated_tag_df.write.format("iceberg").mode("overwrite").save(
485
+ f"{self.iceberg_tables.BRONZE.iceberg_id}.`diss_tag_{self.tag_name}`"
486
+ )
468
487
 
469
488
  disseminated_tag_df = self.disseminated_tag_df.withColumn(
470
489
  "metadata", F.to_json(col("metadata"))