sws-spark-dissemination-helper 0.0.93__py3-none-any.whl → 0.0.95__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -447,6 +447,9 @@ class SWSBronzeIcebergSparkHelper:
447
447
  def write_bronze_disseminated_tag_data_to_iceberg_and_csv(
448
448
  self, dimensions: Dict[str, List[str]]
449
449
  ) -> DataFrame:
450
+
451
+ refs = self.spark.sql(f"SELECT * FROM {self.iceberg_tables.BRONZE.iceberg_id}.refs").collect()
452
+ logging.info(f"bronze refs: {refs}")
450
453
 
451
454
  create_branch_query = f"ALTER TABLE {self.iceberg_tables.BRONZE.iceberg_id}.`tag_{self.tag_name}` CREATE OR REPLACE BRANCH `diss_tag_{self.tag_name}`" # AS OF VERSION `{tag_name}`
452
455
  logging.info(f"create_branch_query: {create_branch_query}")
@@ -454,25 +457,36 @@ class SWSBronzeIcebergSparkHelper:
454
457
 
455
458
  logging.info(f"result of create_branch_query: {create_branch_query_result}")
456
459
 
460
+ self.disseminated_tag_df = self.spark.read.option(
461
+ "branch", f"`diss_tag_{self.tag_name}`"
462
+ ).table(self.iceberg_tables.BRONZE.iceberg_id)
463
+
457
464
  logging.info(f"dimensions: {dimensions}")
458
465
  for dimension_name, codes in dimensions.items():
459
466
  logging.info(f"dimension_name: {dimension_name}")
460
467
  logging.info(f"codes: {codes}")
461
468
  if len(codes) != 0:
462
- not_in_codes = ",".join([f"'{code}'" for code in codes])
463
- delete_from_branch_query = f"DELETE FROM {self.iceberg_tables.BRONZE.iceberg_id}.`branch_diss_tag_{self.tag_name}` WHERE {dimension_name} NOT IN ({not_in_codes})"
464
- logging.info(f"delete_from_branch_query: {delete_from_branch_query}")
465
- delete_from_branch_query_result = self.spark.sql(
466
- delete_from_branch_query
467
- ).collect()
468
-
469
- logging.info(
470
- f"result of delete_from_branch_query: {delete_from_branch_query_result}"
469
+ # not_in_codes = ",".join([f"'{code}'" for code in codes])
470
+ # delete_from_branch_query = f"DELETE FROM {self.iceberg_tables.BRONZE.iceberg_id}.`branch_diss_tag_{self.tag_name}` WHERE {dimension_name} NOT IN ({not_in_codes})"
471
+ # logging.info(f"delete_from_branch_query: {delete_from_branch_query}")
472
+ # delete_from_branch_query_result = self.spark.sql(
473
+ # delete_from_branch_query
474
+ # ).collect()
475
+
476
+ # logging.info(
477
+ # f"result of delete_from_branch_query: {delete_from_branch_query_result}"
478
+ # )
479
+ self.disseminated_tag_df = self.disseminated_tag_df.filter(
480
+ col(dimension_name).isin(codes)
471
481
  )
472
482
 
473
- self.disseminated_tag_df = self.spark.read.option(
474
- "branch", f"`diss_tag_{self.tag_name}`"
475
- ).table(self.iceberg_tables.BRONZE.iceberg_id)
483
+ # self.disseminated_tag_df = self.spark.read.option(
484
+ # "branch", f"`diss_tag_{self.tag_name}`"
485
+ # ).table(self.iceberg_tables.BRONZE.iceberg_id)
486
+
487
+ self.disseminated_tag_df.write.format("iceberg").mode("overwrite").save(
488
+ f"{self.iceberg_tables.BRONZE.iceberg_id}.`diss_tag_{self.tag_name}`"
489
+ )
476
490
 
477
491
  disseminated_tag_df = self.disseminated_tag_df.withColumn(
478
492
  "metadata", F.to_json(col("metadata"))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sws-spark-dissemination-helper
3
- Version: 0.0.93
3
+ Version: 0.0.95
4
4
  Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
5
5
  Project-URL: Repository, https://bitbucket.org/cioapps/sws-it-python-spark-dissemination-helper
6
6
  Author-email: Daniele Mansillo <danielemansillo@gmail.com>
@@ -1,11 +1,11 @@
1
- sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=ZPCpHgPVCsf7-7tWl6DDWgnXLkS02RoCvsomO3TmQ24,20418
1
+ sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=ja7zXIJNYdzvSiHdMB8oeKSQkLVu-FSUhy0vn_myhnU,21086
2
2
  sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=ZC7hxkppo6qmfCc2z5vm2Y2iH1901F-rx9Er9cxuzP4,16037
3
3
  sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=ja7AbOfbmC_EXHCJk7UMDzzbA-LRxzPkaaUmuvcihJ8,17449
4
4
  sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=zEppNq5shiHZH2yt5faWGsb5QEmpAQS0ToIrG6fmv6o,22231
5
5
  sws_spark_dissemination_helper/__init__.py,sha256=Efjoe9V4vGXWVp-DY5P6NbRwIUr_zkZJkDmMi-lf5Bc,262
6
6
  sws_spark_dissemination_helper/constants.py,sha256=hpHHlbojShMWRfyIelXz6c5BqFzO48Oap1zmztlMMrs,11349
7
7
  sws_spark_dissemination_helper/utils.py,sha256=6SzrXX0xhvynRyv-vRFDbc6V4UNe_RzKKETZAtefnhg,21341
8
- sws_spark_dissemination_helper-0.0.93.dist-info/METADATA,sha256=y1PL3ZygwfoyBxglsrNeP6IZvaUGTYCM03RuIjrqDMc,2823
9
- sws_spark_dissemination_helper-0.0.93.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- sws_spark_dissemination_helper-0.0.93.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
11
- sws_spark_dissemination_helper-0.0.93.dist-info/RECORD,,
8
+ sws_spark_dissemination_helper-0.0.95.dist-info/METADATA,sha256=5oamHa9qI0T6_rNyZc4NLUM4kPUaCGfmHOypaNyky8s,2823
9
+ sws_spark_dissemination_helper-0.0.95.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ sws_spark_dissemination_helper-0.0.95.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
11
+ sws_spark_dissemination_helper-0.0.95.dist-info/RECORD,,