sws-spark-dissemination-helper 0.0.99__py3-none-any.whl → 0.0.101__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -458,63 +458,27 @@ class SWSBronzeIcebergSparkHelper:
458
458
  self, dimensions: Dict[str, List[str]]
459
459
  ) -> DataFrame:
460
460
 
461
- refs = self.spark.sql(
462
- f"SELECT * FROM {self.iceberg_tables.BRONZE.iceberg_id}.refs"
463
- ).collect()
464
- logging.info(f"bronze refs: {refs}")
461
+ self.disseminated_tag_df = self.df_bronze
465
462
 
466
- create_branch_query = f"ALTER TABLE {self.iceberg_tables.BRONZE.iceberg_id}.`tag_{self.tag_name}` CREATE OR REPLACE BRANCH `diss_tag_{self.tag_name}`" # AS OF VERSION `{tag_name}`
467
- create_branch_query = f"ALTER TABLE {self.iceberg_tables.BRONZE.iceberg_id} CREATE OR REPLACE BRANCH `diss_tag_{self.tag_name}`"
468
- logging.info(f"create_branch_query: {create_branch_query}")
469
- create_branch_query_result = self.spark.sql(create_branch_query).collect()
470
-
471
- while (
472
- self.spark.sql(
473
- f"SELECT * FROM {self.iceberg_tables.BRONZE.iceberg_id}.refs"
474
- )
475
- .filter(
476
- (col("type") == lit("BRANCH"))
477
- & (col("name") == lit(f"diss_tag_{self.tag_name}"))
478
- )
479
- .count()
480
- ) == 0:
481
- logging.info(
482
- f"Waiting for the branch {self.tag_name} diss_tag_{self.tag_name} to be created"
483
- )
484
- time.sleep(2)
485
-
486
- logging.info(f"result of create_branch_query: {create_branch_query_result}")
487
-
488
- self.disseminated_tag_df = self.spark.read.option(
489
- "branch", f"`diss_tag_{self.tag_name}`"
490
- ).table(self.iceberg_tables.BRONZE.iceberg_id)
491
-
492
- logging.info(f"dimensions: {dimensions}")
493
463
  for dimension_name, codes in dimensions.items():
494
464
  logging.info(f"dimension_name: {dimension_name}")
495
465
  logging.info(f"codes: {codes}")
496
466
  if len(codes) != 0:
497
- # not_in_codes = ",".join([f"'{code}'" for code in codes])
498
- # delete_from_branch_query = f"DELETE FROM {self.iceberg_tables.BRONZE.iceberg_id}.`branch_diss_tag_{self.tag_name}` WHERE {dimension_name} NOT IN ({not_in_codes})"
499
- # logging.info(f"delete_from_branch_query: {delete_from_branch_query}")
500
- # delete_from_branch_query_result = self.spark.sql(
501
- # delete_from_branch_query
502
- # ).collect()
503
-
504
- # logging.info(
505
- # f"result of delete_from_branch_query: {delete_from_branch_query_result}"
506
- # )
507
467
  self.disseminated_tag_df = self.disseminated_tag_df.filter(
508
468
  col(dimension_name).isin(codes)
509
469
  )
510
470
 
511
- # self.disseminated_tag_df = self.spark.read.option(
512
- # "branch", f"`diss_tag_{self.tag_name}`"
513
- # ).table(self.iceberg_tables.BRONZE.iceberg_id)
471
+ self.df_bronze.writeTo(
472
+ self.iceberg_tables.BRONZE_DISS_TAG.iceberg_id
473
+ ).createOrReplace()
474
+
475
+ logging.info(
476
+ f"Bronze disseminated tag table written to {self.iceberg_tables.BRONZE_DISS_TAG.iceberg_id}"
477
+ )
514
478
 
515
- self.disseminated_tag_df.writeTo(
516
- f"{self.iceberg_tables.BRONZE.iceberg_id}.`branch_diss_tag_{self.tag_name}`"
517
- ).overwritePartitions()
479
+ self.spark.sql(
480
+ f"ALTER TABLE {self.iceberg_tables.BRONZE_DISS_TAG.iceberg_id} CREATE TAG `{self.tag_name}`"
481
+ )
518
482
 
519
483
  disseminated_tag_df = self.disseminated_tag_df.withColumn(
520
484
  "metadata", F.to_json(col("metadata"))
@@ -523,7 +487,7 @@ class SWSBronzeIcebergSparkHelper:
523
487
  save_cache_csv(
524
488
  df=disseminated_tag_df,
525
489
  bucket=self.bucket,
526
- prefix=f"{self.iceberg_tables.BRONZE.csv_prefix}_disseminated_tag",
490
+ prefix=f"{self.iceberg_tables.BRONZE_DISS_TAG.csv_prefix}",
527
491
  tag_name=self.tag_name,
528
492
  )
529
493
 
@@ -542,8 +506,8 @@ class SWSBronzeIcebergSparkHelper:
542
506
  private=True,
543
507
  type=TableType.ICEBERG,
544
508
  database=IcebergDatabases.BRONZE_DATABASE,
545
- table=self.iceberg_tables.BRONZE.table,
546
- path=self.iceberg_tables.BRONZE.path,
509
+ table=self.iceberg_tables.BRONZE_DISS_TAG.table,
510
+ path=self.iceberg_tables.BRONZE_DISS_TAG.path,
547
511
  structure={
548
512
  "columns": self.disseminated_tag_df.schema.jsonValue()["fields"]
549
513
  },
@@ -561,7 +525,7 @@ class SWSBronzeIcebergSparkHelper:
561
525
  private=True,
562
526
  type=TableType.CSV,
563
527
  # TODO Correct the path in the origin library
564
- path=self.iceberg_tables.BRONZE.csv_path,
528
+ path=self.iceberg_tables.BRONZE_DISS_TAG.csv_path,
565
529
  structure={
566
530
  "columns": self.disseminated_tag_df.schema.jsonValue()["fields"]
567
531
  },
@@ -219,6 +219,7 @@ class IcebergTables:
219
219
  self.__tag_name = tag_name
220
220
 
221
221
  self.BRONZE = self._create_iceberg_table("BRONZE")
222
+ self.BRONZE_DISS_TAG = self._create_iceberg_table("BRONZE", suffix="diss_tag")
222
223
  self.SILVER = self._create_iceberg_table("SILVER", prefix=domain)
223
224
 
224
225
  # GOLD tables with specific suffixes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sws-spark-dissemination-helper
3
- Version: 0.0.99
3
+ Version: 0.0.101
4
4
  Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
5
5
  Project-URL: Repository, https://bitbucket.org/cioapps/sws-it-python-spark-dissemination-helper
6
6
  Author-email: Daniele Mansillo <danielemansillo@gmail.com>
@@ -1,11 +1,11 @@
1
- sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=GGW1cw9wYx33r9VonkHoJedtFfHffTzuY9x13p6ukc8,22091
1
+ sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=k79yVW14wnNTX1nLEZj4bqsmv9L43Hd5ILuAxFWTX2s,20146
2
2
  sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=ZC7hxkppo6qmfCc2z5vm2Y2iH1901F-rx9Er9cxuzP4,16037
3
3
  sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=ja7AbOfbmC_EXHCJk7UMDzzbA-LRxzPkaaUmuvcihJ8,17449
4
4
  sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=zEppNq5shiHZH2yt5faWGsb5QEmpAQS0ToIrG6fmv6o,22231
5
5
  sws_spark_dissemination_helper/__init__.py,sha256=Efjoe9V4vGXWVp-DY5P6NbRwIUr_zkZJkDmMi-lf5Bc,262
6
- sws_spark_dissemination_helper/constants.py,sha256=hpHHlbojShMWRfyIelXz6c5BqFzO48Oap1zmztlMMrs,11349
6
+ sws_spark_dissemination_helper/constants.py,sha256=yG_pUi2PVeFKz6pogAU2AGQ-wV4FHWRHOW77wE_XGL0,11436
7
7
  sws_spark_dissemination_helper/utils.py,sha256=6SzrXX0xhvynRyv-vRFDbc6V4UNe_RzKKETZAtefnhg,21341
8
- sws_spark_dissemination_helper-0.0.99.dist-info/METADATA,sha256=SmsirKuHhZixuyL-aC5IfJguzL6AkCcyhfff6T3vaUo,2823
9
- sws_spark_dissemination_helper-0.0.99.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
- sws_spark_dissemination_helper-0.0.99.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
11
- sws_spark_dissemination_helper-0.0.99.dist-info/RECORD,,
8
+ sws_spark_dissemination_helper-0.0.101.dist-info/METADATA,sha256=wDEhvo5aNx0XYvV1kqUsG5UdPJ6rIDp8Hbk1zXvzCk8,2824
9
+ sws_spark_dissemination_helper-0.0.101.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
10
+ sws_spark_dissemination_helper-0.0.101.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
11
+ sws_spark_dissemination_helper-0.0.101.dist-info/RECORD,,