sws-spark-dissemination-helper 0.0.154__tar.gz → 0.0.156__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (16)
  1. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/PKG-INFO +1 -1
  2. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/pyproject.toml +1 -1
  3. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/src/sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py +12 -4
  4. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/.gitignore +0 -0
  5. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/LICENSE +0 -0
  6. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/README.md +0 -0
  7. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/src/sws_spark_dissemination_helper/SWSDatatablesExportHelper.py +0 -0
  8. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/src/sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py +0 -0
  9. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/src/sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +0 -0
  10. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/src/sws_spark_dissemination_helper/SWSPostgresSparkReader.py +0 -0
  11. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/src/sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py +0 -0
  12. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/src/sws_spark_dissemination_helper/__init__.py +0 -0
  13. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/src/sws_spark_dissemination_helper/constants.py +0 -0
  14. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/src/sws_spark_dissemination_helper/utils.py +0 -0
  15. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/tests/__init__.py +0 -0
  16. {sws_spark_dissemination_helper-0.0.154 → sws_spark_dissemination_helper-0.0.156}/tests/test.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sws-spark-dissemination-helper
3
- Version: 0.0.154
3
+ Version: 0.0.156
4
4
  Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
5
5
  Project-URL: Repository, https://github.com/un-fao/fao-sws-it-python-spark-dissemination-helper
6
6
  Author-email: Daniele Mansillo <danielemansillo@gmail.com>
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "sws-spark-dissemination-helper"
7
- version = "0.0.154"
7
+ version = "0.0.156"
8
8
  dependencies = [
9
9
  "annotated-types==0.7.0",
10
10
  "boto3>=1.36.18",
@@ -347,18 +347,25 @@ class SWSBronzeIcebergSparkHelper:
347
347
  for dimension_column, df_dimension in zip(
348
348
  self.dim_columns_w_time, dfs_dimension_w_validity
349
349
  ):
350
+ logging.info(f"Joining dimension column: {dimension_column}")
351
+ logging.info(f"df_obs_denorm columns: {df_obs_denorm.columns}")
352
+ logging.info(
353
+ f"Is dimension {dimension_column} in the dataframe? {dimension_column in df_obs_denorm.columns}"
354
+ )
355
+ df_dimension.show(5)
350
356
  df_obs_denorm = (
351
357
  df_obs_denorm.alias("o")
352
358
  .join(
353
359
  F.broadcast(df_dimension.withColumnRenamed("id", "join_id")).alias(
354
360
  "d"
355
361
  ),
356
- col(f"{dimension_column}") == col("d.join_id"),
362
+ col(f"{dimension_column}") == col("d.code"),
357
363
  )
358
- .drop(f"{dimension_column}", "join_id")
359
- .withColumnRenamed("code", dimension_column)
364
+ .drop("code", "join_id")
360
365
  )
361
-
366
+ logging.info(f"After join count: {df_obs_denorm.count()}")
367
+
368
+ df_element_uom.show(5)
362
369
  df_obs_denorm = (
363
370
  df_obs_denorm.alias("d")
364
371
  .join(
@@ -368,6 +375,7 @@ class SWSBronzeIcebergSparkHelper:
368
375
  )
369
376
  .drop("element_code")
370
377
  )
378
+ logging.info(f"After uom count: {df_obs_denorm.count()}")
371
379
 
372
380
  return df_obs_denorm
373
381