sws-spark-dissemination-helper 0.0.190__tar.gz → 0.0.194__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (16)
  1. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/PKG-INFO +1 -1
  2. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/pyproject.toml +1 -1
  3. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +23 -37
  4. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/.gitignore +0 -0
  5. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/LICENSE +0 -0
  6. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/README.md +0 -0
  7. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py +0 -0
  8. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSDatatablesExportHelper.py +0 -0
  9. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py +0 -0
  10. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSPostgresSparkReader.py +0 -0
  11. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py +0 -0
  12. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/__init__.py +0 -0
  13. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/constants.py +0 -0
  14. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/utils.py +0 -0
  15. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/tests/__init__.py +0 -0
  16. {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/tests/test.py +0 -0
--- sws_spark_dissemination_helper-0.0.190/PKG-INFO
+++ sws_spark_dissemination_helper-0.0.194/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sws-spark-dissemination-helper
-Version: 0.0.190
+Version: 0.0.194
 Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
 Project-URL: Repository, https://github.com/un-fao/fao-sws-it-python-spark-dissemination-helper
 Author-email: Daniele Mansillo <danielemansillo@gmail.com>
--- sws_spark_dissemination_helper-0.0.190/pyproject.toml
+++ sws_spark_dissemination_helper-0.0.194/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "sws-spark-dissemination-helper"
-version = "0.0.190"
+version = "0.0.194"
 dependencies = [
     "annotated-types==0.7.0",
     "boto3>=1.40.0",
--- sws_spark_dissemination_helper-0.0.190/src/sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py
+++ sws_spark_dissemination_helper-0.0.194/src/sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py
@@ -71,6 +71,9 @@ class SWSGoldIcebergSparkHelper:
         self.display_decimals_df = self.sws_postgres_spark_reader.read_pg_table(
             pg_table=DatasetDatatables.DISPLAY_DECIMALS.id,
             custom_schema=DatasetDatatables.DISPLAY_DECIMALS.schema,
+        ).filter(
+            (col("domain") == lit(self.domain_code))
+            | ((col("domain") == lit("DEFAULT")))
         )
 
     def _get_dim_time_flag_columns(self) -> Tuple[List[str], List[str], str, List[str]]:
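Note on the hunk above: the display_decimals datatable is now filtered at read time to the current dataset's domain plus the DEFAULT rows, rather than being loaded whole. A minimal standalone sketch of the same filter (the table contents, the second column name, and the domain_code value are illustrative assumptions, not taken from the package):

    from pyspark.sql import SparkSession
    from pyspark.sql.functions import col, lit

    spark = SparkSession.builder.getOrCreate()

    # Hypothetical display_decimals rows: per-domain overrides plus a DEFAULT fallback
    rows = [("QCL", 2), ("FBS", 1), ("DEFAULT", 3)]
    display_decimals_df = spark.createDataFrame(rows, ["domain", "display_decimals"])

    domain_code = "QCL"  # stand-in for self.domain_code
    filtered = display_decimals_df.filter(
        (col("domain") == lit(domain_code)) | (col("domain") == lit("DEFAULT"))
    )
    filtered.show()  # keeps only the QCL rows and the DEFAULT fallback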
@@ -110,7 +113,7 @@ class SWSGoldIcebergSparkHelper:
         value_column: str = "value",
     ) -> DataFrame:
 
-        df = df.withColumn("unrounded_value", col(value_column))
+        df = df.withColumn("unrounded_value", col(value_column).cast("string"))
 
         general_default_decimals = (
             self.display_decimals_df.filter(col("domain") == lit("DEFAULT"))
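Note on the hunk above: the incoming value is now snapshotted as a string in unrounded_value before any rounding logic runs, presumably so later numeric casts of the value column cannot change what is reported as the unrounded figure. A minimal sketch of the idea (the data is illustrative):

    from pyspark.sql import SparkSession
    from pyspark.sql.functions import col

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([(0.12345678901234568,)], ["value"])

    # Preserve the original value as text before rounding touches "value"
    df = df.withColumn("unrounded_value", col("value").cast("string"))
    df.show(truncate=False)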
@@ -166,11 +169,15 @@ class SWSGoldIcebergSparkHelper:
             display_decimals = int(rule["display_decimals"])
 
             # Count actual decimal places in the current value
-            # If the value already has fewer decimals than target, skip rounding
+            # Handle both regular decimals and scientific notation
+            # Convert scientific notation to decimal format first
+            value_str_normalized = F.when(
+                F.col(value_column).cast("string").rlike("[eE]"),
+                F.format_number(F.col(value_column).cast("double"), 20),
+            ).otherwise(F.col(value_column).cast("string"))
+
             actual_decimals = F.length(
-                F.regexp_extract(
-                    F.col(value_column).cast("string"), DECIMAL_PLACES_REGEX, 1
-                )
+                F.regexp_extract(value_str_normalized, DECIMAL_PLACES_REGEX, 1)
             )
 
             # Add decimals condition
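Note on the hunk above: values such as 1.5E-3 previously defeated the decimal-place count because the regex ran on the raw string; the new code first rewrites anything containing an exponent marker into plain decimal form with format_number before counting. A standalone sketch under the assumption that DECIMAL_PLACES_REGEX captures the digits after the decimal point (the regex below is an illustrative stand-in, not the package's constant):

    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([("1.2345",), ("1.5E-3",), ("42",)], ["value"])

    # Rewrite scientific notation as a plain decimal string before counting decimals
    value_str_normalized = F.when(
        F.col("value").cast("string").rlike("[eE]"),
        F.format_number(F.col("value").cast("double"), 20),
    ).otherwise(F.col("value").cast("string"))

    DECIMAL_PLACES_REGEX = r"\.(\d+)"  # illustrative stand-in
    actual_decimals = F.length(
        F.regexp_extract(value_str_normalized, DECIMAL_PLACES_REGEX, 1)
    )
    df.withColumn("actual_decimals", actual_decimals).show(truncate=False)

Worth noting: format_number pads to the requested 20 decimals and adds thousands separators left of the point, so the counted decimals for normalized values include trailing zeros unless DECIMAL_PLACES_REGEX excludes them.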
@@ -184,9 +191,10 @@ class SWSGoldIcebergSparkHelper:
             # Only apply rounding if current decimals >= target decimals
             if display_decimals > 6:
                 # Cast to float and round
-                rounded_value = F.round(
-                    col(value_column).cast(FloatType()), display_decimals
-                )
+                # Cast to DECIMAL with precision 38 and decimals as display_decimals + 2
+                precision = 38
+                decimals = display_decimals
+                rounded_value = col(value_column).cast(DecimalType(precision, decimals))
             else:
                 # Cast to DECIMAL with precision 38 and decimals as display_decimals + 2
                 precision = 38
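Note on the hunk above: for targets above six decimals, rounding no longer goes through FloatType (roughly seven significant decimal digits), which could distort wide values before F.round was applied; the value is now cast directly to DECIMAL(38, display_decimals). The comment carried into this branch still says display_decimals + 2, while the code casts with display_decimals itself. A hedged sketch of the difference (the example value is illustrative):

    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F
    from pyspark.sql.types import DecimalType, FloatType

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([("123456.789123456789",)], ["value"])

    display_decimals = 8
    # Old approach: single-precision float loses digits before rounding
    via_float = F.round(F.col("value").cast(FloatType()), display_decimals)
    # New approach: fixed-precision decimal cast rounds without a float detour
    via_decimal = F.col("value").cast(DecimalType(38, display_decimals))

    df.select(via_float.alias("via_float"), via_decimal.alias("via_decimal")).show(truncate=False)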
@@ -234,10 +242,14 @@ class SWSGoldIcebergSparkHelper:
             )
 
             # Only round if actual decimals >= target decimals, otherwise keep original
+            # Handle both regular decimals and scientific notation for default case
+            value_str_normalized_default = F.when(
+                F.col(value_column).cast("string").rlike("[eE]"),
+                F.format_number(F.col(value_column).cast("double"), 20),
+            ).otherwise(F.col(value_column).cast("string"))
+
             actual_decimals_default = F.length(
-                F.regexp_extract(
-                    F.col(value_column).cast("string"), DECIMAL_PLACES_REGEX, 1
-                )
+                F.regexp_extract(value_str_normalized_default, DECIMAL_PLACES_REGEX, 1)
             )
             default_rounded = F.when(
                 actual_decimals_default >= lit(default_decimals), default_rounded
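Note on the hunk above: the default-decimals branch gets the same scientific-notation normalization, and the surrounding when/otherwise keeps the original value whenever it already carries fewer decimals than the default target. A compact sketch of that guard, under the same stand-in regex assumption as above (names ending in _default mirror the hunk; the data is illustrative):

    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F
    from pyspark.sql.types import DecimalType

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame([("2.5",), ("2.123456",)], ["value"])

    default_decimals = 3
    default_rounded = F.col("value").cast(DecimalType(38, default_decimals)).cast("string")
    actual_decimals_default = F.length(
        F.regexp_extract(F.col("value"), r"\.(\d+)", 1)  # stand-in for DECIMAL_PLACES_REGEX
    )

    # Round only when the value has at least default_decimals decimals; otherwise keep it as-is
    result = F.when(
        actual_decimals_default >= F.lit(default_decimals), default_rounded
    ).otherwise(F.col("value"))
    df.withColumn("display_value", result).show()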
@@ -840,29 +852,3 @@ class SWSGoldIcebergSparkHelper:
         logging.debug(f"Tag with Added csv Table: {tag}")
 
         return df
-
-
-1
-frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
-1
-1
-2
-frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
-2
-1
-1
-frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
-1
-1
-2
-frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
-2
-1
-1
-frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
-1
-1
-1
-frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
-1
-1