sws-spark-dissemination-helper 0.0.190__tar.gz → 0.0.194__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/PKG-INFO +1 -1
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/pyproject.toml +1 -1
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +23 -37
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/.gitignore +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/LICENSE +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/README.md +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSDatatablesExportHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSPostgresSparkReader.py +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/__init__.py +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/constants.py +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/src/sws_spark_dissemination_helper/utils.py +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/tests/__init__.py +0 -0
- {sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/tests/test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sws-spark-dissemination-helper
|
|
3
|
-
Version: 0.0.190
|
|
3
|
+
Version: 0.0.194
|
|
4
4
|
Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
|
|
5
5
|
Project-URL: Repository, https://github.com/un-fao/fao-sws-it-python-spark-dissemination-helper
|
|
6
6
|
Author-email: Daniele Mansillo <danielemansillo@gmail.com>
|
|
@@ -71,6 +71,9 @@ class SWSGoldIcebergSparkHelper:
|
|
|
71
71
|
self.display_decimals_df = self.sws_postgres_spark_reader.read_pg_table(
|
|
72
72
|
pg_table=DatasetDatatables.DISPLAY_DECIMALS.id,
|
|
73
73
|
custom_schema=DatasetDatatables.DISPLAY_DECIMALS.schema,
|
|
74
|
+
).filter(
|
|
75
|
+
(col("domain") == lit(self.domain_code))
|
|
76
|
+
| ((col("domain") == lit("DEFAULT")))
|
|
74
77
|
)
|
|
75
78
|
|
|
76
79
|
def _get_dim_time_flag_columns(self) -> Tuple[List[str], List[str], str, List[str]]:
|
|
@@ -110,7 +113,7 @@ class SWSGoldIcebergSparkHelper:
|
|
|
110
113
|
value_column: str = "value",
|
|
111
114
|
) -> DataFrame:
|
|
112
115
|
|
|
113
|
-
df = df.withColumn("unrounded_value", col(value_column))
|
|
116
|
+
df = df.withColumn("unrounded_value", col(value_column).cast("string"))
|
|
114
117
|
|
|
115
118
|
general_default_decimals = (
|
|
116
119
|
self.display_decimals_df.filter(col("domain") == lit("DEFAULT"))
|
|
@@ -166,11 +169,15 @@ class SWSGoldIcebergSparkHelper:
|
|
|
166
169
|
display_decimals = int(rule["display_decimals"])
|
|
167
170
|
|
|
168
171
|
# Count actual decimal places in the current value
|
|
169
|
-
#
|
|
172
|
+
# Handle both regular decimals and scientific notation
|
|
173
|
+
# Convert scientific notation to decimal format first
|
|
174
|
+
value_str_normalized = F.when(
|
|
175
|
+
F.col(value_column).cast("string").rlike("[eE]"),
|
|
176
|
+
F.format_number(F.col(value_column).cast("double"), 20),
|
|
177
|
+
).otherwise(F.col(value_column).cast("string"))
|
|
178
|
+
|
|
170
179
|
actual_decimals = F.length(
|
|
171
|
-
F.regexp_extract(
|
|
172
|
-
F.col(value_column).cast("string"), DECIMAL_PLACES_REGEX, 1
|
|
173
|
-
)
|
|
180
|
+
F.regexp_extract(value_str_normalized, DECIMAL_PLACES_REGEX, 1)
|
|
174
181
|
)
|
|
175
182
|
|
|
176
183
|
# Add decimals condition
|
|
@@ -184,9 +191,10 @@ class SWSGoldIcebergSparkHelper:
|
|
|
184
191
|
# Only apply rounding if current decimals >= target decimals
|
|
185
192
|
if display_decimals > 6:
|
|
186
193
|
# Cast to float and round
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
194
|
+
# Cast to DECIMAL with precision 38 and decimals as display_decimals + 2
|
|
195
|
+
precision = 38
|
|
196
|
+
decimals = display_decimals
|
|
197
|
+
rounded_value = col(value_column).cast(DecimalType(precision, decimals))
|
|
190
198
|
else:
|
|
191
199
|
# Cast to DECIMAL with precision 38 and decimals as display_decimals + 2
|
|
192
200
|
precision = 38
|
|
@@ -234,10 +242,14 @@ class SWSGoldIcebergSparkHelper:
|
|
|
234
242
|
)
|
|
235
243
|
|
|
236
244
|
# Only round if actual decimals >= target decimals, otherwise keep original
|
|
245
|
+
# Handle both regular decimals and scientific notation for default case
|
|
246
|
+
value_str_normalized_default = F.when(
|
|
247
|
+
F.col(value_column).cast("string").rlike("[eE]"),
|
|
248
|
+
F.format_number(F.col(value_column).cast("double"), 20),
|
|
249
|
+
).otherwise(F.col(value_column).cast("string"))
|
|
250
|
+
|
|
237
251
|
actual_decimals_default = F.length(
|
|
238
|
-
F.regexp_extract(
|
|
239
|
-
F.col(value_column).cast("string"), DECIMAL_PLACES_REGEX, 1
|
|
240
|
-
)
|
|
252
|
+
F.regexp_extract(value_str_normalized_default, DECIMAL_PLACES_REGEX, 1)
|
|
241
253
|
)
|
|
242
254
|
default_rounded = F.when(
|
|
243
255
|
actual_decimals_default >= lit(default_decimals), default_rounded
|
|
@@ -840,29 +852,3 @@ class SWSGoldIcebergSparkHelper:
|
|
|
840
852
|
logging.debug(f"Tag with Added csv Table: {tag}")
|
|
841
853
|
|
|
842
854
|
return df
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
1
|
|
846
|
-
frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
|
|
847
|
-
1
|
|
848
|
-
1
|
|
849
|
-
2
|
|
850
|
-
frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
|
|
851
|
-
2
|
|
852
|
-
1
|
|
853
|
-
1
|
|
854
|
-
frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
|
|
855
|
-
1
|
|
856
|
-
1
|
|
857
|
-
2
|
|
858
|
-
frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
|
|
859
|
-
2
|
|
860
|
-
1
|
|
861
|
-
1
|
|
862
|
-
frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
|
|
863
|
-
1
|
|
864
|
-
1
|
|
865
|
-
1
|
|
866
|
-
frozenset({"1", "2", "6", "7", "5", "8", "0", "4", "3", "9"})
|
|
867
|
-
1
|
|
868
|
-
1
|
{sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/.gitignore
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/tests/__init__.py
RENAMED
|
File without changes
|
{sws_spark_dissemination_helper-0.0.190 → sws_spark_dissemination_helper-0.0.194}/tests/test.py
RENAMED
|
File without changes
|