sws-spark-dissemination-helper 0.0.179__py3-none-any.whl → 0.0.181__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +21 -10
- {sws_spark_dissemination_helper-0.0.179.dist-info → sws_spark_dissemination_helper-0.0.181.dist-info}/METADATA +1 -1
- {sws_spark_dissemination_helper-0.0.179.dist-info → sws_spark_dissemination_helper-0.0.181.dist-info}/RECORD +5 -5
- {sws_spark_dissemination_helper-0.0.179.dist-info → sws_spark_dissemination_helper-0.0.181.dist-info}/WHEEL +0 -0
- {sws_spark_dissemination_helper-0.0.179.dist-info → sws_spark_dissemination_helper-0.0.181.dist-info}/licenses/LICENSE +0 -0
|
@@ -88,10 +88,13 @@ class SWSGoldIcebergSparkHelper:
|
|
|
88
88
|
def apply_diss_flag_filter(self, df: DataFrame) -> DataFrame:
|
|
89
89
|
return df.filter(col("diss_flag"))
|
|
90
90
|
|
|
91
|
-
def keep_dim_val_attr_columns(
|
|
91
|
+
def keep_dim_val_attr_columns(
|
|
92
|
+
self, df: DataFrame, additional_columns: List[str] = []
|
|
93
|
+
):
|
|
92
94
|
cols_to_keep_sws = self.cols_to_keep_sws
|
|
93
|
-
|
|
94
|
-
|
|
95
|
+
for additional_column in additional_columns:
|
|
96
|
+
if additional_column in df.columns:
|
|
97
|
+
cols_to_keep_sws = cols_to_keep_sws + [additional_column]
|
|
95
98
|
if "unit_of_measure_symbol" in df.columns:
|
|
96
99
|
cols_to_keep_sws = cols_to_keep_sws + ["unit_of_measure_symbol"]
|
|
97
100
|
return df.select(*cols_to_keep_sws)
|
|
@@ -137,7 +140,7 @@ class SWSGoldIcebergSparkHelper:
|
|
|
137
140
|
F.round(
|
|
138
141
|
F.col("value").cast("FLOAT") * F.pow(10, F.col("display_decimals")), 0
|
|
139
142
|
)
|
|
140
|
-
/ F.pow(10, F.col("
|
|
143
|
+
/ F.pow(10, F.col("display_decimals")).cast("STRING"),
|
|
141
144
|
)
|
|
142
145
|
|
|
143
146
|
# F.round(
|
|
@@ -156,18 +159,26 @@ class SWSGoldIcebergSparkHelper:
|
|
|
156
159
|
self.iceberg_tables.SILVER.iceberg_id
|
|
157
160
|
)
|
|
158
161
|
|
|
159
|
-
def gen_gold_sws_disseminated_data(
|
|
162
|
+
def gen_gold_sws_disseminated_data(
|
|
163
|
+
self, additional_columns: List[str] = []
|
|
164
|
+
) -> DataFrame:
|
|
160
165
|
return (
|
|
161
166
|
self.read_silver_data()
|
|
162
167
|
.transform(self.apply_diss_flag_filter)
|
|
163
|
-
.transform(self.keep_dim_val_attr_columns)
|
|
168
|
+
.transform(self.keep_dim_val_attr_columns, additional_columns)
|
|
164
169
|
)
|
|
165
170
|
|
|
166
|
-
def gen_gold_sws_data(self) -> DataFrame:
|
|
167
|
-
return self.read_bronze_data().transform(
|
|
171
|
+
def gen_gold_sws_data(self, additional_columns: List[str] = []) -> DataFrame:
|
|
172
|
+
return self.read_bronze_data().transform(
|
|
173
|
+
self.keep_dim_val_attr_columns, additional_columns
|
|
174
|
+
)
|
|
168
175
|
|
|
169
|
-
def gen_gold_sws_validated_data(
|
|
170
|
-
|
|
176
|
+
def gen_gold_sws_validated_data(
|
|
177
|
+
self, additional_columns: List[str] = []
|
|
178
|
+
) -> DataFrame:
|
|
179
|
+
return self.read_silver_data().transform(
|
|
180
|
+
self.keep_dim_val_attr_columns, additional_columns
|
|
181
|
+
)
|
|
171
182
|
|
|
172
183
|
def write_gold_sws_validated_data_to_iceberg_and_csv(
|
|
173
184
|
self, df: DataFrame
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sws-spark-dissemination-helper
|
|
3
|
-
Version: 0.0.179
|
|
3
|
+
Version: 0.0.181
|
|
4
4
|
Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
|
|
5
5
|
Project-URL: Repository, https://github.com/un-fao/fao-sws-it-python-spark-dissemination-helper
|
|
6
6
|
Author-email: Daniele Mansillo <danielemansillo@gmail.com>
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=N0eQ2LXtpPeZQCWYi85sMLmpXRzLA2erECiba8tqOAY,29595
|
|
2
2
|
sws_spark_dissemination_helper/SWSDatatablesExportHelper.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py,sha256=csqKyYglBkJSBvEkEa1_keHarZZAIJHaV0d64gGJy98,26379
|
|
4
|
-
sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=
|
|
4
|
+
sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=atQFiY5Mmo-rzHY7WVWg-Guvg8i1ZcaaoKE4ymTaKdE,27750
|
|
5
5
|
sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=V_rH4UYoFZfMUc82U-KxeL_o8F44HnMHfLLXoyNxHxs,20016
|
|
6
6
|
sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=EXpqPHbxld8MPShE6Vo8h4y1vpBt_BjMtS4RPJcPeTU,26355
|
|
7
7
|
sws_spark_dissemination_helper/__init__.py,sha256=42TPbk7KxAud_qY3Sr_F4F7VjyofUlxEJkUXAFQsjRo,327
|
|
8
8
|
sws_spark_dissemination_helper/constants.py,sha256=cVjTS3xbJNKz-1i7c1dJk2PcOZzQhvuHUp9i4PNIPh4,14055
|
|
9
9
|
sws_spark_dissemination_helper/utils.py,sha256=Ge8zXsUIcvFihALDNLF5kCu_tAdRQUE04xE6Yn9xQF4,22008
|
|
10
|
-
sws_spark_dissemination_helper-0.0.
|
|
11
|
-
sws_spark_dissemination_helper-0.0.179.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
12
|
-
sws_spark_dissemination_helper-0.0.179.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
|
|
13
|
-
sws_spark_dissemination_helper-0.0.179.dist-info/RECORD,,
|
|
10
|
+
sws_spark_dissemination_helper-0.0.181.dist-info/METADATA,sha256=gszaR8nx1Aj_dWKKiUsQbCVpYIUZvIkq0BJ3DlfxYyk,2822
|
|
11
|
+
sws_spark_dissemination_helper-0.0.181.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
12
|
+
sws_spark_dissemination_helper-0.0.181.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
|
|
13
|
+
sws_spark_dissemination_helper-0.0.181.dist-info/RECORD,,
|
|
File without changes
|