sws-spark-dissemination-helper 0.0.179__py3-none-any.whl → 0.0.187__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +21 -10
- sws_spark_dissemination_helper/SWSPostgresSparkReader.py +1 -1
- sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py +2 -2
- sws_spark_dissemination_helper/constants.py +1 -1
- {sws_spark_dissemination_helper-0.0.179.dist-info → sws_spark_dissemination_helper-0.0.187.dist-info}/METADATA +2 -2
- {sws_spark_dissemination_helper-0.0.179.dist-info → sws_spark_dissemination_helper-0.0.187.dist-info}/RECORD +8 -8
- {sws_spark_dissemination_helper-0.0.179.dist-info → sws_spark_dissemination_helper-0.0.187.dist-info}/WHEEL +0 -0
- {sws_spark_dissemination_helper-0.0.179.dist-info → sws_spark_dissemination_helper-0.0.187.dist-info}/licenses/LICENSE +0 -0
|
@@ -88,10 +88,13 @@ class SWSGoldIcebergSparkHelper:
|
|
|
88
88
|
def apply_diss_flag_filter(self, df: DataFrame) -> DataFrame:
|
|
89
89
|
return df.filter(col("diss_flag"))
|
|
90
90
|
|
|
91
|
-
def keep_dim_val_attr_columns(
|
|
91
|
+
def keep_dim_val_attr_columns(
|
|
92
|
+
self, df: DataFrame, additional_columns: List[str] = []
|
|
93
|
+
):
|
|
92
94
|
cols_to_keep_sws = self.cols_to_keep_sws
|
|
93
|
-
|
|
94
|
-
|
|
95
|
+
for additional_column in additional_columns:
|
|
96
|
+
if additional_column in df.columns:
|
|
97
|
+
cols_to_keep_sws = cols_to_keep_sws + [additional_column]
|
|
95
98
|
if "unit_of_measure_symbol" in df.columns:
|
|
96
99
|
cols_to_keep_sws = cols_to_keep_sws + ["unit_of_measure_symbol"]
|
|
97
100
|
return df.select(*cols_to_keep_sws)
|
|
@@ -137,7 +140,7 @@ class SWSGoldIcebergSparkHelper:
|
|
|
137
140
|
F.round(
|
|
138
141
|
F.col("value").cast("FLOAT") * F.pow(10, F.col("display_decimals")), 0
|
|
139
142
|
)
|
|
140
|
-
/ F.pow(10, F.col("
|
|
143
|
+
/ F.pow(10, F.col("display_decimals")).cast("STRING"),
|
|
141
144
|
)
|
|
142
145
|
|
|
143
146
|
# F.round(
|
|
@@ -156,18 +159,26 @@ class SWSGoldIcebergSparkHelper:
|
|
|
156
159
|
self.iceberg_tables.SILVER.iceberg_id
|
|
157
160
|
)
|
|
158
161
|
|
|
159
|
-
def gen_gold_sws_disseminated_data(
|
|
162
|
+
def gen_gold_sws_disseminated_data(
|
|
163
|
+
self, additional_columns: List[str] = []
|
|
164
|
+
) -> DataFrame:
|
|
160
165
|
return (
|
|
161
166
|
self.read_silver_data()
|
|
162
167
|
.transform(self.apply_diss_flag_filter)
|
|
163
|
-
.transform(self.keep_dim_val_attr_columns)
|
|
168
|
+
.transform(self.keep_dim_val_attr_columns, additional_columns)
|
|
164
169
|
)
|
|
165
170
|
|
|
166
|
-
def gen_gold_sws_data(self) -> DataFrame:
|
|
167
|
-
return self.read_bronze_data().transform(
|
|
171
|
+
def gen_gold_sws_data(self, additional_columns: List[str] = []) -> DataFrame:
|
|
172
|
+
return self.read_bronze_data().transform(
|
|
173
|
+
self.keep_dim_val_attr_columns, additional_columns
|
|
174
|
+
)
|
|
168
175
|
|
|
169
|
-
def gen_gold_sws_validated_data(
|
|
170
|
-
|
|
176
|
+
def gen_gold_sws_validated_data(
|
|
177
|
+
self, additional_columns: List[str] = []
|
|
178
|
+
) -> DataFrame:
|
|
179
|
+
return self.read_silver_data().transform(
|
|
180
|
+
self.keep_dim_val_attr_columns, additional_columns
|
|
181
|
+
)
|
|
171
182
|
|
|
172
183
|
def write_gold_sws_validated_data_to_iceberg_and_csv(
|
|
173
184
|
self, df: DataFrame
|
|
@@ -468,7 +468,7 @@ class SWSPostgresSparkReader:
|
|
|
468
468
|
correct_domain_filter, domain=domain_code, unique_columns=["code"]
|
|
469
469
|
)
|
|
470
470
|
for col_type in mapping_dim_col_name_type.values()
|
|
471
|
-
if col_type
|
|
471
|
+
if col_type not in ("year", "other")
|
|
472
472
|
}
|
|
473
473
|
|
|
474
474
|
def import_diss_exceptions_datatable(
|
|
@@ -209,7 +209,7 @@ class SWSSilverIcebergSparkHelper:
|
|
|
209
209
|
F.array_append(
|
|
210
210
|
col("d.diss_note"),
|
|
211
211
|
F.concat(
|
|
212
|
-
col("sy.
|
|
212
|
+
col("sy.note"),
|
|
213
213
|
lit(" from "),
|
|
214
214
|
col("sy.old_code"),
|
|
215
215
|
lit(" to "),
|
|
@@ -225,7 +225,7 @@ class SWSSilverIcebergSparkHelper:
|
|
|
225
225
|
F.array_append(
|
|
226
226
|
col("new_diss_note"),
|
|
227
227
|
F.concat(
|
|
228
|
-
col("ey.
|
|
228
|
+
col("ey.note"),
|
|
229
229
|
lit(" from "),
|
|
230
230
|
col("ey.old_code"),
|
|
231
231
|
lit(" to "),
|
|
@@ -168,7 +168,7 @@ class DatasetTables:
|
|
|
168
168
|
self.OBSERVATION = self.__SWSTable(
|
|
169
169
|
postgres_id=f"{self.__dataset_id}.observation",
|
|
170
170
|
iceberg_id=f"{IcebergDatabases.STAGING_DATABASE}.{self.__dataset_id}_observation",
|
|
171
|
-
schema="id BIGINT, observation_coordinates BIGINT, version INT, value
|
|
171
|
+
schema="id BIGINT, observation_coordinates BIGINT, version INT, value STRING, flag_obs_status STRING, flag_method STRING, created_on TIMESTAMP, created_by INT, replaced_on TIMESTAMP",
|
|
172
172
|
)
|
|
173
173
|
self.OBSERVATION_COORDINATE = self.__SWSTable(
|
|
174
174
|
postgres_id=f"{self.__dataset_id}.observation_coordinate",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sws-spark-dissemination-helper
|
|
3
|
-
Version: 0.0.179
|
|
3
|
+
Version: 0.0.187
|
|
4
4
|
Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
|
|
5
5
|
Project-URL: Repository, https://github.com/un-fao/fao-sws-it-python-spark-dissemination-helper
|
|
6
6
|
Author-email: Daniele Mansillo <danielemansillo@gmail.com>
|
|
@@ -49,7 +49,7 @@ Requires-Dist: pytz==2025.2
|
|
|
49
49
|
Requires-Dist: requests==2.32.3
|
|
50
50
|
Requires-Dist: s3transfer>=0.11.2
|
|
51
51
|
Requires-Dist: six==1.17.0
|
|
52
|
-
Requires-Dist: sws-api-client==2.3
|
|
52
|
+
Requires-Dist: sws-api-client==2.7.3
|
|
53
53
|
Requires-Dist: typing-extensions>=4.12.2
|
|
54
54
|
Requires-Dist: tzdata==2025.2
|
|
55
55
|
Requires-Dist: urllib3==1.26.20
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=N0eQ2LXtpPeZQCWYi85sMLmpXRzLA2erECiba8tqOAY,29595
|
|
2
2
|
sws_spark_dissemination_helper/SWSDatatablesExportHelper.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py,sha256=csqKyYglBkJSBvEkEa1_keHarZZAIJHaV0d64gGJy98,26379
|
|
4
|
-
sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=
|
|
5
|
-
sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=
|
|
6
|
-
sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=
|
|
4
|
+
sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=atQFiY5Mmo-rzHY7WVWg-Guvg8i1ZcaaoKE4ymTaKdE,27750
|
|
5
|
+
sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=qoO___xL1g1iH_KkJ0opLvtNJGU2Dm6bUn-jWem5v2U,20030
|
|
6
|
+
sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=3l5zkEWksnEC-R4mJi8JEHL3ylCMbkMD9a0qbdZQU5E,26345
|
|
7
7
|
sws_spark_dissemination_helper/__init__.py,sha256=42TPbk7KxAud_qY3Sr_F4F7VjyofUlxEJkUXAFQsjRo,327
|
|
8
|
-
sws_spark_dissemination_helper/constants.py,sha256=
|
|
8
|
+
sws_spark_dissemination_helper/constants.py,sha256=MzuC7pqsXF89r-FK7hhmWaZSk5x3GB_YPVSfuK3NYVY,14056
|
|
9
9
|
sws_spark_dissemination_helper/utils.py,sha256=Ge8zXsUIcvFihALDNLF5kCu_tAdRQUE04xE6Yn9xQF4,22008
|
|
10
|
-
sws_spark_dissemination_helper-0.0.
|
|
11
|
-
sws_spark_dissemination_helper-0.0.179.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
12
|
-
sws_spark_dissemination_helper-0.0.179.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
|
|
13
|
-
sws_spark_dissemination_helper-0.0.179.dist-info/RECORD,,
|
|
10
|
+
sws_spark_dissemination_helper-0.0.187.dist-info/METADATA,sha256=PPrDi-8X1HkcAjYs92VJRaAvcf27I3Aw0wljIs1UMO8,2822
|
|
11
|
+
sws_spark_dissemination_helper-0.0.187.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
12
|
+
sws_spark_dissemination_helper-0.0.187.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
|
|
13
|
+
sws_spark_dissemination_helper-0.0.187.dist-info/RECORD,,
|
|
File without changes
|