sws-spark-dissemination-helper 0.0.179__py3-none-any.whl → 0.0.187__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -88,10 +88,13 @@ class SWSGoldIcebergSparkHelper:
     def apply_diss_flag_filter(self, df: DataFrame) -> DataFrame:
         return df.filter(col("diss_flag"))
 
-    def keep_dim_val_attr_columns(self, df: DataFrame):
+    def keep_dim_val_attr_columns(
+        self, df: DataFrame, additional_columns: List[str] = []
+    ):
         cols_to_keep_sws = self.cols_to_keep_sws
-        if "note" in df.columns:
-            cols_to_keep_sws = cols_to_keep_sws + ["note"]
+        for additional_column in additional_columns:
+            if additional_column in df.columns:
+                cols_to_keep_sws = cols_to_keep_sws + [additional_column]
         if "unit_of_measure_symbol" in df.columns:
             cols_to_keep_sws = cols_to_keep_sws + ["unit_of_measure_symbol"]
         return df.select(*cols_to_keep_sws)
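The reworked keep_dim_val_attr_columns replaces the hardcoded "note" handling with a caller-supplied additional_columns list, keeping each requested column only if it actually exists in the DataFrame. A minimal sketch of that selection logic, assuming an illustrative DataFrame and column names (the helper class itself is not constructed here):

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()

    # Illustrative data: dimension/value columns plus an optional "note" attribute.
    df = spark.createDataFrame(
        [("AFG", "2020", "12.5", "provisional")],
        ["geographic_area", "year", "value", "note"],
    )

    cols_to_keep_sws = ["geographic_area", "year", "value"]  # stand-in for self.cols_to_keep_sws
    additional_columns = ["note", "diss_note"]

    # Same logic as the new method: keep an extra column only when it is present.
    for additional_column in additional_columns:
        if additional_column in df.columns:
            cols_to_keep_sws = cols_to_keep_sws + [additional_column]

    df.select(*cols_to_keep_sws).show()  # "note" is kept, the absent "diss_note" is skipped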
@@ -137,7 +140,7 @@ class SWSGoldIcebergSparkHelper:
             F.round(
                 F.col("value").cast("FLOAT") * F.pow(10, F.col("display_decimals")), 0
             )
-            / F.pow(10, F.col("dec")).cast("STRING"),
+            / F.pow(10, F.col("display_decimals")).cast("STRING"),
         )
 
         # F.round(
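This hunk fixes the divisor of the display-rounding expression: the scale factor is now built from the same display_decimals column used to scale the value up, rather than a "dec" column, so the value ends up rounded to display_decimals decimal places. A minimal standalone sketch of the scale-round-unscale pattern, with made-up sample rows:

    from pyspark.sql import SparkSession, functions as F

    spark = SparkSession.builder.master("local[1]").getOrCreate()

    df = spark.createDataFrame([("3.14159", 2), ("2.71828", 3)], ["value", "display_decimals"])

    # Scale up by 10^display_decimals, round to an integer, then scale back down.
    rounded = df.withColumn(
        "value",
        (
            F.round(F.col("value").cast("FLOAT") * F.pow(10, F.col("display_decimals")), 0)
            / F.pow(10, F.col("display_decimals"))
        ).cast("STRING"),
    )
    rounded.show()  # 3.14 and 2.718, up to floating-point precision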
@@ -156,18 +159,26 @@ class SWSGoldIcebergSparkHelper:
             self.iceberg_tables.SILVER.iceberg_id
         )
 
-    def gen_gold_sws_disseminated_data(self) -> DataFrame:
+    def gen_gold_sws_disseminated_data(
+        self, additional_columns: List[str] = []
+    ) -> DataFrame:
         return (
             self.read_silver_data()
             .transform(self.apply_diss_flag_filter)
-            .transform(self.keep_dim_val_attr_columns)
+            .transform(self.keep_dim_val_attr_columns, additional_columns)
         )
 
-    def gen_gold_sws_data(self) -> DataFrame:
-        return self.read_bronze_data().transform(self.keep_dim_val_attr_columns)
+    def gen_gold_sws_data(self, additional_columns: List[str] = []) -> DataFrame:
+        return self.read_bronze_data().transform(
+            self.keep_dim_val_attr_columns, additional_columns
+        )
 
-    def gen_gold_sws_validated_data(self) -> DataFrame:
-        return self.read_silver_data().transform(self.keep_dim_val_attr_columns)
+    def gen_gold_sws_validated_data(
+        self, additional_columns: List[str] = []
+    ) -> DataFrame:
+        return self.read_silver_data().transform(
+            self.keep_dim_val_attr_columns, additional_columns
+        )
 
     def write_gold_sws_validated_data_to_iceberg_and_csv(
         self, df: DataFrame
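The gen_gold_* entry points now accept additional_columns and hand it through DataFrame.transform, which forwards extra positional arguments to the supplied function (PySpark 3.3+). A self-contained sketch of that forwarding mechanism, using a stand-in function and illustrative column names rather than the real helper:

    from typing import List

    from pyspark.sql import DataFrame, SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()

    def keep_columns(df: DataFrame, additional_columns: List[str] = []) -> DataFrame:
        # Stand-in for keep_dim_val_attr_columns: a base set plus any extras present.
        base = ["code", "value"]
        extras = [c for c in additional_columns if c in df.columns]
        return df.select(*(base + extras))

    df = spark.createDataFrame([("A", "1", "ok")], ["code", "value", "note"])

    # transform(func, *args) calls func(df, *args), which is how additional_columns
    # reaches keep_dim_val_attr_columns in the gen_gold_* methods above.
    df.transform(keep_columns, ["note"]).show()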
@@ -468,7 +468,7 @@ class SWSPostgresSparkReader:
                 correct_domain_filter, domain=domain_code, unique_columns=["code"]
             )
             for col_type in mapping_dim_col_name_type.values()
-            if col_type != "other"
+            if col_type not in ("year", "other")
         }
 
     def import_diss_exceptions_datatable(
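The codelist-import comprehension in SWSPostgresSparkReader now skips "year" dimensions as well as "other" ones. A small plain-Python illustration of the filter's effect, with an assumed mapping of dimension names to types:

    # Assumed example mapping; the real one comes from the dataset configuration.
    mapping_dim_col_name_type = {
        "geographic_area": "codelist",
        "element": "codelist",
        "time_period": "year",
        "note": "other",
    }

    # "year" and "other" dimension types no longer trigger a codelist import.
    kept = [
        col_type
        for col_type in mapping_dim_col_name_type.values()
        if col_type not in ("year", "other")
    ]
    print(kept)  # ['codelist', 'codelist']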
@@ -209,7 +209,7 @@ class SWSSilverIcebergSparkHelper:
             F.array_append(
                 col("d.diss_note"),
                 F.concat(
-                    col("sy.diss_note"),
+                    col("sy.note"),
                     lit(" from "),
                     col("sy.old_code"),
                     lit(" to "),
@@ -225,7 +225,7 @@ class SWSSilverIcebergSparkHelper:
             F.array_append(
                 col("new_diss_note"),
                 F.concat(
-                    col("ey.diss_note"),
+                    col("ey.note"),
                     lit(" from "),
                     col("ey.old_code"),
                     lit(" to "),
@@ -168,7 +168,7 @@ class DatasetTables:
         self.OBSERVATION = self.__SWSTable(
             postgres_id=f"{self.__dataset_id}.observation",
             iceberg_id=f"{IcebergDatabases.STAGING_DATABASE}.{self.__dataset_id}_observation",
-            schema="id BIGINT, observation_coordinates BIGINT, version INT, value FLOAT, flag_obs_status STRING, flag_method STRING, created_on TIMESTAMP, created_by INT, replaced_on TIMESTAMP",
+            schema="id BIGINT, observation_coordinates BIGINT, version INT, value STRING, flag_obs_status STRING, flag_method STRING, created_on TIMESTAMP, created_by INT, replaced_on TIMESTAMP",
         )
         self.OBSERVATION_COORDINATE = self.__SWSTable(
             postgres_id=f"{self.__dataset_id}.observation_coordinate",
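With this schema change the observation value is read from Postgres as a string rather than a float, so numeric processing has to cast it explicitly, as the gold helper's rounding expression already does with F.col("value").cast("FLOAT"). A tiny illustrative sketch with a made-up row and a reduced subset of the observation schema:

    from pyspark.sql import SparkSession, functions as F

    spark = SparkSession.builder.master("local[1]").getOrCreate()

    # Subset of the observation schema, with value declared as STRING.
    df = spark.createDataFrame(
        [(1, "12.50", "E")],
        schema="id BIGINT, value STRING, flag_obs_status STRING",
    )

    # Cast explicitly before doing arithmetic on the value.
    df.withColumn("value_num", F.col("value").cast("FLOAT")).show()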
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sws-spark-dissemination-helper
-Version: 0.0.179
+Version: 0.0.187
 Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
 Project-URL: Repository, https://github.com/un-fao/fao-sws-it-python-spark-dissemination-helper
 Author-email: Daniele Mansillo <danielemansillo@gmail.com>
@@ -49,7 +49,7 @@ Requires-Dist: pytz==2025.2
 Requires-Dist: requests==2.32.3
 Requires-Dist: s3transfer>=0.11.2
 Requires-Dist: six==1.17.0
-Requires-Dist: sws-api-client==2.3.0
+Requires-Dist: sws-api-client==2.7.3
 Requires-Dist: typing-extensions>=4.12.2
 Requires-Dist: tzdata==2025.2
 Requires-Dist: urllib3==1.26.20
@@ -1,13 +1,13 @@
 sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=N0eQ2LXtpPeZQCWYi85sMLmpXRzLA2erECiba8tqOAY,29595
 sws_spark_dissemination_helper/SWSDatatablesExportHelper.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py,sha256=csqKyYglBkJSBvEkEa1_keHarZZAIJHaV0d64gGJy98,26379
-sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=IbJ9h1JKhuEMvfnkfUq2crrdzfa9BeuQualYVvFp1NA,27364
-sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=V_rH4UYoFZfMUc82U-KxeL_o8F44HnMHfLLXoyNxHxs,20016
-sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=EXpqPHbxld8MPShE6Vo8h4y1vpBt_BjMtS4RPJcPeTU,26355
+sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=atQFiY5Mmo-rzHY7WVWg-Guvg8i1ZcaaoKE4ymTaKdE,27750
+sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=qoO___xL1g1iH_KkJ0opLvtNJGU2Dm6bUn-jWem5v2U,20030
+sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=3l5zkEWksnEC-R4mJi8JEHL3ylCMbkMD9a0qbdZQU5E,26345
 sws_spark_dissemination_helper/__init__.py,sha256=42TPbk7KxAud_qY3Sr_F4F7VjyofUlxEJkUXAFQsjRo,327
-sws_spark_dissemination_helper/constants.py,sha256=cVjTS3xbJNKz-1i7c1dJk2PcOZzQhvuHUp9i4PNIPh4,14055
+sws_spark_dissemination_helper/constants.py,sha256=MzuC7pqsXF89r-FK7hhmWaZSk5x3GB_YPVSfuK3NYVY,14056
 sws_spark_dissemination_helper/utils.py,sha256=Ge8zXsUIcvFihALDNLF5kCu_tAdRQUE04xE6Yn9xQF4,22008
-sws_spark_dissemination_helper-0.0.179.dist-info/METADATA,sha256=lma0Z7whiF6IMbknHWIsgiI8otJCgfnPoCaI0Wx9Odo,2822
-sws_spark_dissemination_helper-0.0.179.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-sws_spark_dissemination_helper-0.0.179.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
-sws_spark_dissemination_helper-0.0.179.dist-info/RECORD,,
+sws_spark_dissemination_helper-0.0.187.dist-info/METADATA,sha256=PPrDi-8X1HkcAjYs92VJRaAvcf27I3Aw0wljIs1UMO8,2822
+sws_spark_dissemination_helper-0.0.187.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+sws_spark_dissemination_helper-0.0.187.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
+sws_spark_dissemination_helper-0.0.187.dist-info/RECORD,,