sws-spark-dissemination-helper 0.0.93__py3-none-any.whl → 0.0.183__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py
@@ -10,7 +10,7 @@ from pyspark.sql.window import Window
 from sws_api_client import Tags
 from sws_api_client.tags import BaseDisseminatedTagTable, TableLayer, TableType
 
-from .constants import IcebergDatabases, IcebergTables
+from .constants import IcebergDatabases, IcebergTables, DatasetDatatables
 from .SWSPostgresSparkReader import SWSPostgresSparkReader
 from .utils import (
     get_or_create_tag,
@@ -103,16 +103,17 @@ class SWSSilverIcebergSparkHelper:
         # The diss_flag column is needed to initialize the condition expression
         # The note column will contain the eventual reasons why diss_flag has been set to false
         return df.withColumn("diss_flag", lit(True)).withColumn(
-            "note", lit([]).cast(ArrayType(StringType()))
+            "diss_note", lit([]).cast(ArrayType(StringType()))
         )
 
     def read_bronze_data(self) -> DataFrame:
         return self.spark.read.option("tag", self.tag_name).table(
             self.iceberg_tables.BRONZE.iceberg_id
         )
+
     def read_bronze_diss_tag_data(self) -> DataFrame:
-        return self.spark.read.option("branch", f"diss_tag_{self.tag_name}").table(
-            self.iceberg_tables.BRONZE.iceberg_id
+        return self.spark.read.option("tag", self.tag_name).table(
+            self.iceberg_tables.BRONZE_DISS_TAG.iceberg_id
         )
 
     def _get_dim_time_flag_columns(self) -> Tuple[List[str], List[str], str, List[str]]:
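
Two behavioural changes sit in this hunk: the note array is renamed to diss_note throughout the silver layer, and read_bronze_diss_tag_data now reads a dedicated BRONZE_DISS_TAG table pinned to an Iceberg snapshot tag instead of a per-tag branch of the bronze table. A minimal sketch of the initialization pattern (DataFrame contents, table and tag names are illustrative, not from the package):

from pyspark.sql import SparkSession
from pyspark.sql.functions import lit
from pyspark.sql.types import ArrayType, StringType

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([("USA", 2020, 1.0)], ["area", "year", "value"])

# Every row starts out disseminable, with an empty string array that the
# later checks append their exclusion reasons to.
df = df.withColumn("diss_flag", lit(True)).withColumn(
    "diss_note", lit([]).cast(ArrayType(StringType()))
)

# The dissemination-tag read, against a hypothetical Iceberg table:
# spark.read.option("tag", "2024_release").table("bronze.mydataset_diss_tag")
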
@@ -162,6 +163,99 @@ class SWSSilverIcebergSparkHelper:
 
         logging.info(f"Checking time validity for {col_name} of type {col_type}")
 
+        if col_type == "area":
+            logging.info(
+                f'Changing start and end year according to "{DatasetDatatables.MAPPING_CODE_CORRECTION.name}"'
+            )
+            df_start_year_correction = self.df_mapping_code_correction.filter(
+                col("var_type") == lit("start_year")
+            )
+            df_end_year_correction = self.df_mapping_code_correction.filter(
+                col("var_type") == lit("end_year")
+            )
+
+            original_col_order = df.columns
+            cols_to_select = df.columns
+            col_name_lower = col_name.lower()
+            cols_to_select = [
+                column
+                for column in cols_to_select
+                if column.lower()
+                not in (
+                    "diss_note",
+                    f"{col_name_lower}_start_date",
+                    f"{col_name_lower}_end_date",
+                )
+            ]
+
+            df = (
+                df.alias("d")
+                .join(
+                    F.broadcast(df_start_year_correction).alias("sy"),
+                    on=col(f"d.{col_name}") == col("sy.mapping_type"),
+                    how="left",
+                )
+                .join(
+                    F.broadcast(df_end_year_correction).alias("ey"),
+                    on=col(f"d.{col_name}") == col("ey.mapping_type"),
+                    how="left",
+                )
+                .withColumn("valid_new_start_year", col("sy.new_code").isNotNull())
+                .withColumn("valid_new_end_year", col("ey.new_code").isNotNull())
+                .withColumn(
+                    "new_diss_note",
+                    F.when(
+                        col("valid_new_start_year"),
+                        F.array_append(
+                            col("d.diss_note"),
+                            F.concat(
+                                col("sy.note"),
+                                lit(" from "),
+                                col("sy.old_code"),
+                                lit(" to "),
+                                col("sy.new_code"),
+                            ),
+                        ),
+                    ).otherwise(col("d.diss_note")),
+                )
+                .withColumn(
+                    "new_diss_note",
+                    F.when(
+                        col("valid_new_end_year"),
+                        F.array_append(
+                            col("new_diss_note"),
+                            F.concat(
+                                col("ey.note"),
+                                lit(" from "),
+                                col("ey.old_code"),
+                                lit(" to "),
+                                col("ey.new_code"),
+                            ),
+                        ),
+                    ).otherwise(col("new_diss_note")),
+                )
+                .withColumn(
+                    f"new_{col_name}_start_date",
+                    F.when(
+                        col("valid_new_start_year"), F.to_date(col("sy.new_code"))
+                    ).otherwise(col(f"d.{col_name}_start_date")),
+                )
+                .withColumn(
+                    f"new_{col_name}_end_date",
+                    F.when(
+                        col("valid_new_end_year"),
+                        F.to_date(F.concat(col("ey.new_code"), lit("-12-31"))),
+                    ).otherwise(col(f"d.{col_name}_end_date")),
+                )
+                .select(
+                    *cols_to_select,
+                    col("new_diss_note").alias("diss_note"),
+                    col(f"new_{col_name}_start_date").alias(f"{col_name}_start_date"),
+                    col(f"new_{col_name}_end_date").alias(f"{col_name}_end_date"),
+                )
+                .select(*original_col_order)
+            )
+
         # Iterate through columns and build conditions dynamically
         start_date_condition = col(f"{col_name}_start_date").isNull() | (
             col(f"{col_name}_start_date") <= col(f"{self.time_column}_start_date")
@@ -176,15 +270,15 @@ class SWSSilverIcebergSparkHelper:
                 start_date_condition & end_date_condition,
             )
             .withColumn("diss_flag", col("diss_flag") & col("condition_result"))
-            # In case the condition is satisfied update diss_flag accordingly and append a note indicating the reason for the observation exclusion from the dissemination
+            # In case the condition is satisfied update diss_flag accordingly and append a diss_note indicating the reason for the observation exclusion from the dissemination
             .withColumn(
-                "note",
+                "diss_note",
                 F.when(
                     ~col("condition_result"),
                     F.array_append(
-                        col("note"), lit(f"{col_type} out of time validity range")
+                        col("diss_note"), lit(f"{col_type} out of time validity range")
                     ),
-                ).otherwise(col("note")),
+                ).otherwise(col("diss_note")),
             )
             .drop("condition_result")
         )
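
This flag-and-annotate idiom recurs in every check that follows: AND the boolean outcome into diss_flag, then append a human-readable reason to diss_note for the failing rows only, so excluded observations stay in the table with an explanation attached. A self-contained sketch with an invented year-range check:

import pyspark.sql.functions as F
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [(2020, True, []), (2055, True, [])],
    "year INT, diss_flag BOOLEAN, diss_note ARRAY<STRING>",
)

checked = (
    df.withColumn("condition_result", col("year") <= lit(2030))
    # Failing the check can only turn diss_flag off, never back on.
    .withColumn("diss_flag", col("diss_flag") & col("condition_result"))
    .withColumn(
        "diss_note",
        F.when(
            ~col("condition_result"),
            F.array_append(col("diss_note"), lit("year out of time validity range")),
        ).otherwise(col("diss_note")),
    )
    .drop("condition_result")
)
checked.show(truncate=False)  # the 2055 row ends up flagged False, with a note
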
@@ -296,7 +390,7 @@ class SWSSilverIcebergSparkHelper:
             col_name (str): The DataFrame column name on which to apply the filter
 
         Returns:
-            DataFrame: The DataFrame with updated `diss_flag` and `note` columns based on the check outcome
+            DataFrame: The DataFrame with updated `diss_flag` and `diss_note` columns based on the check outcome
         """
 
         # Remove the duplicates that may be in the tables
@@ -334,14 +428,14 @@ class SWSSilverIcebergSparkHelper:
                 col("diss_flag") & col("condition_result"),
             )
             .withColumn(
-                "note",
+                "diss_note",
                 F.when(
                     ~col("condition_result"),
                     F.array_append(
-                        col("note"),
+                        col("diss_note"),
                         lit(f"{col_type} not disseminated for this domain"),
                     ),
-                ).otherwise(col("note")),
+                ).otherwise(col("diss_note")),
             )
             .drop("condition_result")
         )
@@ -428,16 +522,16 @@ class SWSSilverIcebergSparkHelper:
                 col("diss_flag") & col("condition_result"),
             )
             .withColumn(
-                "note",
+                "diss_note",
                 F.when(
                     ~col("condition_result"),
                     F.array_append(
-                        col("note"),
+                        col("diss_note"),
                         lit(
                             f"not disseminated according to exception with note: {row_exception['note']}"
                         ),
                     ),
-                ).otherwise(col("note")),
+                ).otherwise(col("diss_note")),
             )
             .drop("condition_result")
         )
@@ -522,7 +616,7 @@ class SWSSilverIcebergSparkHelper:
 
         df = (
             df.withColumn("metadata", F.to_json(col("metadata")))
-            .withColumn("note", F.to_json(col("note")))
+            .withColumn("diss_note", F.to_json(col("diss_note")))
             .coalesce(1)
         )
 
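
Before the coalesced single-file export, F.to_json flattens the diss_note array (and the metadata struct) into plain JSON strings, so downstream writers only see flat string columns. A minimal illustration:

import pyspark.sql.functions as F
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame(
    [(1.0, ["area out of time validity range"])],
    "value DOUBLE, diss_note ARRAY<STRING>",
)
# diss_note becomes the string '["area out of time validity range"]'
df.withColumn("diss_note", F.to_json(col("diss_note"))).show(truncate=False)
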
@@ -551,6 +645,13 @@ class SWSSilverIcebergSparkHelper:
             table=self.iceberg_tables.SILVER.table,
             path=self.iceberg_tables.SILVER.path,
             structure={"columns": df.schema.jsonValue()["fields"]},
+            pinned_columns=[
+                *self.dim_columns_w_time,
+                "value",
+                *self.flag_columns,
+                "diss_flag",
+                "diss_note",
+            ],
         )
         tag = upsert_disseminated_table(
             sws_tags=tags,

sws_spark_dissemination_helper/__init__.py
@@ -2,3 +2,4 @@ from .SWSPostgresSparkReader import SWSPostgresSparkReader
 from .SWSBronzeIcebergSparkHelper import SWSBronzeIcebergSparkHelper
 from .SWSSilverIcebergSparkHelper import SWSSilverIcebergSparkHelper
 from .SWSGoldIcebergSparkHelper import SWSGoldIcebergSparkHelper
+from .SWSEasyIcebergSparkHelper import SWSEasyIcebergSparkHelper

sws_spark_dissemination_helper/constants.py
@@ -1,3 +1,5 @@
+from typing import List
+
 from pyspark.sql.functions import col, lit
 
 SPARK_POSTGRES_DRIVER = "org.postgresql.Driver"
@@ -34,26 +36,70 @@ class DomainFilters:
 class DatasetDatatables:
 
     class __SWSDatatable:
-        def __init__(self, id: str, name: str, schema: str):
+        def __init__(
+            self, id: str, name: str, schema: str, join_columns: List[str] = []
+        ):
             self.id = id
+            self.iceberg_id = f"{IcebergDatabases.BRONZE_DATABASE}.{id.split('.')[1]}"
             self.name = name
             self.schema = schema
+            self.join_columns = join_columns
+
+    # Aggregation Tables
+    AGGREGATES_COMPOSITION = __SWSDatatable(
+        id="datatables.aggregates_composition",
+        name="Aggregation - Composition",
+        schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, aggregation_type STRING, group_code STRING, child_code STRING, group_name STRING, child_name STRING, link_code STRING, factor STRING",
+    )
+    AGGREGATES_ELEMENTS = __SWSDatatable(
+        id="datatables.aggregates_elements",
+        name="Aggregation - Aggregates per elements",
+        schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, element STRING, aggregation_type STRING, code STRING",
+    )
 
     # Dissemination Tables
     DISSEMINATION_TYPE_LIST = __SWSDatatable(
         id="datatables.dissemination_{type}_list",
         name="Dissemination - {type} list",
         schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, code STRING, name STRING, aggregation_type STRING, dissemination BOOLEAN, aggregation BOOLEAN",
+        join_columns=["domain", "code"],
     )
     DISSEMINATION_EXCEPTIONS = __SWSDatatable(
         id="datatables.dissemination_exception",
         name="Dissemination - Exceptions",
         schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, dim1_code STRING, dim2_code STRING, dim3_code STRING, dim4_code STRING, dim5_code STRING, dim6_code STRING, dim7_code STRING, status_flag STRING, method_flag STRING, dissemination BOOLEAN, aggregation BOOLEAN, note STRING",
+        join_columns=[
+            "domain",
+            "dim1_code",
+            "dim2_code",
+            "dim3_code",
+            "dim4_code",
+            "dim5_code",
+            "dim6_code",
+            "dim7_code",
+            "status_flag",
+            "method_flag",
+        ],
+    )
+    DISPLAY_DECIMALS = __SWSDatatable(
+        id="datatables.display_decimals",
+        name="Dissemination - Display Decimals",
+        schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, column_1_name STRING, column_1_value STRING, column_2_name STRING, column_2_value STRING, display_decimals STRING",
+        join_columns=[
+            "domain",
+            "column_1_name",
+            "column_1_value",
+            "column_2_name",
+            "column_2_value",
+            "display_decimals",
+        ],
     )
+    # TODO Deprecate
     DISSEMINATION_ITEM_LIST_FAOSTAT = __SWSDatatable(
         id="datatables.dissemination_item_list_faostat",
         name="Dissemination - Item list - FAOSTAT",
         schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, code STRING, name STRING, aggregation_type STRING, dissemination BOOLEAN, aggregation BOOLEAN",
+        join_columns=["domain", "code"],
     )
 
     # Mapping Tables
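
Each datatable constant now carries join_columns, its natural key. The consuming code is not part of this diff; one plausible use, sketched purely as an assumption, is deduplicating a datatable on that key before left-joining it onto the observations:

from pyspark.sql import DataFrame

def attach_datatable(
    observations: DataFrame, datatable: DataFrame, join_columns: list
) -> DataFrame:
    # Hypothetical consumer: drop duplicate keys first so the left join
    # cannot fan out observation rows.
    deduplicated = datatable.dropDuplicates(join_columns)
    return observations.join(deduplicated, on=join_columns, how="left")
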
@@ -61,34 +107,23 @@ class DatasetDatatables:
         id="datatables.aggregates_mapping_domains_id",
         name="Mapping - Domains ID",
         schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, domain_name STRING, sws_source_id STRING, sws_destination_id STRING",
+        join_columns=["domain", "sws_source_id"],
     )
     MAPPING_CODELIST_TYPE = __SWSDatatable(
         id="datatables.mapping_codelist_type",
         name="Mapping Codelist type",
         schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, col_name STRING, col_type STRING",
+        join_columns=["domain", "col_name"],
     )
     MAPPING_CODE_CORRECTION = __SWSDatatable(
         id="datatables.aggregates_mapping_code_correction",
         name="Mapping - Code correction",
         schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, old_code STRING, new_code STRING, var_type STRING, delete BOOLEAN, multiplier FLOAT, mapping_type STRING",
-    )
-    MAPPING_SDMX_COLUMN_NAMES = __SWSDatatable(
-        id="datatables.mapping_sdmx_col_names",
-        name="Mapping - SDMX column names",
-        schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, internal_name STRING, external_name STRING, delete BOOLEAN, add BOOLEAN, default_value STRING",
-    )
-    MAPPING_SDMX_CODES = __SWSDatatable(
-        id="datatables.mapping_pre_dissemination",
-        name="Mapping - Pre dissemination",
-        schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, internal_code STRING, external_code STRING, var_type STRING, delete BOOLEAN, multiplier FLOAT, mapping_type STRING",
-    )
-    MAPPING_UNITS_OF_MEASURE = __SWSDatatable(
-        id="datatables.mapping_units_of_measure",
-        name="Mapping - Units of measure",
-        schema=f"{DATATABLE_COLUMNS_SCHEMA}, domain STRING, sws_code STRING, sws_multiplier INT, sdmx_code STRING, sdmx_multiplier INT, value_multiplier INT, delete BOOLEAN, mapping_type STRING",
+        join_columns=["domain", "old_code", "var_type", "mapping_type"],
     )
 
     # Non-SWS Sources Tables
+    # TODO To deprecate
     FAOSTAT_CODE_MAPPING = __SWSDatatable(
         id="datatables.faostat_code_mapping",
         name="FAOSTAT Code Mapping",
@@ -150,6 +185,11 @@ class DatasetTables:
             iceberg_id=f"{IcebergDatabases.STAGING_DATABASE}.{self.__dataset_id}_metadata_element",
             schema="id BIGINT, metadata INT, metadata_element_type INT, value STRING",
         )
+        self.TAG_OBSERVATION = self.__SWSTable(
+            postgres_id=f"{self.__dataset_id}.tag_observation",
+            iceberg_id=f"{IcebergDatabases.STAGING_DATABASE}.{self.__dataset_id}_tag_observation",
+            schema="tag BIGINT, observation INT",
+        )
 
         # Reference data
         self.CODELISTS = [
@@ -181,18 +221,21 @@ class DatasetTables:
            iceberg_id=f"{IcebergDatabases.STAGING_DATABASE}.metadata_element_type",
            schema="id INT, metadata_type INT, code STRING, description STRING, mandatory BOOLEAN, repeatable BOOLEAN, private BOOLEAN",
        )
-
        LANGUAGE = __SWSTable(
            postgres_id="reference_data.language",
            iceberg_id=f"{IcebergDatabases.STAGING_DATABASE}.language",
            schema="id INT, country_code STRING, description STRING",
        )
-
        UNIT_OF_MEASURE = __SWSTable(
            postgres_id="reference_data.unit_of_measure",
            iceberg_id=f"{IcebergDatabases.STAGING_DATABASE}.unit_of_measure",
            schema="id INT, code STRING, sdmx_code STRING, metric BOOLEAN, description STRING, symbol STRING, base_unit STRING, multiplier DECIMAL",
        )
+       DATASET = __SWSTable(
+           postgres_id="reference_data.dataset",
+           iceberg_id=f"{IcebergDatabases.STAGING_DATABASE}.dataset",
+           schema="id INT, xml_name STRING",
+       )
 
        # Operational data
        USER = __SWSTable(
@@ -200,6 +243,11 @@ class DatasetTables:
            iceberg_id=f"{IcebergDatabases.STAGING_DATABASE}.user",
            schema="id INT, username STRING, preferences INT, email STRING, active BOOLEAN, settings STRING",
        )
+       TAG = __SWSTable(
+           postgres_id="operational_data.tag",
+           iceberg_id=f"{IcebergDatabases.STAGING_DATABASE}.tag",
+           schema="id INT, name STRING, reference_date DATE, dataset INT, type STRING, released_on DATE, released_by INT, properties STRING",
+       )
 
 
 class IcebergTable:
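
The new TAG and TAG_OBSERVATION tables together let the helpers resolve which observation ids belong to a named tag: tag.id joins to tag_observation.tag, and tag_observation.observation points at the observation row. A hedged sketch of that join (the staging table names are hypothetical; the real ids come from DatasetTables):

from pyspark.sql import SparkSession
from pyspark.sql.functions import col

spark = SparkSession.builder.getOrCreate()

tag = spark.table("staging.tag")  # id, name, dataset, ...
tag_observation = spark.table("staging.mydataset_tag_observation")  # tag, observation

tagged_obs_ids = (
    tag.filter(col("name") == "my_diss_tag")
    .join(tag_observation, on=col("id") == col("tag"))
    .select("observation")
)
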
@@ -218,24 +266,44 @@ class IcebergTables:
         self.__dataset_id = dataset_id
         self.__tag_name = tag_name
 
-        self.BRONZE = self._create_iceberg_table("BRONZE")
-        self.SILVER = self._create_iceberg_table("SILVER", prefix=domain)
+        # TODO Fix later with a more appropriate DATABASE
+        self.DENORMALIZED_OBSERVATION = self.create_iceberg_table(
+            "BRONZE", suffix="denormalized_observation"
+        )
+        self.DENORMALIZED_METADATA = self.create_iceberg_table(
+            "BRONZE", suffix="denormalized_metadata"
+        )
+        self.GROUPED_METADATA = self.create_iceberg_table(
+            "BRONZE", suffix="grouped_metadata"
+        )
+        self.TABLE = self.create_iceberg_table("BRONZE")
+        self.TABLE_FILTERED = self.create_iceberg_table("BRONZE", suffix="filtered")
+        self.BRONZE = self.create_iceberg_table("BRONZE")
+        self.BRONZE_DISS_TAG = self.create_iceberg_table("BRONZE", suffix="diss_tag")
+        self.SILVER = self.create_iceberg_table("SILVER", prefix=domain)
 
         # GOLD tables with specific suffixes
-        self.GOLD_SDMX = self._create_iceberg_table(
+        self.GOLD_SWS = self.create_iceberg_table("GOLD", prefix=domain, suffix="sws")
+        self.GOLD_SDMX = self.create_iceberg_table(
             "GOLD", prefix=domain, suffix="sdmx_disseminated"
         )
-        self.GOLD_SWS_VALIDATED = self._create_iceberg_table(
+        self.GOLD_SWS_VALIDATED = self.create_iceberg_table(
             "GOLD", prefix=domain, suffix="sws_validated"
         )
-        self.GOLD_SWS_DISSEMINATED = self._create_iceberg_table(
+        self.GOLD_SWS_DISSEMINATED = self.create_iceberg_table(
             "GOLD", prefix=domain, suffix="sws_disseminated"
         )
-        self.GOLD_PRE_SDMX = self._create_iceberg_table(
+        self.GOLD_PRE_SDMX = self.create_iceberg_table(
             "GOLD", prefix=domain, suffix="pre_sdmx"
         )
+        self.GOLD_FAOSTAT = self.create_iceberg_table(
+            "GOLD", prefix=domain, suffix="faostat"
+        )
+        self.GOLD_FAOSTAT_UNFILTERED = self.create_iceberg_table(
+            "GOLD", prefix=domain, suffix="faostat_unfiltered"
+        )
 
-    def _create_iceberg_table(
+    def create_iceberg_table(
         self, level: str, prefix: str = "", suffix: str = ""
     ) -> IcebergTable:
         database = getattr(IcebergDatabases, f"{level}_DATABASE")
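
_create_iceberg_table is made public as create_iceberg_table, which is what lets the constructor above (and, in principle, external callers) mint all the new layer tables. Only the signature and the database lookup are visible in this hunk; the name assembly below is a hypothetical reconstruction, shown only to illustrate how level, prefix, and suffix combine:

# Hypothetical reconstruction; only the getattr database lookup is
# confirmed by the diff.
class IcebergDatabases:
    BRONZE_DATABASE = "bronze"
    SILVER_DATABASE = "silver"
    GOLD_DATABASE = "gold"

def create_iceberg_table(
    level: str, dataset_id: str, prefix: str = "", suffix: str = ""
) -> str:
    database = getattr(IcebergDatabases, f"{level}_DATABASE")
    # Join the non-empty name parts with underscores.
    table = "_".join(part for part in (prefix, dataset_id, suffix) if part)
    return f"{database}.{table}"

print(create_iceberg_table("BRONZE", "mydataset", suffix="diss_tag"))
# -> bronze.mydataset_diss_tag
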

sws_spark_dissemination_helper/utils.py
@@ -363,16 +363,34 @@ def map_codes_and_remove_null_duplicates(
            "diss_flag", F.when(col("delete"), lit(False)).otherwise(col("diss_flag"))
        )
        .withColumn(
-           "note",
+           "diss_note",
            F.when(
                col("delete"),
                F.array_append(
-                   col("note"),
+                   col("diss_note"),
                    lit(
                        f"The observation is not disseminated according to the Mapping - Code correction table"
                    ),
                ),
-           ).otherwise(col("note")),
+           ).otherwise(col("diss_note")),
+       )
+       # Add mapping message to notes
+       .withColumn(
+           "diss_note",
+           F.when(
+               ~col("is_duplicate")
+               & col("new_dim_code").isNotNull()
+               & (col("new_dim_code") != lit("")),
+               F.array_append(
+                   col("diss_note"),
+                   F.concat(
+                       lit(f"Dimension {col_name} code was changed from "),
+                       col(col_name),
+                       lit(" to "),
+                       col("new_dim_code"),
+                   ),
+               ),
+           ).otherwise(col("diss_note")),
        )
        .withColumn(
            col_name,
@@ -391,18 +409,18 @@ def map_codes_and_remove_null_duplicates(
            ).otherwise(col("diss_flag")),
        )
        .withColumn(
-           "note",
+           "diss_note",
            F.when(
                col("is_duplicate")
                & col("new_dim_code").isNotNull()
                & (col("new_dim_code") != lit("")),
                F.array_append(
-                   col("note"),
+                   col("diss_note"),
                    lit(
                        f"The code correction was not applied to avoid observation duplications"
                    ),
                ),
-           ).otherwise(col("note")),
+           ).otherwise(col("diss_note")),
        )
        # Check the domain specific multiplier first and then the standard multiplier
        .withColumn("value", col("value") * F.coalesce(col("multiplier"), lit(1)))

sws_spark_dissemination_helper-0.0.183.dist-info/METADATA
@@ -1,8 +1,8 @@
 Metadata-Version: 2.4
 Name: sws-spark-dissemination-helper
-Version: 0.0.93
+Version: 0.0.183
 Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
-Project-URL: Repository, https://bitbucket.org/cioapps/sws-it-python-spark-dissemination-helper
+Project-URL: Repository, https://github.com/un-fao/fao-sws-it-python-spark-dissemination-helper
 Author-email: Daniele Mansillo <danielemansillo@gmail.com>
 License: MIT License
 
@@ -31,27 +31,27 @@ Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Requires-Python: >=3.9
 Requires-Dist: annotated-types==0.7.0
-Requires-Dist: boto3==1.36.18
-Requires-Dist: botocore==1.36.18
+Requires-Dist: boto3>=1.40.0
+Requires-Dist: botocore>=1.40.0
 Requires-Dist: certifi==2025.1.31
 Requires-Dist: charset-normalizer==3.4.1
-Requires-Dist: idna==3.10
+Requires-Dist: idna>=3.10
 Requires-Dist: jmespath==1.0.1
 Requires-Dist: numpy==2.0.2
-Requires-Dist: pandas==2.2.3
+Requires-Dist: pandas==2.3.3
 Requires-Dist: py4j==0.10.9.7
 Requires-Dist: pydantic-core==2.27.2
 Requires-Dist: pydantic==2.10.6
 Requires-Dist: pyspark==3.5.4
 Requires-Dist: python-dateutil==2.9.0.post0
 Requires-Dist: python-dotenv==0.19.2
-Requires-Dist: pytz==2025.1
+Requires-Dist: pytz==2025.2
 Requires-Dist: requests==2.32.3
-Requires-Dist: s3transfer==0.11.2
+Requires-Dist: s3transfer>=0.11.2
 Requires-Dist: six==1.17.0
-Requires-Dist: sws-api-client==1.4.5
-Requires-Dist: typing-extensions==4.12.2
-Requires-Dist: tzdata==2025.1
+Requires-Dist: sws-api-client==2.3.0
+Requires-Dist: typing-extensions>=4.12.2
+Requires-Dist: tzdata==2025.2
 Requires-Dist: urllib3==1.26.20
 Description-Content-Type: text/markdown
 

sws_spark_dissemination_helper-0.0.183.dist-info/RECORD
@@ -0,0 +1,13 @@
+sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=N0eQ2LXtpPeZQCWYi85sMLmpXRzLA2erECiba8tqOAY,29595
+sws_spark_dissemination_helper/SWSDatatablesExportHelper.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py,sha256=csqKyYglBkJSBvEkEa1_keHarZZAIJHaV0d64gGJy98,26379
+sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=atQFiY5Mmo-rzHY7WVWg-Guvg8i1ZcaaoKE4ymTaKdE,27750
+sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=V_rH4UYoFZfMUc82U-KxeL_o8F44HnMHfLLXoyNxHxs,20016
+sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=3l5zkEWksnEC-R4mJi8JEHL3ylCMbkMD9a0qbdZQU5E,26345
+sws_spark_dissemination_helper/__init__.py,sha256=42TPbk7KxAud_qY3Sr_F4F7VjyofUlxEJkUXAFQsjRo,327
+sws_spark_dissemination_helper/constants.py,sha256=cVjTS3xbJNKz-1i7c1dJk2PcOZzQhvuHUp9i4PNIPh4,14055
+sws_spark_dissemination_helper/utils.py,sha256=Ge8zXsUIcvFihALDNLF5kCu_tAdRQUE04xE6Yn9xQF4,22008
+sws_spark_dissemination_helper-0.0.183.dist-info/METADATA,sha256=LDVmzDL6ZDhGrRBd3flpX0TPEIJONpdZJodUGrAvemw,2822
+sws_spark_dissemination_helper-0.0.183.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+sws_spark_dissemination_helper-0.0.183.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
+sws_spark_dissemination_helper-0.0.183.dist-info/RECORD,,

sws_spark_dissemination_helper-0.0.93.dist-info/RECORD
@@ -1,11 +0,0 @@
-sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py,sha256=ZPCpHgPVCsf7-7tWl6DDWgnXLkS02RoCvsomO3TmQ24,20418
-sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py,sha256=ZC7hxkppo6qmfCc2z5vm2Y2iH1901F-rx9Er9cxuzP4,16037
-sws_spark_dissemination_helper/SWSPostgresSparkReader.py,sha256=ja7AbOfbmC_EXHCJk7UMDzzbA-LRxzPkaaUmuvcihJ8,17449
-sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py,sha256=zEppNq5shiHZH2yt5faWGsb5QEmpAQS0ToIrG6fmv6o,22231
-sws_spark_dissemination_helper/__init__.py,sha256=Efjoe9V4vGXWVp-DY5P6NbRwIUr_zkZJkDmMi-lf5Bc,262
-sws_spark_dissemination_helper/constants.py,sha256=hpHHlbojShMWRfyIelXz6c5BqFzO48Oap1zmztlMMrs,11349
-sws_spark_dissemination_helper/utils.py,sha256=6SzrXX0xhvynRyv-vRFDbc6V4UNe_RzKKETZAtefnhg,21341
-sws_spark_dissemination_helper-0.0.93.dist-info/METADATA,sha256=y1PL3ZygwfoyBxglsrNeP6IZvaUGTYCM03RuIjrqDMc,2823
-sws_spark_dissemination_helper-0.0.93.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-sws_spark_dissemination_helper-0.0.93.dist-info/licenses/LICENSE,sha256=zFzeb_j_6pXEHwH8Z0OpIkKFJk7vmhZjdem-K0d4zU4,1073
-sws_spark_dissemination_helper-0.0.93.dist-info/RECORD,,