sws-spark-dissemination-helper 0.0.126__tar.gz → 0.0.128__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/PKG-INFO +1 -1
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/pyproject.toml +1 -1
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/src/sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py +53 -46
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/.gitignore +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/LICENSE +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/README.md +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/old_requirements.txt +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/requirements.txt +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/src/sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/src/sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/src/sws_spark_dissemination_helper/SWSPostgresSparkReader.py +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/src/sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/src/sws_spark_dissemination_helper/__init__.py +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/src/sws_spark_dissemination_helper/constants.py +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/src/sws_spark_dissemination_helper/utils.py +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/tests/__init__.py +0 -0
- {sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/tests/test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sws-spark-dissemination-helper
|
|
3
|
-
Version: 0.0.126
|
|
3
|
+
Version: 0.0.128
|
|
4
4
|
Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
|
|
5
5
|
Project-URL: Repository, https://bitbucket.org/cioapps/sws-it-python-spark-dissemination-helper
|
|
6
6
|
Author-email: Daniele Mansillo <danielemansillo@gmail.com>
|
|
@@ -25,6 +25,7 @@ class SWSEasyIcebergSparkHelper:
|
|
|
25
25
|
dataset_details: dict = None,
|
|
26
26
|
dataset_tables: DatasetTables = None,
|
|
27
27
|
keep_history: bool = False,
|
|
28
|
+
write_csv: bool = True,
|
|
28
29
|
) -> None:
|
|
29
30
|
self.spark: SparkSession = spark
|
|
30
31
|
self.dataset_details: dict = dataset_details
|
|
@@ -35,6 +36,7 @@ class SWSEasyIcebergSparkHelper:
|
|
|
35
36
|
self.dataset_tables: DatasetTables = dataset_tables
|
|
36
37
|
self.iceberg_tables: IcebergTables = iceberg_tables
|
|
37
38
|
self.keep_history: bool = keep_history
|
|
39
|
+
self.write_csv: bool = write_csv
|
|
38
40
|
|
|
39
41
|
if dataset_details is not None:
|
|
40
42
|
(
|
|
@@ -335,16 +337,16 @@ class SWSEasyIcebergSparkHelper:
|
|
|
335
337
|
|
|
336
338
|
logging.info(f"Iceberg tag '{self.tag_name}' created")
|
|
337
339
|
|
|
338
|
-
df_denorm = self.df_denorm.withColumn(
|
|
339
|
-
|
|
340
|
-
|
|
340
|
+
df_denorm = self.df_denorm.withColumn("metadata", F.to_json(col("metadata")))
|
|
341
|
+
if self.write_csv:
|
|
342
|
+
df_denorm = df_denorm.coalesce(1)
|
|
341
343
|
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
344
|
+
save_cache_csv(
|
|
345
|
+
df=df_denorm,
|
|
346
|
+
bucket=self.bucket,
|
|
347
|
+
prefix=self.iceberg_tables.TABLE.csv_prefix,
|
|
348
|
+
tag_name=self.tag_name,
|
|
349
|
+
)
|
|
348
350
|
|
|
349
351
|
return df_denorm
|
|
350
352
|
|
|
@@ -372,21 +374,22 @@ class SWSEasyIcebergSparkHelper:
|
|
|
372
374
|
)
|
|
373
375
|
logging.debug(f"Tag with Added Iceberg Table: {tag}")
|
|
374
376
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
377
|
+
if self.write_csv:
|
|
378
|
+
new_csv_table = BaseDisseminatedTagTable(
|
|
379
|
+
id="unfiltered_csv",
|
|
380
|
+
name="Unfiltered csv",
|
|
381
|
+
description="Csv table containing all the raw data imported from the SWS and denormalized",
|
|
382
|
+
layer=TableLayer.CACHE,
|
|
383
|
+
private=True,
|
|
384
|
+
debug=True,
|
|
385
|
+
type=TableType.CSV,
|
|
386
|
+
path=self.iceberg_tables.TABLE.csv_path,
|
|
387
|
+
structure={"columns": self.df_denorm.schema.jsonValue()["fields"]},
|
|
388
|
+
)
|
|
389
|
+
tag = tags.add_dissemination_table(
|
|
390
|
+
self.dataset_id, self.tag_name, new_csv_table
|
|
391
|
+
)
|
|
392
|
+
logging.debug(f"Tag with Added csv Table: {tag}")
|
|
390
393
|
|
|
391
394
|
logging.info("Unfiltered data tags successfully written")
|
|
392
395
|
|
|
@@ -418,18 +421,21 @@ class SWSEasyIcebergSparkHelper:
|
|
|
418
421
|
|
|
419
422
|
disseminated_tag_df = self.filtered_df.withColumn(
|
|
420
423
|
"metadata", F.to_json(col("metadata"))
|
|
421
|
-
).coalesce(1)
|
|
422
|
-
|
|
423
|
-
save_cache_csv(
|
|
424
|
-
df=disseminated_tag_df,
|
|
425
|
-
bucket=self.bucket,
|
|
426
|
-
prefix=f"{self.iceberg_tables.TABLE_FILTERED.csv_prefix}",
|
|
427
|
-
tag_name=self.tag_name,
|
|
428
424
|
)
|
|
429
425
|
|
|
426
|
+
if self.write_csv:
|
|
427
|
+
disseminated_tag_df = disseminated_tag_df.coalesce(1)
|
|
428
|
+
|
|
429
|
+
save_cache_csv(
|
|
430
|
+
df=disseminated_tag_df,
|
|
431
|
+
bucket=self.bucket,
|
|
432
|
+
prefix=f"{self.iceberg_tables.TABLE_FILTERED.csv_prefix}",
|
|
433
|
+
tag_name=self.tag_name,
|
|
434
|
+
)
|
|
435
|
+
|
|
430
436
|
return disseminated_tag_df
|
|
431
437
|
|
|
432
|
-
def
|
|
438
|
+
def write_sws_filtered_dissemination_tag(self, tags: Tags):
|
|
433
439
|
# Get or create a new tag
|
|
434
440
|
tag = get_or_create_tag(tags, self.dataset_id, self.tag_name, self.tag_name)
|
|
435
441
|
logging.debug(f"Tag: {tag}")
|
|
@@ -452,20 +458,21 @@ class SWSEasyIcebergSparkHelper:
|
|
|
452
458
|
)
|
|
453
459
|
logging.debug(f"Tag with Added Iceberg Table: {tag}")
|
|
454
460
|
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
461
|
+
if self.write_csv:
|
|
462
|
+
new_csv_table = BaseDisseminatedTagTable(
|
|
463
|
+
id="filtered_csv",
|
|
464
|
+
name="Filtered csv",
|
|
465
|
+
description="Csv table containing the raw data imported from the SWS, denormalized and filtered per dimension cached",
|
|
466
|
+
layer=TableLayer.CACHE,
|
|
467
|
+
private=True,
|
|
468
|
+
type=TableType.CSV,
|
|
469
|
+
path=self.iceberg_tables.TABLE_FILTERED.csv_path,
|
|
470
|
+
structure={"columns": self.filtered_df.schema.jsonValue()["fields"]},
|
|
471
|
+
)
|
|
472
|
+
tag = tags.add_dissemination_table(
|
|
473
|
+
self.dataset_id, self.tag_name, new_csv_table
|
|
474
|
+
)
|
|
468
475
|
|
|
469
|
-
|
|
476
|
+
logging.debug(f"Tag with Added csv Table: {tag}")
|
|
470
477
|
|
|
471
478
|
logging.info("Filtered data tags successfully written")
|
{sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/.gitignore
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/requirements.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/tests/__init__.py
RENAMED
|
File without changes
|
{sws_spark_dissemination_helper-0.0.126 → sws_spark_dissemination_helper-0.0.128}/tests/test.py
RENAMED
|
File without changes
|