sws-spark-dissemination-helper 0.0.156__tar.gz → 0.0.158__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/PKG-INFO +1 -1
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/pyproject.toml +1 -1
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/src/sws_spark_dissemination_helper/SWSBronzeIcebergSparkHelper.py +5 -7
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/src/sws_spark_dissemination_helper/SWSPostgresSparkReader.py +31 -19
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/.gitignore +0 -0
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/LICENSE +0 -0
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/README.md +0 -0
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/src/sws_spark_dissemination_helper/SWSDatatablesExportHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/src/sws_spark_dissemination_helper/SWSEasyIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/src/sws_spark_dissemination_helper/SWSGoldIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/src/sws_spark_dissemination_helper/SWSSilverIcebergSparkHelper.py +0 -0
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/src/sws_spark_dissemination_helper/__init__.py +0 -0
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/src/sws_spark_dissemination_helper/constants.py +0 -0
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/src/sws_spark_dissemination_helper/utils.py +0 -0
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/tests/__init__.py +0 -0
- {sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/tests/test.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sws-spark-dissemination-helper
|
|
3
|
-
Version: 0.0.156
|
|
3
|
+
Version: 0.0.158
|
|
4
4
|
Summary: A Python helper package providing streamlined Spark functions for efficient data dissemination processes
|
|
5
5
|
Project-URL: Repository, https://github.com/un-fao/fao-sws-it-python-spark-dissemination-helper
|
|
6
6
|
Author-email: Daniele Mansillo <danielemansillo@gmail.com>
|
|
@@ -347,12 +347,11 @@ class SWSBronzeIcebergSparkHelper:
|
|
|
347
347
|
for dimension_column, df_dimension in zip(
|
|
348
348
|
self.dim_columns_w_time, dfs_dimension_w_validity
|
|
349
349
|
):
|
|
350
|
-
logging.
|
|
351
|
-
logging.
|
|
352
|
-
logging.
|
|
350
|
+
logging.debug(f"Joining dimension column: {dimension_column}")
|
|
351
|
+
logging.debug(f"df_obs_denorm columns: {df_obs_denorm.columns}")
|
|
352
|
+
logging.debug(
|
|
353
353
|
f"Is dimension {dimension_column} in the dataframe? {dimension_column in df_obs_denorm.columns}"
|
|
354
354
|
)
|
|
355
|
-
df_dimension.show(5)
|
|
356
355
|
df_obs_denorm = (
|
|
357
356
|
df_obs_denorm.alias("o")
|
|
358
357
|
.join(
|
|
@@ -363,9 +362,8 @@ class SWSBronzeIcebergSparkHelper:
|
|
|
363
362
|
)
|
|
364
363
|
.drop("code", "join_id")
|
|
365
364
|
)
|
|
366
|
-
logging.
|
|
365
|
+
logging.debug(f"After join count: {df_obs_denorm.count()}")
|
|
367
366
|
|
|
368
|
-
df_element_uom.show(5)
|
|
369
367
|
df_obs_denorm = (
|
|
370
368
|
df_obs_denorm.alias("d")
|
|
371
369
|
.join(
|
|
@@ -375,7 +373,7 @@ class SWSBronzeIcebergSparkHelper:
|
|
|
375
373
|
)
|
|
376
374
|
.drop("element_code")
|
|
377
375
|
)
|
|
378
|
-
logging.
|
|
376
|
+
logging.debug(f"After uom count: {df_obs_denorm.count()}")
|
|
379
377
|
|
|
380
378
|
return df_obs_denorm
|
|
381
379
|
|
|
@@ -94,25 +94,37 @@ class SWSPostgresSparkReader:
|
|
|
94
94
|
|
|
95
95
|
logging.info(f"{pg_table} read start")
|
|
96
96
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
97
|
+
if min_id is None or max_id is None:
|
|
98
|
+
df = (
|
|
99
|
+
self.spark.read.format("jdbc")
|
|
100
|
+
.option("customSchema", custom_schema)
|
|
101
|
+
.option("dbtable", pg_table)
|
|
102
|
+
.option("fetchsize", "1000")
|
|
103
|
+
.option("url", self.jdbc_url)
|
|
104
|
+
.option("user", self.jdbc_conn_properties["user"])
|
|
105
|
+
.option("password", self.jdbc_conn_properties["password"])
|
|
106
|
+
.option("driver", SPARK_POSTGRES_DRIVER)
|
|
107
|
+
.load()
|
|
108
|
+
)
|
|
109
|
+
else:
|
|
110
|
+
df = (
|
|
111
|
+
self.spark.read.format("jdbc")
|
|
112
|
+
.option("customSchema", custom_schema)
|
|
113
|
+
.option("dbtable", pg_table)
|
|
114
|
+
.option("partitionColumn", partition_column)
|
|
115
|
+
.option("lowerBound", min_id)
|
|
116
|
+
.option("upperBound", max_id)
|
|
117
|
+
.option("numPartitions", num_partitions)
|
|
118
|
+
.option("fetchsize", "1000")
|
|
119
|
+
.option("url", self.jdbc_url)
|
|
120
|
+
.option("user", self.jdbc_conn_properties["user"])
|
|
121
|
+
.option("password", self.jdbc_conn_properties["password"])
|
|
122
|
+
.option("driver", SPARK_POSTGRES_DRIVER)
|
|
123
|
+
.load()
|
|
124
|
+
# .repartition(1024, partition_column)
|
|
125
|
+
# .sortWithinPartitions(partition_column)
|
|
126
|
+
# .cache()
|
|
127
|
+
)
|
|
116
128
|
else:
|
|
117
129
|
df = (
|
|
118
130
|
self.spark.read.format("jdbc")
|
{sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/.gitignore
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/tests/__init__.py
RENAMED
|
File without changes
|
{sws_spark_dissemination_helper-0.0.156 → sws_spark_dissemination_helper-0.0.158}/tests/test.py
RENAMED
|
File without changes
|