tgedr-dataops 0.0.33__py3-none-any.whl → 0.0.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,8 +3,9 @@ import logging
3
3
  from typing import Any, Dict, List, Optional
4
4
  from pandas import DataFrame
5
5
  from deltalake import DeltaTable
6
+ from deltalake.exceptions import TableNotFoundError
6
7
 
7
- from tgedr.dataops.source.source import Source, SourceException
8
+ from tgedr.dataops.source.source import Source, SourceException, NoSourceException
8
9
 
9
10
 
10
11
  logger = logging.getLogger()
@@ -36,10 +37,13 @@ class DeltaTableSource(Source, ABC):
36
37
  if self.CONTEXT_KEY_COLUMNS in context:
37
38
  columns = context[self.CONTEXT_KEY_COLUMNS]
38
39
 
39
- delta_table = DeltaTable(
40
- table_uri=context[self.CONTEXT_KEY_URL], storage_options=self._storage_options, without_files=True
41
- )
42
- result = delta_table.to_pandas(columns=columns)
40
+ try:
41
+ delta_table = DeltaTable(
42
+ table_uri=context[self.CONTEXT_KEY_URL], storage_options=self._storage_options, without_files=True
43
+ )
44
+ result = delta_table.to_pandas(columns=columns)
45
+ except TableNotFoundError as tnfe:
46
+ raise NoSourceException(f"could not find delta table: {context[self.CONTEXT_KEY_URL]}")
43
47
 
44
48
  logger.info(f"[get|out] => {result}")
45
49
  return result
@@ -8,6 +8,10 @@ class SourceException(Exception):
8
8
  pass
9
9
 
10
10
 
11
+ class NoSourceException(SourceException):
12
+ pass
13
+
14
+
11
15
  class SourceInterface(metaclass=abc.ABCMeta):
12
16
  """
13
17
  def get(self, context: Optional[Dict[str, Any]] = None) -> Any:
@@ -6,7 +6,7 @@ import pandas as pd
6
6
  import pyarrow as pa
7
7
 
8
8
  from tgedr.dataops.store.fs_single_partition_parquet import FsSinglePartitionParquetStore
9
- from src.nn.gs.ss.dataops.commons.utils_fs import remove_s3_protocol
9
+ from tgedr.dataops.commons.utils_fs import remove_s3_protocol
10
10
 
11
11
 
12
12
  logger = logging.getLogger(__name__)
@@ -0,0 +1,10 @@
1
+ from typing import Any
2
+ from great_expectations.dataset.dataset import Dataset
3
+
4
+ from tgedr.dataops.validation.abs import DataValidation
5
+ from great_expectations.dataset.sparkdf_dataset import SparkDFDataset
6
+
7
+
8
+ class Impl(DataValidation):
9
+ def _get_dataset(self, df: Any) -> Dataset:
10
+ return SparkDFDataset(df)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tgedr-dataops
3
- Version: 0.0.33
3
+ Version: 0.0.36
4
4
  Summary: data operations related code
5
5
  Home-page: https://github.com/jtviegas-sandbox/dataops
6
6
  Author: joao tiago viegas
@@ -14,23 +14,24 @@ tgedr/dataops/sink/s3_file_sink.py,sha256=m1D6SwuqYEVqvdA9XrL6nVlX_oWLRDT6v74mh_
14
14
  tgedr/dataops/sink/sink.py,sha256=8rG3ZNpzeZ82Ac1IoPzkdQTs006IbG-k39APFCeXogk,1271
15
15
  tgedr/dataops/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  tgedr/dataops/source/abstract_s3_file_source.py,sha256=AMnTWxs57Jni46jz5PvrZTRjd-Ah_TUZEP4Rcx7YziM,1513
17
- tgedr/dataops/source/delta_table_source.py,sha256=ElZezb6JeYRdkvY8Ryk1EJUvbUHCWRrlblmdUZZBSW4,1410
17
+ tgedr/dataops/source/delta_table_source.py,sha256=tXeJCM-QhssztEpyKGzb6zcM1AlaUwV1Y5d2RHCiOTU,1653
18
18
  tgedr/dataops/source/local_delta_table.py,sha256=Z9413skOSsNGfSNeBs-ETVXQJcW4bhS3MYmCwySo9Tc,1545
19
19
  tgedr/dataops/source/local_fs_file_source.py,sha256=wihK_wVV6xb8AlkCmZHSRlRmqqm6w9Aq-PI0oOnhn_k,2570
20
20
  tgedr/dataops/source/pd_df_s3_source.py,sha256=3Mc0VDGTOmk1HT951l6Qd_bqL0DpPjlfZk0JQjWh3JE,2092
21
21
  tgedr/dataops/source/s3_delta_table.py,sha256=d_FoFDNogjZifRQv8V2OmTXbPYymx9QaQcW15uX34pI,2974
22
22
  tgedr/dataops/source/s3_file_copy.py,sha256=DbHvstAqi23cywoG6nHpQxvzerrnepApOsv6zsELYNQ,3930
23
23
  tgedr/dataops/source/s3_file_source.py,sha256=C8Y0h89p1eBWNGxV4oTNzVLLwZoAoyshHuMlDKKqDA0,4072
24
- tgedr/dataops/source/source.py,sha256=xm9XNGG-WTo_EGdJ3cfNfSC-nuHeEO2XeKiPIhv4Qoc,1297
24
+ tgedr/dataops/source/source.py,sha256=REeqluMGLMjoDWtdZthzUYkmVeHemSV7t9wjc6eTpJE,1350
25
25
  tgedr/dataops/store/fs_single_partition_parquet.py,sha256=CR3406emhxn33jjObnMotXEmZGfh4Iu5Ygv30FvkY6Y,9695
26
26
  tgedr/dataops/store/local_fs_single_partition_parquet.py,sha256=N_I96fqxQAp2fWBngoDci3aR1-kcmkWjIVRD0nUi07U,683
27
- tgedr/dataops/store/s3_single_partition_parquet.py,sha256=JwMhRO9403OLhepeAdbSDNhmM7I4dLoriQfr2IxymHE,3256
27
+ tgedr/dataops/store/s3_single_partition_parquet.py,sha256=2vSyLb-mZ2mAbRhaDoCcsOEnKzQ30AsUYTlqhKQCroc,3249
28
28
  tgedr/dataops/store/spark_delta.py,sha256=AHqIKDi9axOKpMJHt4AiBGX8V2mFV_7vNPRpaw37rDY,15101
29
29
  tgedr/dataops/store/store.py,sha256=uAuR7MWVdKRaisQ69rFqliLnJrsgpTzbsOh7uPmLSlI,1315
30
30
  tgedr/dataops/validation/abs.py,sha256=84HGUuh6k_uG-ON0bauR4lDBTfUeI3GmxOiWsMkTu3E,1521
31
31
  tgedr/dataops/validation/pandas.py,sha256=Vfr38f3txbTy098ufPUcRsCgrYu47Rg35upZ9IXcLSk,315
32
- tgedr_dataops-0.0.33.dist-info/LICENSE,sha256=awOCsWJ58m_2kBQwBUGWejVqZm6wuRtCL2hi9rfa0X4,1211
33
- tgedr_dataops-0.0.33.dist-info/METADATA,sha256=UBD63O5LSsgSWLEyc-c0avh5AXFtl9ADSBE2HPQN91g,607
34
- tgedr_dataops-0.0.33.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
35
- tgedr_dataops-0.0.33.dist-info/top_level.txt,sha256=acugNvvENatFXbPxKQD9YI5PpzzehbAwyY1keiIkR7I,6
36
- tgedr_dataops-0.0.33.dist-info/RECORD,,
32
+ tgedr/dataops/validation/pyspark.py,sha256=4OEnA21_vSwB5HjaD5KZdfIjBnTn3KUAqvVGHnY-zNI,317
33
+ tgedr_dataops-0.0.36.dist-info/LICENSE,sha256=awOCsWJ58m_2kBQwBUGWejVqZm6wuRtCL2hi9rfa0X4,1211
34
+ tgedr_dataops-0.0.36.dist-info/METADATA,sha256=O_K6YMP__Vgb4Hf4t-34hZYXFRIrvgV6ZkoW3xDseXw,607
35
+ tgedr_dataops-0.0.36.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
36
+ tgedr_dataops-0.0.36.dist-info/top_level.txt,sha256=acugNvvENatFXbPxKQD9YI5PpzzehbAwyY1keiIkR7I,6
37
+ tgedr_dataops-0.0.36.dist-info/RECORD,,