tgedr-dataops-abs 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,129 @@
1
+ """Chain abstractions for building processing pipelines.
2
+
3
+ Provides abstract base classes and interfaces for chaining operations together
4
+ in a chain of responsibility pattern, allowing sequential processing steps.
5
+ """
6
+
7
+ import abc
8
+ from typing import Any
9
+
10
+ from tgedr_dataops_abs.processor import Processor
11
+
12
+
13
class ChainException(Exception):
    """Error type for failures that occur while building or running a chain."""
15
+
16
+
17
class ChainInterface(metaclass=abc.ABCMeta):
    """Structural interface for chain handlers.

    Any class exposing callable ``next`` and ``execute`` attributes is
    recognised as a subclass, without having to inherit from this class.
    """

    @classmethod
    def __subclasshook__(cls, subclass):  # noqa: ANN001, ANN206
        """Report whether ``subclass`` exposes callable ``next`` and ``execute``.

        Returns
        -------
        bool | NotImplementedType
            ``True`` when both attributes exist and are callable, otherwise
            ``NotImplemented`` so the regular ABC lookup takes over.
        """
        required = ("next", "execute")
        if all(callable(getattr(subclass, name, None)) for name in required):
            return True
        return NotImplemented
32
+
33
+
34
+ class ChainMixin(abc.ABC):
35
+ """Mixin providing chain functionality for sequential execution.
36
+
37
+ Implements the chain of responsibility pattern for processing operations.
38
+ """
39
+
40
+ def next(self, handler: "ChainMixin") -> "ChainMixin":
41
+ """Add the next handler in the chain.
42
+
43
+ Parameters
44
+ ----------
45
+ handler : ChainMixin
46
+ The next handler to add to the chain.
47
+
48
+ Returns
49
+ -------
50
+ ChainMixin
51
+ The current chain instance for method chaining.
52
+ """
53
+ if "_next" not in self.__dict__ or self._next is None:
54
+ self._next: "ChainMixin" = handler # noqa: UP037
55
+ else:
56
+ self._next.next(handler)
57
+ return self
58
+
59
+ @abc.abstractmethod
60
+ def execute(self, context: dict[str, Any] | None = None) -> Any:
61
+ """Execute the operation in the chain.
62
+
63
+ Parameters
64
+ ----------
65
+ context : dict[str, Any] | None
66
+ Context to pass through the chain.
67
+
68
+ Returns
69
+ -------
70
+ Any
71
+ Result of the execution.
72
+ """
73
+ raise NotImplementedError
74
+
75
+
76
class ProcessorChainMixin(ChainMixin):
    """Mixin that runs ``process`` and then hands over to the next handler.

    The class itself does not define ``process``; it is expected to be
    provided by the class this mixin is combined with.
    """

    def execute(self, context: dict[str, Any] | None = None) -> Any:
        """Process ``context`` and continue with the next handler, if any.

        Parameters
        ----------
        context : dict[str, Any] | None
            Context passed to ``process`` and then, unchanged as an object,
            to the next handler's ``execute``.

        Returns
        -------
        Any
            The value returned by the next handler's ``execute`` when a next
            handler is linked, otherwise the value returned by this
            handler's own ``process``.
        """
        result = self.process(context=context)
        successor = vars(self).get("_next")
        if successor is None:
            # Tail of the chain: surface our own result instead of dropping it.
            return result
        return successor.execute(context=context)
98
+
99
+
100
@ChainInterface.register
class ProcessorChain(ProcessorChainMixin, Processor):
    """Chainable processor.

    Inherits the chaining ``execute`` from ProcessorChainMixin and the
    abstract ``process`` contract from Processor; adds nothing of its own.
    """
106
+
107
+
108
@ChainInterface.register
class Chain(ChainMixin, abc.ABC):
    """Abstract base for custom chainable components.

    Inherits ``next`` from ChainMixin and leaves ``execute`` abstract.
    """

    @abc.abstractmethod
    def execute(self, context: dict[str, Any] | None = None) -> Any:
        """Run the operation of this chain element.

        Parameters
        ----------
        context : dict[str, Any] | None
            Context made available to the operation.

        Returns
        -------
        Any
            Whatever the concrete implementation produces.
        """
        raise NotImplementedError
@@ -0,0 +1,145 @@
1
+ """ETL (Extract, Transform, Load) abstractions for data processing workflows.
2
+
3
+ Provides abstract base classes for implementing ETL pipelines with configuration
4
+ injection, validation hooks, and structured execution flow.
5
+
6
+ Example:
7
+ -------
8
+ ```python
9
+ class MyEtl(Etl):
10
+ @Etl.inject_configuration
11
+ def extract(self, MY_PARAM) -> None:
12
+ # "MY_PARAM" should be supplied in 'configuration' dict
13
+ # otherwise an exception will be raised
14
+ pass
15
+
16
+ @Etl.inject_configuration
17
+ def load(self, NOT_IN_CONFIG=123) -> None:
18
+ # If you try to inject a configuration key that is NOT in the
19
+ # configuration dictionary supplied to the constructor, it will
20
+ # not throw an error as long as you set a default value
21
+ assert NOT_IN_CONFIG == 123, "This will be ok"
22
+ ```
23
+ """
24
+
25
+ from abc import ABC, abstractmethod
26
+ import inspect
27
+ import logging
28
+ from typing import Any
29
+
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
class EtlException(Exception):
    """Error type for failures raised by ETL components."""
36
+
37
+
38
+ class Etl(ABC):
39
+ """Abstract base class for ETL (Extract, Transform, Load) operations.
40
+
41
+ Provides a template method pattern for ETL workflows with configuration
42
+ injection and optional validation hooks.
43
+ """
44
+
45
+ def __init__(self, configuration: dict[str, Any] | None = None) -> None:
46
+ """Initialize a new instance of ETL.
47
+
48
+ Parameters
49
+ ----------
50
+ configuration : dict[str, Any]
51
+ source for configuration injection
52
+ """
53
+ self._configuration = configuration
54
+
55
+ @abstractmethod
56
+ def extract(self) -> Any:
57
+ """Extract data from source.
58
+
59
+ Returns
60
+ -------
61
+ Any
62
+ Extracted data.
63
+ """
64
+ raise NotImplementedError
65
+
66
+ @abstractmethod
67
+ def transform(self) -> Any:
68
+ """Transform extracted data.
69
+
70
+ Returns
71
+ -------
72
+ Any
73
+ Transformed data.
74
+ """
75
+ raise NotImplementedError
76
+
77
+ @abstractmethod
78
+ def load(self) -> Any:
79
+ """Load transformed data to destination.
80
+
81
+ Returns
82
+ -------
83
+ Any
84
+ Result of load operation.
85
+ """
86
+ raise NotImplementedError
87
+
88
+ def validate_extract(self) -> None: # noqa: B027
89
+ """Optional extra checks for extract step."""
90
+
91
+ def validate_transform(self) -> None: # noqa: B027
92
+ """Optional extra checks for transform step."""
93
+
94
+ def run(self) -> Any:
95
+ """Execute the complete ETL workflow.
96
+
97
+ Runs extract, validate_extract, transform, validate_transform, and load
98
+ in sequence with structured logging.
99
+
100
+ Returns
101
+ -------
102
+ Any
103
+ Result from the load operation.
104
+ """
105
+ logger.info("[run|in]")
106
+
107
+ self.extract()
108
+ self.validate_extract()
109
+
110
+ self.transform()
111
+ self.validate_transform()
112
+
113
+ result: Any = self.load()
114
+
115
+ logger.info("[run|out] => %s", result)
116
+ return result
117
+
118
+ @staticmethod
119
+ def inject_configuration(f): # noqa: ANN001, ANN205, D102
120
+ def decorator(self): # noqa: ANN001, ANN202
121
+ signature = inspect.signature(f)
122
+
123
+ missing_params = []
124
+ params = {}
125
+ for param in [parameter for parameter in signature.parameters if parameter != "self"]:
126
+ if signature.parameters[param].default != inspect._empty: # noqa: SLF001
127
+ params[param] = signature.parameters[param].default
128
+ else:
129
+ params[param] = None
130
+ if self._configuration is None or param not in self._configuration:
131
+ missing_params.append(param)
132
+
133
+ if self._configuration is not None and param in self._configuration:
134
+ params[param] = self._configuration[param]
135
+
136
+ if 0 < len(missing_params):
137
+ msg = f"missing required configuration parameters: {missing_params}"
138
+ raise EtlException(msg)
139
+
140
+ return f(
141
+ self,
142
+ *[params[argument] for argument in params],
143
+ )
144
+
145
+ return decorator
@@ -0,0 +1,55 @@
1
+ """Processor abstractions for data transformation operations.
2
+
3
+ Provides abstract base classes and interfaces for implementing processors that
4
+ transform data based on context.
5
+ """
6
+
7
+ import abc
8
+ from typing import Any
9
+
10
+
11
class ProcessorException(Exception):
    """Error type for failures raised by processors."""
13
+
14
+
15
class ProcessorInterface(metaclass=abc.ABCMeta):
    """Structural interface for processors.

    Any class exposing a callable ``process`` attribute is recognised as a
    subclass, without having to inherit from this class.
    """

    @classmethod
    def __subclasshook__(cls, subclass):  # noqa: ANN001, ANN206
        """Report whether ``subclass`` exposes a callable ``process``.

        Returns
        -------
        bool | NotImplementedType
            ``True`` when the attribute exists and is callable, otherwise
            ``NotImplemented`` so the regular ABC lookup takes over.
        """
        if callable(getattr(subclass, "process", None)):
            return True
        return NotImplemented
25
+
26
+
27
@ProcessorInterface.register
class Processor(abc.ABC):
    """Abstract base for components that process a context."""

    def __init__(self, config: dict[str, Any] | None = None) -> None:
        """Keep the optional configuration on the instance.

        Parameters
        ----------
        config : dict[str, Any] | None
            Configuration dictionary, stored as ``_config``.
        """
        self._config = config

    @abc.abstractmethod
    def process(self, context: dict[str, Any] | None = None) -> Any:
        """Process the given context.

        Parameters
        ----------
        context : dict[str, Any] | None
            Context holding the data and parameters to work on.

        Returns
        -------
        Any
            Whatever the concrete implementation produces.
        """
        raise NotImplementedError
@@ -0,0 +1,96 @@
1
+ """Sink abstractions for write-only data persistence operations.
2
+
3
+ Provides abstract base classes and interfaces for implementing sinks that handle
4
+ data output operations (put, delete) without read capabilities.
5
+ """
6
+
7
+ import abc
8
+ from typing import Any
9
+
10
+ from tgedr_dataops_abs.chain import Chain
11
+
12
+
13
class SinkException(Exception):
    """Error type for failures raised by sinks."""
15
+
16
+
17
class SinkInterface(metaclass=abc.ABCMeta):
    """Structural interface for sinks.

    Any class exposing callable ``put`` and ``delete`` attributes is
    recognised as a subclass, without having to inherit from this class.
    """

    @classmethod
    def __subclasshook__(cls, subclass):  # noqa: ANN001, ANN206
        """Report whether ``subclass`` exposes callable ``put`` and ``delete``.

        Returns
        -------
        bool | NotImplementedType
            ``True`` when both attributes exist and are callable, otherwise
            ``NotImplemented`` so the regular ABC lookup takes over.
        """
        required = ("put", "delete")
        if all(callable(getattr(subclass, name, None)) for name in required):
            return True
        return NotImplemented
32
+
33
+
34
@SinkInterface.register
class Sink(abc.ABC):
    """Abstract write-only persistence component.

    Declares ``put`` and ``delete``; where the data ends up is defined by
    the implementing classes.
    """

    def __init__(self, config: dict[str, Any] | None = None) -> None:
        """Keep the optional configuration on the instance.

        Parameters
        ----------
        config : dict[str, Any] | None
            Configuration dictionary, stored as ``_config``.
        """
        self._config = config

    @abc.abstractmethod
    def put(self, context: dict[str, Any] | None = None) -> Any:
        """Write data to the sink.

        Parameters
        ----------
        context : dict[str, Any] | None
            Context holding the data and metadata for the operation.

        Returns
        -------
        Any
            Result of the put operation.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def delete(self, context: dict[str, Any] | None = None) -> None:
        """Remove data from the sink.

        Parameters
        ----------
        context : dict[str, Any] | None
            Context describing what to delete.
        """
        raise NotImplementedError
74
+
75
+
76
@SinkInterface.register
class SinkChain(Chain, abc.ABC):
    """Chain element whose ``execute`` delegates to ``put``.

    ``put`` is not defined on this class or on Chain; concrete subclasses
    have to provide it (for instance by also inheriting from a sink).
    """

    def execute(self, context: dict[str, Any] | None = None) -> Any:
        """Run the sink step by calling ``put`` with the given context.

        Parameters
        ----------
        context : dict[str, Any] | None
            Context forwarded to ``put``.

        Returns
        -------
        Any
            Whatever ``put`` returns.
        """
        outcome = self.put(context=context)
        return outcome
@@ -0,0 +1,105 @@
1
+ """Source abstractions for read-only data retrieval operations.
2
+
3
+ Provides abstract base classes and interfaces for implementing sources that handle
4
+ data input operations (get, list) without write capabilities.
5
+ """
6
+
7
+ import abc
8
+ from typing import Any
9
+
10
+ from tgedr_dataops_abs.chain import Chain
11
+
12
+
13
class SourceException(Exception):
    """Error type for failures raised by sources."""
15
+
16
+
17
class NoSourceException(SourceException):
    """Source error signalling that the requested source was not found."""
19
+
20
+
21
class SourceInterface(metaclass=abc.ABCMeta):
    """Structural interface for sources.

    Any class exposing callable ``get`` and ``list`` attributes is
    recognised as a subclass, without having to inherit from this class.
    """

    @classmethod
    def __subclasshook__(cls, subclass):  # noqa: ANN001, ANN206
        """Report whether ``subclass`` exposes callable ``get`` and ``list``.

        Returns
        -------
        bool | NotImplementedType
            ``True`` when both attributes exist and are callable, otherwise
            ``NotImplemented`` so the regular ABC lookup takes over.
        """
        required = ("get", "list")
        if all(callable(getattr(subclass, name, None)) for name in required):
            return True
        return NotImplemented
36
+
37
+
38
@SourceInterface.register
class Source(abc.ABC):
    """Abstract read-only retrieval component.

    Declares ``list`` and ``get``; where the data comes from is defined by
    the implementing classes.
    """

    def __init__(self, config: dict[str, Any] | None = None) -> None:
        """Keep the optional configuration on the instance.

        Parameters
        ----------
        config : dict[str, Any] | None
            Configuration dictionary, stored as ``_config``.
        """
        self._config = config

    @abc.abstractmethod
    def get(self, context: dict[str, Any] | None = None) -> Any:
        """Retrieve data from the source.

        Parameters
        ----------
        context : dict[str, Any] | None
            Context holding the parameters of the get operation.

        Returns
        -------
        Any
            Retrieved data.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def list(self, context: dict[str, Any] | None = None) -> Any:
        """Enumerate the items available in the source.

        Parameters
        ----------
        context : dict[str, Any] | None
            Context holding the parameters of the list operation.

        Returns
        -------
        Any
            The available items.
        """
        raise NotImplementedError
83
+
84
+
85
@SourceInterface.register
class SourceChain(Chain, abc.ABC):
    """Chain element whose ``execute`` delegates to ``get``.

    ``get`` is not defined on this class or on Chain; concrete subclasses
    have to provide it (for instance by also inheriting from a source).
    """

    def execute(self, context: dict[str, Any] | None = None) -> Any:
        """Run the source step by calling ``get`` with the given context.

        Parameters
        ----------
        context : dict[str, Any] | None
            Context forwarded to ``get``.

        Returns
        -------
        Any
            Whatever ``get`` returns.
        """
        fetched = self.get(context=context)
        return fetched
@@ -0,0 +1,113 @@
1
+ """Store abstractions for CRUD (Create, Read, Update, Delete) operations.
2
+
3
+ Provides abstract base classes and interfaces for implementing stores that handle
4
+ full data persistence operations including get, save, update, and delete.
5
+ """
6
+
7
+ import abc
8
+ from typing import Any
9
+
10
+
11
class StoreException(Exception):
    """Error type for failures raised by stores."""
13
+
14
+
15
class NoStoreException(StoreException):
    """Store error signalling that the requested store item was not found."""
17
+
18
+
19
class StoreInterface(metaclass=abc.ABCMeta):
    """Structural interface for stores.

    Any class exposing callable ``get``, ``delete``, ``save`` and ``update``
    attributes is recognised as a subclass, without having to inherit from
    this class.
    """

    @classmethod
    def __subclasshook__(cls, subclass):  # noqa: ANN001, ANN206
        """Report whether ``subclass`` exposes all four store operations.

        Returns
        -------
        bool | NotImplementedType
            ``True`` when ``get``, ``delete``, ``save`` and ``update`` all
            exist and are callable, otherwise ``NotImplemented`` so the
            regular ABC lookup takes over.
        """
        required = ("get", "delete", "save", "update")
        if all(callable(getattr(subclass, name, None)) for name in required):
            return True
        return NotImplemented
38
+
39
+
40
@StoreInterface.register
class Store(abc.ABC):
    """Abstract persistence component with CRUD-like operations.

    Declares ``get``, ``delete``, ``save`` and ``update``; the storage
    backend is defined by the implementing classes.
    """

    def __init__(self, config: dict[str, Any] | None = None) -> None:
        """Keep the optional configuration on the instance.

        Parameters
        ----------
        config : dict[str, Any] | None
            Configuration dictionary, stored as ``_config``.
        """
        self._config = config

    @abc.abstractmethod
    def get(self, key: str, **kwargs) -> Any:  # noqa: ANN003
        """Retrieve the data stored under ``key``.

        Parameters
        ----------
        key : str
            The key identifying the data to retrieve.
        **kwargs
            Additional store-specific parameters.

        Returns
        -------
        Any
            Retrieved data.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def delete(self, key: str, **kwargs) -> None:  # noqa: ANN003
        """Remove the data stored under ``key``.

        Parameters
        ----------
        key : str
            The key identifying the data to delete.
        **kwargs
            Additional store-specific parameters.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def save(self, df: Any, key: str, **kwargs) -> Any:  # noqa: ANN003
        """Persist ``df`` under ``key``.

        Parameters
        ----------
        df : Any
            The data to save.
        key : str
            The key to associate with the data.
        **kwargs
            Additional store-specific parameters.

        Returns
        -------
        Any
            Implementation-defined result of the save operation.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def update(self, df: Any, key: str, **kwargs) -> Any:  # noqa: ANN003
        """Update the data stored under ``key`` with ``df``.

        Parameters
        ----------
        df : Any
            The updated data.
        key : str
            The key identifying the data to update.
        **kwargs
            Additional store-specific parameters.

        Returns
        -------
        Any
            Implementation-defined result of the update operation.
        """
        raise NotImplementedError
@@ -0,0 +1,44 @@
1
+ Metadata-Version: 2.4
2
+ Name: tgedr-dataops-abs
3
+ Version: 0.0.1
4
+ Summary: this is a template for a python package
5
+ Author-email: developer <developer@email.com>
6
+ Requires-Python: >=3.11
7
+ Description-Content-Type: text/markdown
8
+ Requires-Dist: pandas>=2.3.0
9
+
10
+ # tgedr-dataops-abs
11
+
12
+ ![Coverage](./coverage.svg)
13
+ [![PyPI](https://img.shields.io/pypi/v/tgedr-dataops-abs)](https://pypi.org/project/tgedr-dataops-abs/)
14
+
15
+
16
+ data operations related code - abstractions
17
+
18
+
19
+ ## motivation
20
+ Abstract constructs that capture common use cases in a data engineering context. Think of commonly accepted abstract classes and interfaces that can be implemented and extended according to different requirements and constraints.
21
+
22
+
23
+ ## package namespaces and their contents
24
+
25
+ - __Chain__ : chain-like abstract class (for sequential processing) ([example](tests/tgedr_dataops_abs/test_processor_chain.py))
26
+ - __Etl__ : Extract-Transform-Load abstract class to be extended and used in data pipelines ([example](tests/tgedr_dataops_abs/test_etl.py))
27
+ - __Processor__ : abstract class for data processing ([example](tests/tgedr_dataops_abs/test_processor_chain.py))
28
+ - __Sink__: abstract **sink** class defining methods (`put` and `delete`) to manage persistence of data somewhere as defined by implementing classes ([example](tests/tgedr_dataops_abs/test_sink.py))
29
+ - __Source__: abstract **source** class defining methods (`list` and `get`) to manage retrieval of data from somewhere as defined by implementing classes ([example](tests/tgedr_dataops_abs/test_source.py))
30
+ - __Store__ : abstract class used to manage persistence, defining CRUD-like (CreateReadUpdateDelete) methods ([example](tests/tgedr_dataops_abs/test_store.py))
31
+
32
+
33
+
34
+ ## development
35
+ - main requirements:
36
+ - _uv_
37
+ - _bash_
38
+ - Clone the repository like this:
39
+
40
+ ``` bash
41
+ git clone git@github.com:tgedr/dataops-abs
42
+ ```
43
+ - cd into the folder: `cd dataops-abs`
44
+ - install requirements: `./helper.sh reqs`
@@ -0,0 +1,11 @@
1
+ tgedr_dataops_abs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ tgedr_dataops_abs/chain.py,sha256=ucz7CvZ4R6v5jZRT9smcX1BHJ9fresraoqfr_kSRovk,3571
3
+ tgedr_dataops_abs/etl.py,sha256=43lL-LPRdY8O2YDlowfvVXL5Kz5_EIWG93f3gzy479I,4129
4
+ tgedr_dataops_abs/processor.py,sha256=a5Gv6RcxzoORLfLbL_KXZiogfXUSDjoP2IJJDNroT6Y,1582
5
+ tgedr_dataops_abs/sink.py,sha256=kLNaui9v3fyx5qfLKCagBIp2G4xotPGwcfkVke354qo,2720
6
+ tgedr_dataops_abs/source.py,sha256=oDd-Ov4R8ioWu3WRqdPsRCPgjjOZRMiT0_wLYGxRQ3o,2924
7
+ tgedr_dataops_abs/store.py,sha256=u9BQrlBe4Qfx00QxynPtSZKlvOMez4nlNP0EpDVD6JY,3211
8
+ tgedr_dataops_abs-0.0.1.dist-info/METADATA,sha256=eDNLfEhG6U3gUNx2m23dRPeTyD-FKwrG8G7LJmnzUtY,1926
9
+ tgedr_dataops_abs-0.0.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
10
+ tgedr_dataops_abs-0.0.1.dist-info/top_level.txt,sha256=VJEbLpMLOMqqPUh_BXrAxxV1LP0kBtXNS95AGxknAJ8,18
11
+ tgedr_dataops_abs-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.10.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ tgedr_dataops_abs