tgedr-dataops 0.0.37__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {tgedr/dataops → tgedr_dataops}/commons/s3_connector.py +32 -5
  2. tgedr_dataops/commons/utils_fs.py +187 -0
  3. tgedr_dataops/quality/pandas_validation.py +21 -0
  4. tgedr_dataops/sink/local_fs_file_sink.py +77 -0
  5. {tgedr/dataops → tgedr_dataops}/sink/s3_file_sink.py +47 -11
  6. tgedr_dataops/source/abstract_s3_file_source.py +72 -0
  7. tgedr_dataops/source/local_fs_file_source.py +108 -0
  8. tgedr_dataops/source/pd_df_s3_source.py +130 -0
  9. {tgedr/dataops → tgedr_dataops}/source/s3_file_copy.py +64 -28
  10. tgedr_dataops/source/s3_file_extended_source.py +68 -0
  11. {tgedr/dataops → tgedr_dataops}/source/s3_file_source.py +63 -27
  12. tgedr_dataops/store/fs_single_partition_parquet.py +331 -0
  13. tgedr_dataops/store/local_fs_single_partition_parquet.py +56 -0
  14. tgedr_dataops/store/s3_single_partition_parquet.py +193 -0
  15. tgedr_dataops-1.0.1.dist-info/METADATA +72 -0
  16. tgedr_dataops-1.0.1.dist-info/RECORD +22 -0
  17. {tgedr_dataops-0.0.37.dist-info → tgedr_dataops-1.0.1.dist-info}/WHEEL +1 -1
  18. tgedr_dataops-1.0.1.dist-info/top_level.txt +1 -0
  19. tgedr/dataops/chain.py +0 -51
  20. tgedr/dataops/commons/dataset.py +0 -23
  21. tgedr/dataops/commons/metadata.py +0 -172
  22. tgedr/dataops/commons/utils_fs.py +0 -85
  23. tgedr/dataops/commons/utils_spark.py +0 -87
  24. tgedr/dataops/etl.py +0 -112
  25. tgedr/dataops/processor.py +0 -27
  26. tgedr/dataops/sink/local_fs_file_sink.py +0 -47
  27. tgedr/dataops/sink/sink.py +0 -46
  28. tgedr/dataops/source/abstract_s3_file_source.py +0 -43
  29. tgedr/dataops/source/delta_table_source.py +0 -49
  30. tgedr/dataops/source/local_delta_table.py +0 -47
  31. tgedr/dataops/source/local_fs_file_source.py +0 -71
  32. tgedr/dataops/source/pd_df_s3_source.py +0 -76
  33. tgedr/dataops/source/s3_delta_table.py +0 -75
  34. tgedr/dataops/source/s3_file_extended_source.py +0 -39
  35. tgedr/dataops/source/source.py +0 -51
  36. tgedr/dataops/store/fs_single_partition_parquet.py +0 -231
  37. tgedr/dataops/store/local_fs_single_partition_parquet.py +0 -24
  38. tgedr/dataops/store/s3_single_partition_parquet.py +0 -102
  39. tgedr/dataops/store/spark_delta.py +0 -369
  40. tgedr/dataops/store/store.py +0 -49
  41. tgedr/dataops/utils_reflection.py +0 -134
  42. tgedr/dataops/validation/abs.py +0 -46
  43. tgedr/dataops/validation/pandas.py +0 -10
  44. tgedr/dataops/validation/pyspark.py +0 -10
  45. tgedr_dataops-0.0.37.dist-info/METADATA +0 -21
  46. tgedr_dataops-0.0.37.dist-info/RECORD +0 -38
  47. tgedr_dataops-0.0.37.dist-info/top_level.txt +0 -1
  48. {tgedr/dataops → tgedr_dataops}/__init__.py +0 -0
  49. {tgedr/dataops → tgedr_dataops}/sink/__init__.py +0 -0
  50. {tgedr/dataops → tgedr_dataops}/source/__init__.py +0 -0
  51. {tgedr_dataops-0.0.37.dist-info → tgedr_dataops-1.0.1.dist-info/licenses}/LICENSE +0 -0
{tgedr/dataops → tgedr_dataops}/commons/s3_connector.py
@@ -1,17 +1,30 @@
+"""AWS S3 connector module.
+
+This module provides the S3Connector class for establishing and managing
+connections to AWS S3 resources.
+"""
 import os
 import boto3
 
 
 class S3Connector:
-    """utility base class to be extended, providing a connection session with aws s3 resources"""
+    """utility base class to be extended, providing a connection session with aws s3 resources."""
 
-    def __init__(self):
+    def __init__(self) -> None:
+        """Initialize the S3Connector with empty session, resource, and client attributes."""
         self.__resource = None
         self.__session = None
         self.__client = None
 
     @property
-    def _session(self):  # pragma: no cover
+    def _session(self) -> boto3.Session:
+        """Get or create a boto3 session.
+
+        Returns
+        -------
+        boto3.Session
+            The boto3 session instance, using credentials if configured.
+        """
         if self.__session is None:
             if "1" == os.getenv("S3_CONNECTOR_USE_CREDENTIALS", default="0"):
                 self.__session = boto3.Session(
@@ -26,13 +39,27 @@ class S3Connector:
         return self.__session
 
     @property
-    def _resource(self):
+    def _resource(self) -> boto3.resources.base.ServiceResource:
+        """Get or create an S3 resource.
+
+        Returns
+        -------
+        boto3.resources.base.ServiceResource
+            The boto3 S3 resource instance.
+        """
         if self.__resource is None:
             self.__resource = self._session.resource("s3")
         return self.__resource
 
     @property
-    def _client(self):
+    def _client(self) -> boto3.client:
+        """Get or create an S3 client.
+
+        Returns
+        -------
+        boto3.client
+            The boto3 S3 client instance.
+        """
         if self.__client is None:
             self.__client = self._session.client("s3")
         return self.__client
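
For orientation, here is a minimal usage sketch of the refactored connector. S3Connector is a base class meant to be extended, so the BucketLister subclass below is hypothetical; the S3_CONNECTOR_USE_CREDENTIALS toggle is left at its default "0" so the standard boto3 credential chain applies.

import os

from tgedr_dataops.commons.s3_connector import S3Connector


class BucketLister(S3Connector):
    """Hypothetical subclass exposing the lazily created client."""

    def bucket_names(self) -> list[str]:
        # first access to _client builds the session, then the s3 client
        return [b["Name"] for b in self._client.list_buckets()["Buckets"]]


os.environ.setdefault("S3_CONNECTOR_USE_CREDENTIALS", "0")  # default credential chain
print(BucketLister().bucket_names())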
tgedr_dataops/commons/utils_fs.py
@@ -0,0 +1,187 @@
+"""Filesystem utility functions for file and path operations.
+
+This module provides utilities for:
+- Creating temporary files and directories
+- Parsing and processing S3 URLs and paths
+- Extracting URL protocols
+- Generating file hashes
+"""
+import tempfile
+import re
+import hashlib
+from pathlib import Path
+
+
+def temp_dir(root: str | None = None, suffix: str | None = None, prefix: str | None = None) -> str:
+    """Create a temporary directory and return its path.
+
+    Parameters
+    ----------
+    root : str | None
+        Directory where the temporary directory will be created.
+    suffix : str | None
+        Suffix for the temporary directory name.
+    prefix : str | None
+        Prefix for the temporary directory name.
+
+    Returns
+    -------
+    str
+        Path to the created temporary directory.
+    """
+    return tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=root)
+
+
+def temp_file(
+    root: str | None = None, suffix: str | None = None, prefix: str | None = None,
+    discard_handle: bool = True
+) -> str | tuple[int, str]:
+    """Create a temporary file and return its path or handle and path.
+
+    Parameters
+    ----------
+    root : str | None
+        Directory where the temporary file will be created.
+    suffix : str | None
+        Suffix for the temporary file name.
+    prefix : str | None
+        Prefix for the temporary file name.
+    discard_handle : bool
+        If True, return only the file path. If False, return tuple of (handle, path).
+
+    Returns
+    -------
+    str | tuple[int, str]
+        File path if discard_handle is True, otherwise tuple of (file handle, file path).
+    """
+    h, f = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=root)
+    if discard_handle:
+        return f
+    return (h, f)
+
+
+def resolve_url_protocol(url: str) -> str | None:
+    """Extract the protocol from a URL.
+
+    Parameters
+    ----------
+    url : str
+        The URL to extract the protocol from.
+
+    Returns
+    -------
+    str | None
+        The protocol (e.g., 'http://', 'https://') or None if no protocol is found.
+    """
+    result = None
+    group_match = re.search("(.*://).*", url)
+    if group_match is not None:
+        result = group_match.group(1)
+    return result
+
+
+def resolve_s3_protocol(url: str) -> str | None:
+    """Extract the S3 protocol from a URL.
+
+    Parameters
+    ----------
+    url : str
+        The URL to extract the S3 protocol from.
+
+    Returns
+    -------
+    str | None
+        The S3 protocol (e.g., 's3://', 's3a://') or None if no S3 protocol is found.
+    """
+    result = None
+    group_match = re.search("(s3[a]?://).*", url)
+    if group_match is not None:
+        result = group_match.group(1)
+    return result
+
+
+def remove_s3_protocol(url: str) -> str:
+    """Remove the S3 protocol prefix from a URL.
+
+    Parameters
+    ----------
+    url : str
+        The S3 URL to remove the protocol from.
+
+    Returns
+    -------
+    str
+        The URL without the S3 protocol prefix (s3:// or s3a://).
+    """
+    if url.startswith("s3://"):
+        result = url[5:]
+    elif url.startswith("s3a://"):
+        result = url[6:]
+    else:
+        result = url
+    return result
+
+
+def process_s3_path(path: str) -> tuple[str, str]:
+    """Extract bucket and key from an S3 path.
+
+    Parameters
+    ----------
+    path : str
+        The S3 path to process (with or without protocol).
+
+    Returns
+    -------
+    tuple[str, str]
+        A tuple containing (bucket, key).
+    """
+    no_protocol_path = remove_s3_protocol(path)
+    path_elements = no_protocol_path.split("/")
+    bucket = path_elements[0]
+    key = "/".join(path_elements[1:])
+    return (bucket, key)
+
+
+def process_s3_url(url: str) -> tuple[str, str, str]:
+    """Extract protocol, bucket, and key from an S3 URL.
+
+    Parameters
+    ----------
+    url : str
+        The S3 URL to process.
+
+    Returns
+    -------
+    tuple[str, str, str]
+        A tuple containing (protocol, bucket, key).
+    """
+    protocol = resolve_s3_protocol(url)
+    no_protocol_url = remove_s3_protocol(url)
+    path_elements = no_protocol_url.split("/")
+    bucket = path_elements[0]
+    key = "/".join(path_elements[1:])
+    return ("" if protocol is None else protocol, bucket, key)
+
+
+def hash_file(filepath: str, hash_func=hashlib.sha256) -> str:  # noqa: ANN001
+    """Generate a hash for a file.
+
+    Args:
+        filepath (str): The path to the file.
+        hash_func: A hashlib hash constructor, e.g., hashlib.md5.
+
+    Returns:
+        str: The hexadecimal hash string of the file.
+    """
+    # Initialize the hash object
+    hasher = hash_func()
+
+    # Open the file in binary read mode
+    with Path(filepath).open("rb") as file:
+        # Read the file in chunks to avoid using too much memory
+        chunk_size = 8192
+        while chunk := file.read(chunk_size):
+            hasher.update(chunk)
+
+    # Return the hexadecimal digest of the hash
+    return hasher.hexdigest()
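
A quick sketch exercising the new helpers; bucket and key values are illustrative, and the assertions follow directly from the parsing logic shown above.

from tgedr_dataops.commons.utils_fs import (
    hash_file,
    process_s3_url,
    remove_s3_protocol,
    temp_file,
)

url = "s3a://my-bucket/raw/data.csv"
assert remove_s3_protocol(url) == "my-bucket/raw/data.csv"
assert process_s3_url(url) == ("s3a://", "my-bucket", "raw/data.csv")

# temp_file returns just the path by default (discard_handle=True);
# hash_file streams the file in 8192-byte chunks before hex-digesting
path = temp_file(suffix=".txt")
print(hash_file(path))  # sha256 digest of the (empty) temporary file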
tgedr_dataops/quality/pandas_validation.py
@@ -0,0 +1,21 @@
+"""Pandas DataFrame validation implementation module.
+
+This module provides the Pandas-specific implementation of Great Expectations validation.
+"""
+from great_expectations.execution_engine import ExecutionEngine
+from great_expectations.execution_engine import PandasExecutionEngine
+from tgedr_dataops_abs.great_expectations_validation import GreatExpectationsValidation
+
+
+class PandasValidation(GreatExpectationsValidation):
+    """Pandas DataFrame validation implementation."""
+
+    def _get_execution_engine(self, batch_data_dict: dict) -> ExecutionEngine:
+        """Get the execution engine used by the validation implementation.
+
+        Returns
+        -------
+        ExecutionEngine
+            The execution engine instance.
+        """
+        return PandasExecutionEngine(batch_data_dict=batch_data_dict)
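
The GreatExpectationsValidation base class lives in the separate tgedr_dataops_abs package and is not shown in this diff, so only the engine hook can be illustrated; the no-argument construction below is an assumption, not part of the diff.

import pandas as pd

from tgedr_dataops.quality.pandas_validation import PandasValidation

# assumption: the GreatExpectationsValidation base allows no-arg construction
validation = PandasValidation()
engine = validation._get_execution_engine(
    batch_data_dict={"batch": pd.DataFrame({"id": [1, 2, 3]})}
)
print(type(engine).__name__)  # PandasExecutionEngine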
tgedr_dataops/sink/local_fs_file_sink.py
@@ -0,0 +1,77 @@
+"""Local filesystem sink implementation for persisting files.
+
+This module provides:
+- LocalFsFileSink: sink class for saving/persisting files to local filesystem.
+"""
+
+import logging
+import shutil
+from pathlib import Path
+from typing import Any
+
+from tgedr_dataops_abs.sink import Sink, SinkException
+
+
+logger = logging.getLogger(__name__)
+
+
+class LocalFsFileSink(Sink):
+    """sink class used to save/persist an object/file to a local fs location."""
+
+    CONTEXT_SOURCE_PATH = "source"
+    CONTEXT_TARGET_PATH = "target"
+
+    def put(self, context: dict[str, Any] | None = None) -> Any:
+        """Copy a file from source to target on local filesystem.
+
+        Parameters
+        ----------
+        context : dict[str, Any], optional
+            Context dictionary containing 'source' and 'target' paths.
+
+        Raises
+        ------
+        SinkException
+            If source or target context is missing.
+        """
+        logger.info(f"[put|in] ({context})")
+
+        if self.CONTEXT_SOURCE_PATH not in context:
+            raise SinkException(f"you must provide context for {self.CONTEXT_SOURCE_PATH}")
+        if self.CONTEXT_TARGET_PATH not in context:
+            raise SinkException(f"you must provide context for {self.CONTEXT_TARGET_PATH}")
+
+        source = context[self.CONTEXT_SOURCE_PATH]
+        target = context[self.CONTEXT_TARGET_PATH]
+
+        shutil.copy(source, target)
+        logger.info("[put|out]")
+
+    def delete(self, context: dict[str, Any] | None = None) -> None:
+        """Delete a file or directory from local filesystem.
+
+        Parameters
+        ----------
+        context : dict[str, Any], optional
+            Context dictionary containing 'target' path.
+
+        Raises
+        ------
+        SinkException
+            If target context is missing or target is neither file nor directory.
+        """
+        logger.info(f"[delete|in] ({context})")
+
+        if self.CONTEXT_TARGET_PATH not in context:
+            raise SinkException(f"you must provide context for {self.CONTEXT_TARGET_PATH}")
+
+        target = Path(context[self.CONTEXT_TARGET_PATH])
+
+        if target.is_file():
+            target.unlink()
+        elif target.is_dir():
+            shutil.rmtree(target)
+        else:
+            raise SinkException(f"[delete] target is neither a file nor a directory: {target}")
+
+        logger.info("[delete|out]")
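
A minimal put/delete round trip for the new sink, assuming the abstract Sink from tgedr_dataops_abs allows no-argument construction (the S3FileSink initializer below suggests config is optional); paths are illustrative.

from tgedr_dataops.sink.local_fs_file_sink import LocalFsFileSink

sink = LocalFsFileSink()
# copy /tmp/report.csv to /tmp/backup/report.csv via shutil.copy
sink.put(context={"source": "/tmp/report.csv", "target": "/tmp/backup/report.csv"})
# unlink the copy (a directory target would be removed recursively)
sink.delete(context={"target": "/tmp/backup/report.csv"})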
{tgedr/dataops → tgedr_dataops}/sink/s3_file_sink.py
@@ -1,26 +1,50 @@
+"""S3 sink implementation for persisting files to S3.
+
+This module provides:
+- S3FileSink: sink class for saving/persisting files to S3 buckets.
+"""
 import logging
-import os
-from typing import Any, Dict, Optional
-from tgedr.dataops.commons.s3_connector import S3Connector
+from pathlib import Path
+from typing import Any
+from tgedr_dataops.commons.s3_connector import S3Connector
 
-from tgedr.dataops.sink.sink import Sink, SinkException
-from tgedr.dataops.commons.utils_fs import remove_s3_protocol
+from tgedr_dataops_abs.sink import Sink, SinkException
+from tgedr_dataops.commons.utils_fs import remove_s3_protocol
 
 
 logger = logging.getLogger(__name__)
 
 
 class S3FileSink(Sink, S3Connector):
-    """sink class used to save/persist a local object/file to an s3 bucket"""
+    """sink class used to save/persist a local object/file to an s3 bucket."""
 
     CONTEXT_SOURCE_PATH = "source"
     CONTEXT_TARGET_PATH = "target"
 
-    def __init__(self, config: Optional[Dict[str, Any]] = None):
+    def __init__(self, config: dict[str, Any] | None = None) -> None:
+        """Initialize the S3FileSink.
+
+        Parameters
+        ----------
+        config : dict[str, Any], optional
+            Configuration dictionary.
+        """
         Sink.__init__(self, config=config)
         S3Connector.__init__(self)
 
-    def put(self, context: Optional[Dict[str, Any]] = None) -> Any:
+    def put(self, context: dict[str, Any] | None = None) -> Any:
+        """Upload a local file to S3 bucket.
+
+        Parameters
+        ----------
+        context : dict[str, Any], optional
+            Context dictionary containing 'source' (local file) and 'target' (S3 URL) paths.
+
+        Raises
+        ------
+        SinkException
+            If source or target context is missing, or if source is a directory.
+        """
         logger.info(f"[put|in] ({context})")
 
         if self.CONTEXT_SOURCE_PATH not in context:
@@ -29,7 +53,7 @@ class S3FileSink(Sink, S3Connector):
             raise SinkException(f"you must provide context for {self.CONTEXT_TARGET_PATH}")
 
         source = context[self.CONTEXT_SOURCE_PATH]
-        if os.path.isdir(source):
+        if Path(source).is_dir():
             raise SinkException("source can't be a folder, must be a file")
 
         target = remove_s3_protocol(context[self.CONTEXT_TARGET_PATH])
@@ -38,7 +62,7 @@ class S3FileSink(Sink, S3Connector):
         target_key = "/".join(target_elements[1:])
 
         if target_key.endswith("/"):
-            target_file = os.path.basename(source)
+            target_file = Path(source).name
             target_key = target_key + target_file
 
         logger.info(f"[put] uploading {source} to key: {target_key} in bucket: {target_bucket}")
@@ -46,7 +70,19 @@ class S3FileSink(Sink, S3Connector):
 
         logger.info("[put|out]")
 
-    def delete(self, context: Optional[Dict[str, Any]] = None):
+    def delete(self, context: dict[str, Any] | None = None) -> None:
+        """Delete an object from S3 bucket.
+
+        Parameters
+        ----------
+        context : dict[str, Any], optional
+            Context dictionary containing 'target' (S3 URL) path.
+
+        Raises
+        ------
+        SinkException
+            If target context is missing.
+        """
        logger.info(f"[delete|in] ({context})")
 
        if self.CONTEXT_TARGET_PATH not in context:
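
The same context contract applies to the S3 variant; here is a hedged sketch of an upload, relying on the trailing-slash behavior the hunk above shows (the target key inherits the source file name). The bucket and paths are illustrative.

from tgedr_dataops.sink.s3_file_sink import S3FileSink

sink = S3FileSink()
# trailing "/" in the target -> key becomes "landing/report.csv"
sink.put(context={"source": "/tmp/report.csv", "target": "s3://my-bucket/landing/"})
sink.delete(context={"target": "s3://my-bucket/landing/report.csv"})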
tgedr_dataops/source/abstract_s3_file_source.py
@@ -0,0 +1,72 @@
+"""Abstract base class for S3 file sources.
+
+This module provides:
+- AbstractS3FileSource: abstract class for reading file sources from S3.
+"""
+from abc import ABC
+import logging
+from typing import Any
+
+from tgedr_dataops.commons.s3_connector import S3Connector
+from tgedr_dataops.commons.utils_fs import process_s3_url
+from tgedr_dataops_abs.source import Source, SourceException
+
+
+logger = logging.getLogger()
+
+
+class AbstractS3FileSource(Source, S3Connector, ABC):
+    """abstract class used to read file sources from s3."""
+
+    CONTEXT_KEY_URL = "url"
+    CONTEXT_KEY_SUFFIX = "suffix"
+
+    def __init__(self, config: dict[str, Any] | None = None) -> None:
+        """Initialize the S3 file source.
+
+        Parameters
+        ----------
+        config : dict[str, Any], optional
+            Configuration dictionary for the source.
+        """
+        Source.__init__(self, config=config)
+        S3Connector.__init__(self)
+
+    def list(self, context: dict[str, Any] | None = None) -> list[str]:
+        """List objects in the S3 bucket.
+
+        Parameters
+        ----------
+        context : dict[str, Any], optional
+            Context dictionary containing the 'url' S3 URL and an optional 'suffix' filter.
+
+        Returns
+        -------
+        list[str]
+            List of S3 object URLs under the source bucket/prefix.
+
+        Raises
+        ------
+        SourceException
+            If url context is missing.
+        """
+        logger.info(f"[list|in] ({context})")
+
+        result: list[str] = []
+        if self.CONTEXT_KEY_URL not in context:
+            raise SourceException(f"you must provide context for {self.CONTEXT_KEY_URL}")
+
+        protocol, bucket, key = process_s3_url(context[self.CONTEXT_KEY_URL])
+
+        objs = self._client.list_objects_v2(Bucket=bucket, Prefix=key)
+        result = [
+            (protocol + bucket + "/" + entry["Key"]) for entry in objs["Contents"] if not (entry["Key"]).endswith("/")
+        ]
+
+        if self.CONTEXT_KEY_SUFFIX in context:
+            suffix: str = context[self.CONTEXT_KEY_SUFFIX]
+            result = [f for f in result if f.endswith(suffix)]
+
+        logger.debug(f"[list|out] => {result}")
+        logger.info(f"[list|out] => result len: {len(result)}")
+        return result
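
Since the class is abstract, a concrete subclass is needed before list() can be used. The S3KeyLister below is hypothetical, and the abstract surface of Source (defined in tgedr_dataops_abs, not shown in this diff) may require more overrides than the single get() stubbed here.

from typing import Any

from tgedr_dataops.source.abstract_s3_file_source import AbstractS3FileSource


class S3KeyLister(AbstractS3FileSource):
    """Hypothetical concrete source that only uses the inherited list()."""

    def get(self, context: dict[str, Any] | None = None) -> Any:
        raise NotImplementedError


# returns full URLs such as "s3://my-bucket/raw/part-0001.csv"
urls = S3KeyLister().list(context={"url": "s3://my-bucket/raw/", "suffix": ".csv"})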
tgedr_dataops/source/local_fs_file_source.py
@@ -0,0 +1,108 @@
+"""Local filesystem source implementation for file operations.
+
+This module provides:
+- LocalFsFileSource: A source class for retrieving and listing local files.
+"""
+import logging
+import shutil
+from pathlib import Path
+from typing import Any
+
+from tgedr_dataops_abs.source import Source, SourceException
+
+
+logger = logging.getLogger(__name__)
+
+
+class LocalFsFileSource(Source):
+    """source class used to retrieve local objects/files to another local fs location."""
+
+    CONTEXT_KEY_SOURCE = "source"
+    CONTEXT_KEY_TARGET = "target"
+    CONTEXT_KEY_SUFFIX = "suffix"
+    CONTEXT_KEY_FILES = "files"
+
+    def list(self, context: dict[str, Any] | None = None) -> list[str]:
+        """List files in the local filesystem directory.
+
+        Parameters
+        ----------
+        context : dict[str, Any], optional
+            Context dictionary containing 'source' path and an optional 'suffix' filter.
+
+        Returns
+        -------
+        list[str]
+            List of file paths in the source directory.
+
+        Raises
+        ------
+        SourceException
+            If source context is missing.
+        """
+        logger.info(f"[list|in] ({context})")
+        result: list[str] = []
+        if self.CONTEXT_KEY_SOURCE not in context:
+            raise SourceException(f"you must provide context for {self.CONTEXT_KEY_SOURCE}")
+
+        source = context[self.CONTEXT_KEY_SOURCE]
+        source_path = Path(source)
+        if source_path.is_dir():
+            suffix = None
+            if self.CONTEXT_KEY_SUFFIX in context:
+                suffix = context[self.CONTEXT_KEY_SUFFIX]
+                result: list[str] = [str(file) for file in source_path.iterdir() if file.name.endswith(suffix)]
+            else:
+                result: list[str] = [str(file) for file in source_path.iterdir()]
+        elif source_path.is_file():
+            result: list[str] = [source]
+
+        logger.debug(f"[list|out] => {result}")
+        logger.info(f"[list|out] => result len: {len(result)}")
+        return result
+
+    def get(self, context: dict[str, Any] | None = None) -> Any:
+        """Retrieve file(s) from local filesystem.
+
+        Parameters
+        ----------
+        context : dict[str, Any], optional
+            Context dictionary containing 'files' (a path or list of paths) and 'target' path.
+
+        Returns
+        -------
+        Any
+            List of the new file paths after copying to the target.
+
+        Raises
+        ------
+        SourceException
+            If files or target context is missing, or files is neither a list nor a string.
+        """
+        logger.info(f"[get|in] ({context})")
+
+        if self.CONTEXT_KEY_FILES not in context or self.CONTEXT_KEY_TARGET not in context:
+            raise SourceException(f"{self.CONTEXT_KEY_FILES} and {self.CONTEXT_KEY_TARGET} must be provided in config")
+        files = context[self.CONTEXT_KEY_FILES]
+        target = context[self.CONTEXT_KEY_TARGET]
+
+        if "list" != type(files).__name__:
+            if "str" == type(files).__name__:
+                files = [files]
+            else:
+                raise SourceException("files argument must be a list of strings or a string")
+
+        target_is_dir: bool = False
+        if Path(target).is_dir():
+            target_is_dir = True
+
+        result: list[str] = []
+
+        for file in files:
+            basename = Path(file).name
+            new_file = str(Path(target) / basename) if target_is_dir else target
+            shutil.copy(file, new_file)
+            result.append(new_file)
+
+        logger.info(f"[get|out] => {result}")
+        return result
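
Finally, a sketch of the list-then-get flow for the local source; directories are illustrative, and the no-argument construction again assumes the abstract Source permits it. get() copies each listed file into the target directory and returns the new paths.

from tgedr_dataops.source.local_fs_file_source import LocalFsFileSource

source = LocalFsFileSource()
files = source.list(context={"source": "/tmp/incoming", "suffix": ".csv"})
copied = source.get(context={"files": files, "target": "/tmp/staging"})
print(copied)  # e.g. ["/tmp/staging/a.csv", "/tmp/staging/b.csv"]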