vd-dlt 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vd_dlt-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,71 @@
1
+ Metadata-Version: 2.4
2
+ Name: vd-dlt
3
+ Version: 0.1.0
4
+ Summary: Core DLT ingestion framework for VibeData pipelines
5
+ Project-URL: Homepage, https://github.com/accelerate-data/vd-dlt-connectors
6
+ Project-URL: Repository, https://github.com/accelerate-data/vd-dlt-connectors
7
+ Author-email: VibeData <info@vibedata.dev>
8
+ License-Expression: MIT
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Database
17
+ Classifier: Topic :: Software Development :: Libraries
18
+ Requires-Python: >=3.9
19
+ Requires-Dist: pyyaml>=6.0
20
+ Provides-Extra: notion
21
+ Requires-Dist: vd-dlt-notion; extra == 'notion'
22
+ Provides-Extra: notion-schema
23
+ Requires-Dist: vd-dlt-notion-schema; extra == 'notion-schema'
24
+ Provides-Extra: pipeline
25
+ Requires-Dist: dlt[deltalake,filesystem]; extra == 'pipeline'
26
+ Requires-Dist: pyarrow>=17.0.0; extra == 'pipeline'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # vd-dlt
30
+
31
+ Core DLT ingestion framework for VibeData pipelines. Provides config resolution, credential management (Azure Key Vault), pipeline execution, and observability.
32
+
33
+ ## Installation
34
+
35
+ ```bash
36
+ # Core only
37
+ pip install vd-dlt
38
+
39
+ # With pipeline dependencies (dlt, pyarrow)
40
+ pip install vd-dlt[pipeline]
41
+
42
+ # With Notion connector
43
+ pip install vd-dlt[notion]
44
+
45
+ # With Notion schema (defaults, docs)
46
+ pip install vd-dlt[notion-schema]
47
+
48
+ # Everything for Notion
49
+ pip install vd-dlt[pipeline,notion,notion-schema]
50
+ ```
51
+
52
+ ## Quick Start
53
+
54
+ ```python
55
+ from vd_dlt import PipelineRunner
56
+
57
+ runner = PipelineRunner(
58
+ vault_url="https://my-vault.vault.azure.net/",
59
+ )
60
+ result = runner.run("my_source_name")
61
+ print(f"Loaded {result.total_rows_loaded} rows")
62
+ ```
63
+
64
+ ## Architecture
65
+
66
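+ The framework uses a 4-level config hierarchy. Levels are listed from lowest to highest priority; when the same setting appears at several levels, the most specific level wins: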
67
+
68
+ 1. **Connector Defaults** - from connector schema package
69
+ 2. **Source Config** - from source YAML (including `default_sync`)
70
+ 3. **Group Config** - optional grouping within source
71
+ 4. **Resource Config** - per-table overrides
vd_dlt-0.1.0/README.md ADDED
@@ -0,0 +1,43 @@
1
+ # vd-dlt
2
+
3
+ Core DLT ingestion framework for VibeData pipelines. Provides config resolution, credential management (Azure Key Vault), pipeline execution, and observability.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ # Core only
9
+ pip install vd-dlt
10
+
11
+ # With pipeline dependencies (dlt, pyarrow)
12
+ pip install vd-dlt[pipeline]
13
+
14
+ # With Notion connector
15
+ pip install vd-dlt[notion]
16
+
17
+ # With Notion schema (defaults, docs)
18
+ pip install vd-dlt[notion-schema]
19
+
20
+ # Everything for Notion
21
+ pip install vd-dlt[pipeline,notion,notion-schema]
22
+ ```
23
+
24
+ ## Quick Start
25
+
26
+ ```python
27
+ from vd_dlt import PipelineRunner
28
+
29
+ runner = PipelineRunner(
30
+ vault_url="https://my-vault.vault.azure.net/",
31
+ )
32
+ result = runner.run("my_source_name")
33
+ print(f"Loaded {result.total_rows_loaded} rows")
34
+ ```
35
+
36
+ ## Architecture
37
+
38
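+ The framework uses a 4-level config hierarchy. Levels are listed from lowest to highest priority; when the same setting appears at several levels, the most specific level wins: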
39
+
40
+ 1. **Connector Defaults** - from connector schema package
41
+ 2. **Source Config** - from source YAML (including `default_sync`)
42
+ 3. **Group Config** - optional grouping within source
43
+ 4. **Resource Config** - per-table overrides
@@ -0,0 +1,47 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "vd-dlt"
7
+ version = "0.1.0"
8
+ description = "Core DLT ingestion framework for VibeData pipelines"
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = "MIT"
12
+ authors = [
13
+ { name = "VibeData", email = "info@vibedata.dev" },
14
+ ]
15
+ classifiers = [
16
+ "Development Status :: 4 - Beta",
17
+ "Intended Audience :: Developers",
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3.9",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Database",
24
+ "Topic :: Software Development :: Libraries",
25
+ ]
26
+ dependencies = [
27
+ "pyyaml>=6.0",
28
+ ]
29
+
30
+ [project.optional-dependencies]
31
+ pipeline = [
32
+ "dlt[filesystem,deltalake]",
33
+ "pyarrow>=17.0.0",
34
+ ]
35
+ notion = [
36
+ "vd-dlt-notion",
37
+ ]
38
+ notion-schema = [
39
+ "vd-dlt-notion-schema",
40
+ ]
41
+
42
+ [project.urls]
43
+ Homepage = "https://github.com/accelerate-data/vd-dlt-connectors"
44
+ Repository = "https://github.com/accelerate-data/vd-dlt-connectors"
45
+
46
+ [tool.hatch.build.targets.wheel]
47
+ packages = ["src/vd_dlt"]
@@ -0,0 +1,33 @@
1
+ """
2
+ vd-dlt: Core DLT Ingestion Framework
3
+
4
+ Provides:
5
+ - Config resolution (4-level hierarchy)
6
+ - Credential resolution (Key Vault integration)
7
+ - Pipeline execution
8
+ - Observability (logging, sync history)
9
+ - Schema validation
10
+ """
11
+
12
+ __version__ = "0.1.0"
13
+
14
+ from .config_resolver import ConfigResolver, resolve_resource_config
15
+ from .credential_resolver import CredentialResolver, resolve_credentials
16
+ from .pipeline_runner import PipelineRunner, run_source
17
+ from .observability import setup_logging, SyncHistoryTracker, SyncRecord, ResourceResult
18
+ from .schema_validator import ValidationError, validate_all
19
+
20
+ __all__ = [
21
+ "ConfigResolver",
22
+ "resolve_resource_config",
23
+ "CredentialResolver",
24
+ "resolve_credentials",
25
+ "PipelineRunner",
26
+ "run_source",
27
+ "setup_logging",
28
+ "SyncHistoryTracker",
29
+ "SyncRecord",
30
+ "ResourceResult",
31
+ "ValidationError",
32
+ "validate_all",
33
+ ]
@@ -0,0 +1,238 @@
1
+ """
2
+ Config Resolution Engine - 4-Level Hierarchy
3
+
4
+ Resolution order (most specific wins):
5
+ 1. Connector Defaults - connectors/{name}/defaults.yml
6
+ 2. Source - sources/{source_name}.yml (including default_sync)
7
+ 3. Group - Optional grouping within source config
8
+ 4. Resource - Per-table overrides in source config
9
+
10
+ Priority: Resource > Group > Source > Connector Defaults
11
+ """
12
+
13
+ import os
14
+ from typing import Any, Dict, List, Optional
15
+ from .utils import (
16
+ deep_merge,
17
+ load_yaml,
18
+ get_connectors_path,
19
+ get_configs_path,
20
+ file_exists,
21
+ )
22
+
23
+
24
class ConfigResolver:
    """
    Resolves configuration from the 4-level hierarchy.

    Levels are merged from least to most specific, so a later level
    overrides an earlier one:
    Connector Defaults < Source < Group < Resource.

    Usage:
        resolver = ConfigResolver()
        source_config = resolver.load_source_config("notion_wiki")
        resource_config = resolver.resolve_resource_config(source_config, "database_rows")
    """

    # Source-level keys that describe the source itself (or hold the nested
    # group/resource sections) and therefore are not inherited by resources.
    _NON_INHERITED_SOURCE_KEYS = frozenset(
        {
            "groups",
            "resources",
            "source_name",
            "connector",
            "connector_version",
            "secrets",
        }
    )

    def __init__(self, base_path: Optional[str] = None):
        """
        Initialize config resolver.

        Args:
            base_path: Base path for configs. When given, connectors are read
                from "{base_path}/connectors" and configs from
                "{base_path}/configs". When omitted (or empty), both paths
                come from get_connectors_path() / get_configs_path().
        """
        if base_path:
            self.connectors_path = f"{base_path}/connectors"
            self.configs_path = f"{base_path}/configs"
        else:
            self.connectors_path = get_connectors_path()
            self.configs_path = get_configs_path()

        # Connector defaults keyed by connector name, so each defaults.yml
        # is loaded at most once per resolver instance.
        self._connector_cache: Dict[str, Dict] = {}

    def load_connector_defaults(self, connector_name: str) -> Dict[str, Any]:
        """
        Load connector defaults (Level 1).

        The result is cached per connector name. A missing defaults file
        yields an empty dictionary.

        Args:
            connector_name: Name of the connector (e.g., "notion")

        Returns:
            Connector defaults dictionary
        """
        if connector_name in self._connector_cache:
            return self._connector_cache[connector_name]

        defaults_path = f"{self.connectors_path}/{connector_name}/defaults.yml"
        if file_exists(defaults_path):
            # An empty YAML document typically parses to None; normalize it
            # so callers can always use dict methods on the result.
            # NOTE(review): harmless if load_yaml already normalizes this.
            defaults = load_yaml(defaults_path) or {}
        else:
            defaults = {}

        self._connector_cache[connector_name] = defaults
        return defaults

    def load_connector_manifest(self, connector_name: str) -> Dict[str, Any]:
        """
        Load connector manifest (metadata).

        Unlike load_connector_defaults, the manifest is not cached.

        Args:
            connector_name: Name of the connector

        Returns:
            Connector manifest dictionary (empty if the file is missing)
        """
        manifest_path = f"{self.connectors_path}/{connector_name}/manifest.yml"
        if file_exists(manifest_path):
            # Same empty-document normalization as load_connector_defaults.
            return load_yaml(manifest_path) or {}
        return {}

    def load_source_config(self, source_name: str) -> Dict[str, Any]:
        """
        Load source config (Level 2).

        Args:
            source_name: Name of the source (e.g., "notion_wiki")

        Returns:
            Source config dictionary

        Raises:
            FileNotFoundError: If source config doesn't exist
        """
        source_path = f"{self.configs_path}/sources/{source_name}.yml"
        if not file_exists(source_path):
            raise FileNotFoundError(f"Source config not found: {source_path}")

        return load_yaml(source_path)

    def get_group_for_resource(
        self, source_config: Dict[str, Any], resource_name: str
    ) -> Optional[Dict[str, Any]]:
        """
        Find the group config for a resource (Level 3).

        The first group (in mapping order) whose "resources" list contains
        the resource wins. A "groups" key with no value, a group with no
        body, and a group with a null "resources" key are all treated as
        empty rather than raising.

        Args:
            source_config: Source configuration
            resource_name: Name of the resource

        Returns:
            Group settings without the "resources" list if the resource
            belongs to a group, None otherwise
        """
        # "groups:" with no value parses to None, so "or {}" is needed in
        # addition to the .get() default.
        groups = source_config.get("groups") or {}
        for group_config in groups.values():
            if not isinstance(group_config, dict):
                continue
            if resource_name in (group_config.get("resources") or []):
                # Return group settings without the 'resources' list
                return {k: v for k, v in group_config.items() if k != "resources"}
        return None

    def resolve_resource_config(
        self, source_config: Dict[str, Any], resource_name: str
    ) -> Dict[str, Any]:
        """
        Resolve effective config for a resource.

        Applies the 4-level hierarchy:
        Priority: Resource > Group > Source > Connector Defaults

        Args:
            source_config: Source configuration (from load_source_config)
            resource_name: Name of the resource to resolve

        Returns:
            Fully resolved configuration for the resource, with the metadata
            keys "_resource_name", "_source_name" and "_connector" added
        """
        connector_name = source_config.get("connector", "rest_api_generic")

        # Level 1: Start with connector defaults
        connector_defaults = self.load_connector_defaults(connector_name)
        effective = deep_merge({}, connector_defaults.get("defaults") or {})

        # Apply resource-specific connector defaults if they exist. Null
        # sections are treated as empty so deep_merge always gets a dict.
        per_resource_defaults = connector_defaults.get("resource_defaults") or {}
        effective = deep_merge(effective, per_resource_defaults.get(resource_name) or {})

        # Level 2: Apply source-level config (including default_sync)
        source_level = {
            k: v
            for k, v in source_config.items()
            if k not in self._NON_INHERITED_SOURCE_KEYS
        }
        effective = deep_merge(effective, source_level)

        # Level 3: Apply group config (if resource belongs to a group)
        group_config = self.get_group_for_resource(source_config, resource_name)
        if group_config:
            effective = deep_merge(effective, group_config)

        # Level 4: Apply resource-specific overrides
        # Handle both list format (new) and dict format (legacy)
        resources = source_config.get("resources", [])
        resource_config = {}
        if isinstance(resources, list):
            for res in resources:
                if res.get("name") == resource_name:
                    resource_config = {k: v for k, v in res.items() if k != "name"}
                    break
        elif isinstance(resources, dict):
            # A legacy entry declared as "name:" (null) is reported as enabled
            # by get_enabled_resources, so it must resolve to "no overrides"
            # here instead of handing None to deep_merge.
            resource_config = resources.get(resource_name) or {}
        effective = deep_merge(effective, resource_config)

        # Add metadata
        effective["_resource_name"] = resource_name
        effective["_source_name"] = source_config.get("source_name", "unknown")
        effective["_connector"] = connector_name

        return effective

    def get_enabled_resources(self, source_config: Dict[str, Any]) -> List[str]:
        """
        Get list of enabled resources from source config.

        Supports both list format (new) and dict format (legacy):
        - List: [{"name": "projects", "enabled": true}, ...]
        - Dict: {"projects": {"enabled": true}, ...}

        A resource is enabled unless it sets "enabled" to a falsy value.
        List entries without a name are ignored. In the dict format a null
        or empty config counts as enabled.

        Args:
            source_config: Source configuration

        Returns:
            List of enabled resource names, in config order
        """
        resources = source_config.get("resources", [])

        if isinstance(resources, list):
            # New list format
            return [
                resource.get("name")
                for resource in resources
                if resource.get("enabled", True) and resource.get("name")
            ]

        if isinstance(resources, dict):
            # Legacy dict format. An empty dict is covered by the isinstance
            # branch because "enabled" defaults to True.
            return [
                name
                for name, config in resources.items()
                if config is None
                or (isinstance(config, dict) and config.get("enabled", True))
            ]

        return []
218
+
219
+
220
def resolve_resource_config(
    source_name: str,
    resource_name: str,
    base_path: Optional[str] = None,
) -> Dict[str, Any]:
    """
    One-shot helper: load a source config and resolve one of its resources.

    Builds a throwaway ConfigResolver, so nothing is cached between calls.

    Args:
        source_name: Name of the source whose YAML config is loaded
        resource_name: Name of the resource to resolve within that source
        base_path: Optional base path override passed to ConfigResolver

    Returns:
        Resolved resource configuration
    """
    config_resolver = ConfigResolver(base_path)
    return config_resolver.resolve_resource_config(
        config_resolver.load_source_config(source_name),
        resource_name,
    )