sqlmesh-dag-generator 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sqlmesh_dag_generator/__init__.py +14 -0
- sqlmesh_dag_generator/cli.py +202 -0
- sqlmesh_dag_generator/config.py +116 -0
- sqlmesh_dag_generator/dag_builder.py +449 -0
- sqlmesh_dag_generator/generator.py +421 -0
- sqlmesh_dag_generator/models.py +135 -0
- sqlmesh_dag_generator/utils.py +170 -0
- sqlmesh_dag_generator-0.1.0.dist-info/METADATA +304 -0
- sqlmesh_dag_generator-0.1.0.dist-info/RECORD +13 -0
- sqlmesh_dag_generator-0.1.0.dist-info/WHEEL +5 -0
- sqlmesh_dag_generator-0.1.0.dist-info/entry_points.txt +2 -0
- sqlmesh_dag_generator-0.1.0.dist-info/licenses/LICENSE +22 -0
- sqlmesh_dag_generator-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SQLMesh DAG Generator - Open Source Airflow Integration for SQLMesh
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
__version__ = "0.1.0"
|
|
6
|
+
|
|
7
|
+
from sqlmesh_dag_generator.generator import SQLMeshDAGGenerator
|
|
8
|
+
from sqlmesh_dag_generator.config import DAGGeneratorConfig
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"SQLMeshDAGGenerator",
|
|
12
|
+
"DAGGeneratorConfig",
|
|
13
|
+
]
|
|
14
|
+
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command-line interface for SQLMesh DAG Generator
|
|
3
|
+
"""
|
|
4
|
+
import argparse
|
|
5
|
+
import logging
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from sqlmesh_dag_generator import SQLMeshDAGGenerator
|
|
10
|
+
from sqlmesh_dag_generator.config import DAGGeneratorConfig
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def setup_logging(verbose: bool = False):
    """Configure root logging.

    Uses DEBUG level when *verbose* is true, INFO otherwise, with a
    timestamped "name - level - message" format.
    """
    if verbose:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.INFO
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=chosen_level,
    )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def main():
    """Main CLI entry point.

    Parses command-line arguments, builds a :class:`DAGGeneratorConfig`
    (either from a YAML file via --config, or from individual flags),
    validates the SQLMesh project, and generates the Airflow DAG.

    Exits with status 0 on success, 1 on validation failure or error.
    """
    parser = argparse.ArgumentParser(
        description="Generate Airflow DAGs from SQLMesh projects",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Generate DAG from SQLMesh project
  sqlmesh-dag-gen --project-path /path/to/sqlmesh --dag-id my_dag

  # Generate with custom output directory
  sqlmesh-dag-gen --project-path /path/to/sqlmesh --output-dir /path/to/dags

  # Use configuration file
  sqlmesh-dag-gen --config config.yaml

  # Validate without generating
  sqlmesh-dag-gen --project-path /path/to/sqlmesh --validate-only
"""
    )

    # Configuration source: a YAML file and a project path are exclusive.
    config_group = parser.add_mutually_exclusive_group()
    config_group.add_argument(
        '--config', '-c',
        help='Path to configuration YAML file'
    )
    config_group.add_argument(
        '--project-path', '-p',
        help='Path to SQLMesh project'
    )

    # SQLMesh options
    parser.add_argument(
        '--environment', '-e',
        default='prod',
        help='SQLMesh environment (default: prod)'
    )
    parser.add_argument(
        '--gateway', '-g',
        help='SQLMesh gateway name'
    )

    # Airflow DAG options
    parser.add_argument(
        '--dag-id',
        help='Airflow DAG ID'
    )
    parser.add_argument(
        '--schedule',
        help='Airflow schedule interval (cron expression or preset)'
    )
    parser.add_argument(
        '--tags',
        nargs='+',
        default=['sqlmesh'],
        help='Airflow DAG tags'
    )

    # Generation options
    parser.add_argument(
        '--output-dir', '-o',
        default='./dags',
        help='Output directory for generated DAG files (default: ./dags)'
    )
    parser.add_argument(
        '--operator-type',
        choices=['python', 'bash', 'kubernetes'],
        default='python',
        help='Airflow operator type to use (default: python)'
    )
    parser.add_argument(
        '--include-models',
        nargs='+',
        help='Only include these models'
    )
    parser.add_argument(
        '--exclude-models',
        nargs='+',
        help='Exclude these models'
    )

    # Actions
    parser.add_argument(
        '--validate-only',
        action='store_true',
        help='Only validate, do not generate DAG'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Generate DAG code but do not write to file'
    )

    # Other options
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose logging'
    )

    args = parser.parse_args()

    # Setup logging
    setup_logging(args.verbose)
    logger = logging.getLogger(__name__)

    try:
        # Load or build configuration
        if args.config:
            logger.info(f"Loading configuration from: {args.config}")
            config = DAGGeneratorConfig.from_file(args.config)
        else:
            if not args.project_path:
                parser.error("Either --config or --project-path must be provided")

            if not args.dag_id:
                # Derive a DAG ID from the project directory name.
                project_name = Path(args.project_path).name
                args.dag_id = f"sqlmesh_{project_name}"

            logger.info("Building configuration from command-line arguments")
            config = DAGGeneratorConfig.from_dict({
                "sqlmesh": {
                    "project_path": args.project_path,
                    "environment": args.environment,
                    "gateway": args.gateway,
                },
                "airflow": {
                    "dag_id": args.dag_id,
                    "schedule_interval": args.schedule,
                    "tags": args.tags,
                },
                "generation": {
                    "output_dir": args.output_dir,
                    "operator_type": args.operator_type,
                    "include_models": args.include_models,
                    "exclude_models": args.exclude_models,
                    "dry_run": args.dry_run,
                },
            })

        # BUG FIX: a YAML config file may set generation.dry_run; previously
        # only the --dry-run flag was consulted, so a config-driven dry run
        # was silently treated as a real run.
        dry_run = args.dry_run or config.generation.dry_run

        # Create generator
        generator = SQLMeshDAGGenerator(config=config)

        # Validate
        logger.info("Validating SQLMesh project...")
        if not generator.validate():
            logger.error("Validation failed")
            sys.exit(1)

        logger.info("✓ Validation passed")

        if args.validate_only:
            logger.info("Validation complete (--validate-only specified)")
            sys.exit(0)

        # Generate DAG
        logger.info(f"Generating Airflow DAG: {config.airflow.dag_id}")
        dag_code = generator.generate_dag()

        if dry_run:
            logger.info("=" * 60)
            logger.info("Generated DAG (dry-run mode):")
            logger.info("=" * 60)
            print(dag_code)
        else:
            # NOTE(review): relies on a private generator method; presumably
            # generate_dag() already wrote the file and this only reports the
            # path — confirm against SQLMeshDAGGenerator and consider exposing
            # a public accessor.
            output_path = generator._get_output_path()
            logger.info(f"✓ DAG file generated: {output_path}")

        logger.info("Success!")
        sys.exit(0)

    except Exception as e:
        # Top-level CLI boundary: report and exit non-zero; full traceback
        # only when --verbose was given.
        logger.error(f"Error: {e}", exc_info=args.verbose)
        sys.exit(1)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
if __name__ == "__main__":
|
|
201
|
+
main()
|
|
202
|
+
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration module for SQLMesh DAG Generator
|
|
3
|
+
"""
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from typing import Dict, List, Optional, Any
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
import yaml
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class SQLMeshConfig:
    """SQLMesh project configuration"""
    # Filesystem path to the SQLMesh project root (required).
    project_path: str
    # SQLMesh environment to operate on.
    environment: str = "prod"
    # Optional SQLMesh gateway name; None lets SQLMesh pick its default.
    gateway: Optional[str] = None
    # Optional explicit path to a SQLMesh config file.
    # NOTE(review): not referenced elsewhere in this module; presumably
    # forwarded to SQLMesh by the generator — confirm.
    config_path: Optional[str] = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class AirflowConfig:
    """Airflow DAG configuration"""
    # Unique Airflow DAG identifier (required).
    dag_id: str
    # Cron expression or Airflow preset; None means no schedule.
    schedule_interval: Optional[str] = None
    # default_args dict passed through to the DAG definition.
    default_args: Dict[str, Any] = field(default_factory=dict)
    # Tags shown in the Airflow UI.
    tags: List[str] = field(default_factory=list)
    # Whether Airflow should backfill missed intervals.
    catchup: bool = False
    # Maximum concurrently active DAG runs.
    max_active_runs: int = 1
    # Human-readable DAG description.
    description: Optional[str] = None
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class GenerationConfig:
    """DAG generation settings"""
    # Directory where generated DAG files are written.
    output_dir: str = "./dags"
    mode: str = "dynamic"  # "static" or "dynamic" - dynamic is default (fire & forget!)
    operator_type: str = "python"  # python, bash, or kubernetes
    # Whether to include SQLMesh tests in the generated DAG.
    include_tests: bool = False
    # Allow independent models to run as parallel tasks.
    parallel_tasks: bool = True
    # Cap on parallel tasks; None means unbounded.
    max_parallel_tasks: Optional[int] = None
    # Explicit allow-list of model names; None means all models.
    include_models: Optional[List[str]] = None
    # Explicit deny-list of model names.
    exclude_models: Optional[List[str]] = None
    # Optional pattern for model selection.
    # NOTE(review): glob vs. regex semantics not visible here — confirm
    # against the generator's filtering code.
    model_pattern: Optional[str] = None
    # When true, produce DAG code without writing it to disk.
    dry_run: bool = False
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class DAGGeneratorConfig:
    """Complete configuration for DAG generator.

    Aggregates the three configuration sections (sqlmesh, airflow,
    generation) and provides YAML/dict (de)serialization helpers.
    """
    sqlmesh: SQLMeshConfig
    airflow: AirflowConfig
    generation: GenerationConfig = field(default_factory=GenerationConfig)

    @classmethod
    def from_file(cls, config_path: str) -> "DAGGeneratorConfig":
        """Load configuration from a YAML file.

        Raises:
            FileNotFoundError: if *config_path* does not exist.
        """
        config_file = Path(config_path)
        if not config_file.exists():
            raise FileNotFoundError(f"Configuration file not found: {config_path}")

        with open(config_file, "r") as f:
            # BUG FIX: yaml.safe_load returns None for an empty document,
            # which previously crashed with AttributeError on .get();
            # substitute an empty mapping so section defaults apply.
            config_data = yaml.safe_load(f) or {}

        # Delegate so file- and dict-based loading cannot drift apart.
        return cls.from_dict(config_data)

    @classmethod
    def from_dict(cls, config_dict: Dict[str, Any]) -> "DAGGeneratorConfig":
        """Load configuration from a dictionary.

        Missing sections fall back to each dataclass's defaults; note that
        "sqlmesh" requires project_path and "airflow" requires dag_id, so
        omitting those sections raises TypeError.
        """
        return cls(
            sqlmesh=SQLMeshConfig(**config_dict.get("sqlmesh", {})),
            airflow=AirflowConfig(**config_dict.get("airflow", {})),
            generation=GenerationConfig(**config_dict.get("generation", {})),
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert configuration to a plain dictionary (inverse of from_dict)."""
        return {
            "sqlmesh": {
                "project_path": self.sqlmesh.project_path,
                "environment": self.sqlmesh.environment,
                "gateway": self.sqlmesh.gateway,
                "config_path": self.sqlmesh.config_path,
            },
            "airflow": {
                "dag_id": self.airflow.dag_id,
                "schedule_interval": self.airflow.schedule_interval,
                "default_args": self.airflow.default_args,
                "tags": self.airflow.tags,
                "catchup": self.airflow.catchup,
                "max_active_runs": self.airflow.max_active_runs,
                "description": self.airflow.description,
            },
            "generation": {
                "output_dir": self.generation.output_dir,
                # BUG FIX: "mode" was previously omitted here, so a
                # save() -> from_file() round-trip silently reset it to
                # the default ("dynamic").
                "mode": self.generation.mode,
                "operator_type": self.generation.operator_type,
                "include_tests": self.generation.include_tests,
                "parallel_tasks": self.generation.parallel_tasks,
                "max_parallel_tasks": self.generation.max_parallel_tasks,
                "include_models": self.generation.include_models,
                "exclude_models": self.generation.exclude_models,
                "model_pattern": self.generation.model_pattern,
                "dry_run": self.generation.dry_run,
            },
        }

    def save(self, output_path: str) -> None:
        """Save configuration to a YAML file, creating parent dirs as needed."""
        output_file = Path(output_path)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        with open(output_file, "w") as f:
            yaml.dump(self.to_dict(), f, default_flow_style=False, sort_keys=False)
|
|
116
|
+
|