trellis-datamodel 0.3.3__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- trellis_datamodel/adapters/base.py +5 -1
- trellis_datamodel/adapters/dbt_core.py +79 -8
- trellis_datamodel/config.py +43 -1
- trellis_datamodel/routes/__init__.py +2 -0
- trellis_datamodel/routes/lineage.py +60 -0
- trellis_datamodel/routes/manifest.py +6 -0
- trellis_datamodel/server.py +2 -1
- trellis_datamodel/services/__init__.py +2 -0
- trellis_datamodel/services/lineage.py +427 -0
- trellis_datamodel/static/_app/immutable/assets/0.DRr1NRor.css +1 -0
- trellis_datamodel/static/_app/immutable/chunks/{CXDUumOQ.js → COKQndWa.js} +1 -1
- trellis_datamodel/static/_app/immutable/entry/{app.abCkWeAJ.js → app.BA1wC-Z2.js} +2 -2
- trellis_datamodel/static/_app/immutable/entry/start.Cq8bDFFs.js +1 -0
- trellis_datamodel/static/_app/immutable/nodes/{1.J_r941Qf.js → 1.CITYWtIe.js} +1 -1
- trellis_datamodel/static/_app/immutable/nodes/2.DBgiABuH.js +27 -0
- trellis_datamodel/static/_app/version.json +1 -1
- trellis_datamodel/static/index.html +6 -6
- trellis_datamodel/tests/test_dbt_schema.py +260 -4
- trellis_datamodel/tests/test_yaml_handler.py +228 -0
- trellis_datamodel/utils/yaml_handler.py +131 -20
- {trellis_datamodel-0.3.3.dist-info → trellis_datamodel-0.4.0.dist-info}/METADATA +8 -4
- {trellis_datamodel-0.3.3.dist-info → trellis_datamodel-0.4.0.dist-info}/RECORD +28 -25
- trellis_datamodel/static/_app/immutable/assets/0.ByDwyx3a.css +0 -1
- trellis_datamodel/static/_app/immutable/entry/start.B7CjH6Z7.js +0 -1
- trellis_datamodel/static/_app/immutable/nodes/2.WqbMkq6o.js +0 -27
- /trellis_datamodel/static/_app/immutable/nodes/{0.bFI_DI3G.js → 0.CXLfbIn-.js} +0 -0
- {trellis_datamodel-0.3.3.dist-info → trellis_datamodel-0.4.0.dist-info}/WHEEL +0 -0
- {trellis_datamodel-0.3.3.dist-info → trellis_datamodel-0.4.0.dist-info}/entry_points.txt +0 -0
- {trellis_datamodel-0.3.3.dist-info → trellis_datamodel-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {trellis_datamodel-0.3.3.dist-info → trellis_datamodel-0.4.0.dist-info}/licenses/NOTICE +0 -0
- {trellis_datamodel-0.3.3.dist-info → trellis_datamodel-0.4.0.dist-info}/top_level.txt +0 -0
|
@@ -50,7 +50,7 @@ class ModelSchema(TypedDict, total=False):
|
|
|
50
50
|
file_path: str
|
|
51
51
|
|
|
52
52
|
|
|
53
|
-
class Relationship(TypedDict):
|
|
53
|
+
class Relationship(TypedDict, total=False):
|
|
54
54
|
"""Relationship inferred from framework metadata."""
|
|
55
55
|
|
|
56
56
|
source: str
|
|
@@ -59,6 +59,10 @@ class Relationship(TypedDict):
|
|
|
59
59
|
type: str # e.g., "one_to_many"
|
|
60
60
|
source_field: str
|
|
61
61
|
target_field: str
|
|
62
|
+
source_model_name: str # name of the source model (e.g., "employee")
|
|
63
|
+
source_model_version: Optional[int] # version of the source model if versioned
|
|
64
|
+
target_model_name: str # name of the target model (e.g., "employee_history")
|
|
65
|
+
target_model_version: Optional[int] # version of the target model if versioned
|
|
62
66
|
|
|
63
67
|
|
|
64
68
|
class TransformationAdapter(Protocol):
|
|
@@ -670,16 +670,32 @@ class DbtCoreAdapter:
|
|
|
670
670
|
if not to_ref or not target_field:
|
|
671
671
|
continue
|
|
672
672
|
|
|
673
|
-
target_base,
|
|
673
|
+
target_base, target_version_str = self._parse_ref(
|
|
674
674
|
to_ref
|
|
675
675
|
)
|
|
676
676
|
|
|
677
|
+
# Convert version string to int if present
|
|
678
|
+
target_version_int = None
|
|
679
|
+
if target_version_str:
|
|
680
|
+
try:
|
|
681
|
+
target_version_int = int(target_version_str)
|
|
682
|
+
except ValueError:
|
|
683
|
+
pass
|
|
684
|
+
|
|
685
|
+
# Convert model_version to int if present
|
|
686
|
+
source_version_int = None
|
|
687
|
+
if model_version is not None:
|
|
688
|
+
try:
|
|
689
|
+
source_version_int = int(model_version)
|
|
690
|
+
except (ValueError, TypeError):
|
|
691
|
+
pass
|
|
692
|
+
|
|
677
693
|
# When include_unbound, use raw model name
|
|
678
694
|
if include_unbound:
|
|
679
695
|
target_entity_id = target_base
|
|
680
696
|
else:
|
|
681
697
|
target_entity_id = self._resolve_entity_id(
|
|
682
|
-
model_to_entity, target_base,
|
|
698
|
+
model_to_entity, target_base, target_version_str
|
|
683
699
|
)
|
|
684
700
|
|
|
685
701
|
# Skip relationships where either side is not bound
|
|
@@ -697,6 +713,10 @@ class DbtCoreAdapter:
|
|
|
697
713
|
"type": "one_to_many",
|
|
698
714
|
"source_field": target_field,
|
|
699
715
|
"target_field": column.get("name"),
|
|
716
|
+
"source_model_name": target_base,
|
|
717
|
+
"source_model_version": target_version_int,
|
|
718
|
+
"target_model_name": base_model_name,
|
|
719
|
+
"target_model_version": source_version_int,
|
|
700
720
|
}
|
|
701
721
|
)
|
|
702
722
|
except Exception as e:
|
|
@@ -737,8 +757,13 @@ class DbtCoreAdapter:
|
|
|
737
757
|
eid: self._entity_to_model_name(ent) for eid, ent in entity_map.items()
|
|
738
758
|
}
|
|
739
759
|
|
|
740
|
-
# Group relationships by
|
|
760
|
+
# Group relationships by entity (the one with the FK)
|
|
761
|
+
# FK is always on the "many" side of the relationship
|
|
741
762
|
fk_by_entity: dict[str, list[dict]] = {}
|
|
763
|
+
|
|
764
|
+
# Track all fields that appear in ANY relationship in the data model
|
|
765
|
+
# This helps us identify which relationship tests are managed by us vs manually added
|
|
766
|
+
all_relationship_fields_by_entity: dict[str, set[str]] = {}
|
|
742
767
|
|
|
743
768
|
for rel in relationships:
|
|
744
769
|
source_id = rel.get("source")
|
|
@@ -750,11 +775,33 @@ class DbtCoreAdapter:
|
|
|
750
775
|
if not source_field or not target_field:
|
|
751
776
|
continue
|
|
752
777
|
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
778
|
+
# Determine which side has the "many" cardinality (where FK should be)
|
|
779
|
+
# Relationship types ending in "_to_many" or starting with "many_to_" have FK on target/source respectively
|
|
780
|
+
# For one_to_one, FK is typically on source (FK holder → referenced table per spec)
|
|
781
|
+
if rel_type in ("one_to_many", "one_to_zero_or_many", "zero_or_one_to_many", "zero_or_many_to_many"):
|
|
782
|
+
# FK on target (target is the "many" side)
|
|
783
|
+
fk_entity = target_id
|
|
784
|
+
fk_field = target_field
|
|
785
|
+
ref_entity = source_id
|
|
786
|
+
ref_field = source_field
|
|
787
|
+
elif rel_type in ("many_to_one", "many_to_many", "zero_or_many_to_one"):
|
|
788
|
+
# FK on source (source is the "many" side)
|
|
789
|
+
fk_entity = source_id
|
|
790
|
+
fk_field = source_field
|
|
791
|
+
ref_entity = target_id
|
|
792
|
+
ref_field = target_field
|
|
793
|
+
elif rel_type == "one_to_one":
|
|
794
|
+
# For one_to_one, FK is on source (FK holder → referenced table per spec)
|
|
795
|
+
fk_entity = source_id
|
|
796
|
+
fk_field = source_field
|
|
797
|
+
ref_entity = target_id
|
|
798
|
+
ref_field = target_field
|
|
799
|
+
else:
|
|
800
|
+
# Fallback: assume FK on target (default behavior)
|
|
801
|
+
fk_entity = target_id
|
|
802
|
+
fk_field = target_field
|
|
803
|
+
ref_entity = source_id
|
|
804
|
+
ref_field = source_field
|
|
758
805
|
|
|
759
806
|
fk_by_entity.setdefault(fk_entity, []).append(
|
|
760
807
|
{
|
|
@@ -763,6 +810,10 @@ class DbtCoreAdapter:
|
|
|
763
810
|
"ref_field": ref_field,
|
|
764
811
|
}
|
|
765
812
|
)
|
|
813
|
+
|
|
814
|
+
# Track which fields are involved in relationships
|
|
815
|
+
all_relationship_fields_by_entity.setdefault(source_id, set()).add(source_field)
|
|
816
|
+
all_relationship_fields_by_entity.setdefault(target_id, set()).add(target_field)
|
|
766
817
|
|
|
767
818
|
models_dir = self.get_model_dirs()[0]
|
|
768
819
|
os.makedirs(models_dir, exist_ok=True)
|
|
@@ -816,7 +867,27 @@ class DbtCoreAdapter:
|
|
|
816
867
|
)
|
|
817
868
|
|
|
818
869
|
# Sync Relationships (FKs)
|
|
870
|
+
# Build a map of which fields should have which relationship tests
|
|
819
871
|
fk_list = fk_by_entity.get(entity_id, [])
|
|
872
|
+
fk_fields = {fk_info["fk_field"] for fk_info in fk_list}
|
|
873
|
+
|
|
874
|
+
# Get all fields that appear in relationships for this entity
|
|
875
|
+
relationship_fields = all_relationship_fields_by_entity.get(entity_id, set())
|
|
876
|
+
|
|
877
|
+
# Clean up: Remove relationship tests from fields that:
|
|
878
|
+
# 1. Are in a relationship in the data model (relationship_fields)
|
|
879
|
+
# 2. But are NOT currently FKs (not in fk_fields)
|
|
880
|
+
# This removes tests when relationships are moved or type changes
|
|
881
|
+
# But preserves manually added tests (not in relationship_fields)
|
|
882
|
+
if "columns" in model_entry:
|
|
883
|
+
for col in model_entry.get("columns", []):
|
|
884
|
+
col_name = col.get("name")
|
|
885
|
+
if col_name and col_name in relationship_fields and col_name not in fk_fields:
|
|
886
|
+
# This field was in a relationship but is no longer an FK
|
|
887
|
+
# Remove its relationship test
|
|
888
|
+
self.yaml_handler.remove_relationship_test(col)
|
|
889
|
+
|
|
890
|
+
# Now add/update relationship tests for current FKs
|
|
820
891
|
for fk_info in fk_list:
|
|
821
892
|
fk_field = fk_info["fk_field"]
|
|
822
893
|
ref_entity = fk_info["ref_entity"]
|
trellis_datamodel/config.py
CHANGED
|
@@ -10,10 +10,24 @@ import os
|
|
|
10
10
|
import yaml
|
|
11
11
|
from pathlib import Path
|
|
12
12
|
from typing import Optional
|
|
13
|
+
from dataclasses import dataclass, field
|
|
13
14
|
|
|
14
15
|
# Check for test mode - allows overriding config via environment
|
|
15
16
|
_TEST_DIR = os.environ.get("DATAMODEL_TEST_DIR", "")
|
|
16
17
|
|
|
18
|
+
|
|
19
|
+
@dataclass
|
|
20
|
+
class GuidanceConfig:
|
|
21
|
+
"""Configuration for entity creation guidance features."""
|
|
22
|
+
entity_wizard_enabled: bool = True
|
|
23
|
+
push_warning_enabled: bool = True
|
|
24
|
+
min_description_length: int = 10
|
|
25
|
+
disabled_guidance: list[str] = field(default_factory=list)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Global guidance configuration (set by load_config)
|
|
29
|
+
GUIDANCE_CONFIG: GuidanceConfig = GuidanceConfig()
|
|
30
|
+
|
|
17
31
|
if _TEST_DIR:
|
|
18
32
|
# Test mode: use temp directory paths
|
|
19
33
|
CONFIG_PATH = os.path.join(_TEST_DIR, "config.yml")
|
|
@@ -38,6 +52,8 @@ if _TEST_DIR:
|
|
|
38
52
|
DBT_MODEL_PATHS: list[str] = ["3_core"]
|
|
39
53
|
FRONTEND_BUILD_DIR: str = os.path.join(_TEST_DIR, "frontend/build")
|
|
40
54
|
DBT_COMPANY_DUMMY_PATH: str = os.path.join(_TEST_DIR, "dbt_company_dummy")
|
|
55
|
+
LINEAGE_LAYERS: list[str] = []
|
|
56
|
+
GUIDANCE_CONFIG: GuidanceConfig = GuidanceConfig()
|
|
41
57
|
else:
|
|
42
58
|
# Production mode: will be set by load_config()
|
|
43
59
|
CONFIG_PATH: str = ""
|
|
@@ -51,6 +67,7 @@ else:
|
|
|
51
67
|
DBT_MODEL_PATHS: list[str] = []
|
|
52
68
|
FRONTEND_BUILD_DIR: str = ""
|
|
53
69
|
DBT_COMPANY_DUMMY_PATH: str = ""
|
|
70
|
+
LINEAGE_LAYERS: list[str] = []
|
|
54
71
|
|
|
55
72
|
|
|
56
73
|
def find_config_file(config_override: Optional[str] = None) -> Optional[str]:
|
|
@@ -85,7 +102,7 @@ def find_config_file(config_override: Optional[str] = None) -> Optional[str]:
|
|
|
85
102
|
|
|
86
103
|
def load_config(config_path: Optional[str] = None) -> None:
|
|
87
104
|
"""Load and resolve all paths from config file."""
|
|
88
|
-
global FRAMEWORK, MANIFEST_PATH, DATA_MODEL_PATH, DBT_MODEL_PATHS, CATALOG_PATH, DBT_PROJECT_PATH, CANVAS_LAYOUT_PATH, CANVAS_LAYOUT_VERSION_CONTROL, CONFIG_PATH, FRONTEND_BUILD_DIR, DBT_COMPANY_DUMMY_PATH
|
|
105
|
+
global FRAMEWORK, MANIFEST_PATH, DATA_MODEL_PATH, DBT_MODEL_PATHS, CATALOG_PATH, DBT_PROJECT_PATH, CANVAS_LAYOUT_PATH, CANVAS_LAYOUT_VERSION_CONTROL, CONFIG_PATH, FRONTEND_BUILD_DIR, DBT_COMPANY_DUMMY_PATH, LINEAGE_LAYERS, GUIDANCE_CONFIG
|
|
89
106
|
|
|
90
107
|
# Skip loading config file in test mode (paths already set via environment)
|
|
91
108
|
if _TEST_DIR:
|
|
@@ -219,6 +236,29 @@ def load_config(config_path: Optional[str] = None) -> None:
|
|
|
219
236
|
DBT_COMPANY_DUMMY_PATH = p
|
|
220
237
|
# Note: No default set here - CLI handles fallback to cwd/dbt_company_dummy
|
|
221
238
|
|
|
239
|
+
# 10. Load lineage layers configuration
|
|
240
|
+
if "lineage_layers" in config:
|
|
241
|
+
LINEAGE_LAYERS = config["lineage_layers"]
|
|
242
|
+
if not isinstance(LINEAGE_LAYERS, list):
|
|
243
|
+
LINEAGE_LAYERS = []
|
|
244
|
+
else:
|
|
245
|
+
LINEAGE_LAYERS = []
|
|
246
|
+
|
|
247
|
+
# 11. Load guidance configuration
|
|
248
|
+
if "guidance" in config:
|
|
249
|
+
guidance_config = config["guidance"]
|
|
250
|
+
GUIDANCE_CONFIG = GuidanceConfig(
|
|
251
|
+
entity_wizard_enabled=guidance_config.get("entity_wizard_enabled", True),
|
|
252
|
+
push_warning_enabled=guidance_config.get("push_warning_enabled", True),
|
|
253
|
+
min_description_length=guidance_config.get("min_description_length", 10),
|
|
254
|
+
disabled_guidance=guidance_config.get("disabled_guidance", [])
|
|
255
|
+
if isinstance(guidance_config.get("disabled_guidance"), list)
|
|
256
|
+
else [],
|
|
257
|
+
)
|
|
258
|
+
else:
|
|
259
|
+
# Use defaults if guidance section is missing
|
|
260
|
+
GUIDANCE_CONFIG = GuidanceConfig()
|
|
261
|
+
|
|
222
262
|
except Exception as e:
|
|
223
263
|
print(f"Error loading config: {e}")
|
|
224
264
|
|
|
@@ -235,5 +275,7 @@ def print_config() -> None:
|
|
|
235
275
|
print(f"Looking for canvas layout at: {CANVAS_LAYOUT_PATH}")
|
|
236
276
|
print(f"Canvas layout version control: {CANVAS_LAYOUT_VERSION_CONTROL}")
|
|
237
277
|
print(f"Filtering models by paths: {DBT_MODEL_PATHS}")
|
|
278
|
+
if LINEAGE_LAYERS:
|
|
279
|
+
print(f"Lineage layers: {LINEAGE_LAYERS}")
|
|
238
280
|
if DBT_COMPANY_DUMMY_PATH:
|
|
239
281
|
print(f"dbt company dummy path: {DBT_COMPANY_DUMMY_PATH}")
|
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
from .manifest import router as manifest_router
|
|
3
3
|
from .data_model import router as data_model_router
|
|
4
4
|
from .schema import router as schema_router
|
|
5
|
+
from .lineage import router as lineage_router
|
|
5
6
|
|
|
6
7
|
__all__ = [
|
|
7
8
|
"manifest_router",
|
|
8
9
|
"data_model_router",
|
|
9
10
|
"schema_router",
|
|
11
|
+
"lineage_router",
|
|
10
12
|
]
|
|
11
13
|
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Routes for lineage operations."""
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, HTTPException
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
from trellis_datamodel import config as cfg
|
|
7
|
+
from trellis_datamodel.services.lineage import extract_upstream_lineage, LineageError
|
|
8
|
+
|
|
9
|
+
router = APIRouter(prefix="/api", tags=["lineage"])
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@router.get("/lineage/{model_id}")
|
|
13
|
+
async def get_lineage(model_id: str):
|
|
14
|
+
"""
|
|
15
|
+
Get upstream table-level lineage for a given model.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
model_id: Unique ID of the model (e.g., "model.project.model_name")
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
JSON response with nodes, edges, and metadata
|
|
22
|
+
|
|
23
|
+
Raises:
|
|
24
|
+
404: If model not found
|
|
25
|
+
500: If lineage extraction fails
|
|
26
|
+
"""
|
|
27
|
+
try:
|
|
28
|
+
# Validate paths exist
|
|
29
|
+
if not cfg.MANIFEST_PATH or not os.path.exists(cfg.MANIFEST_PATH):
|
|
30
|
+
raise HTTPException(
|
|
31
|
+
status_code=500,
|
|
32
|
+
detail=f"Manifest not found at {cfg.MANIFEST_PATH}. Please ensure manifest.json exists.",
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# Extract lineage
|
|
36
|
+
lineage_data = extract_upstream_lineage(
|
|
37
|
+
manifest_path=cfg.MANIFEST_PATH,
|
|
38
|
+
catalog_path=cfg.CATALOG_PATH,
|
|
39
|
+
model_unique_id=model_id,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
return lineage_data
|
|
43
|
+
|
|
44
|
+
except FileNotFoundError as e:
|
|
45
|
+
raise HTTPException(status_code=404, detail=str(e))
|
|
46
|
+
except LineageError as e:
|
|
47
|
+
# Check if it's a catalog missing error
|
|
48
|
+
error_msg = str(e)
|
|
49
|
+
if "catalog" in error_msg.lower() and "not found" in error_msg.lower():
|
|
50
|
+
raise HTTPException(
|
|
51
|
+
status_code=500,
|
|
52
|
+
detail=f"{error_msg}. Please run 'dbt docs generate' to create catalog.json",
|
|
53
|
+
)
|
|
54
|
+
raise HTTPException(status_code=500, detail=str(e))
|
|
55
|
+
except Exception as e:
|
|
56
|
+
raise HTTPException(
|
|
57
|
+
status_code=500,
|
|
58
|
+
detail=f"Error extracting lineage: {str(e)}",
|
|
59
|
+
)
|
|
60
|
+
|
|
@@ -94,6 +94,12 @@ async def get_config_info():
|
|
|
94
94
|
"frontend_build_dir": cfg.FRONTEND_BUILD_DIR,
|
|
95
95
|
"model_paths_configured": cfg.DBT_MODEL_PATHS,
|
|
96
96
|
"model_paths_resolved": model_dirs,
|
|
97
|
+
"guidance": {
|
|
98
|
+
"entity_wizard_enabled": cfg.GUIDANCE_CONFIG.entity_wizard_enabled,
|
|
99
|
+
"push_warning_enabled": cfg.GUIDANCE_CONFIG.push_warning_enabled,
|
|
100
|
+
"min_description_length": cfg.GUIDANCE_CONFIG.min_description_length,
|
|
101
|
+
"disabled_guidance": cfg.GUIDANCE_CONFIG.disabled_guidance,
|
|
102
|
+
},
|
|
97
103
|
}
|
|
98
104
|
|
|
99
105
|
|
trellis_datamodel/server.py
CHANGED
|
@@ -14,7 +14,7 @@ from importlib.resources import files
|
|
|
14
14
|
|
|
15
15
|
from trellis_datamodel import config as cfg
|
|
16
16
|
from trellis_datamodel.config import print_config
|
|
17
|
-
from trellis_datamodel.routes import manifest_router, data_model_router, schema_router
|
|
17
|
+
from trellis_datamodel.routes import manifest_router, data_model_router, schema_router, lineage_router
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
def create_app() -> FastAPI:
|
|
@@ -61,6 +61,7 @@ def create_app() -> FastAPI:
|
|
|
61
61
|
app.include_router(manifest_router)
|
|
62
62
|
app.include_router(data_model_router)
|
|
63
63
|
app.include_router(schema_router)
|
|
64
|
+
app.include_router(lineage_router)
|
|
64
65
|
|
|
65
66
|
# Mount static files AFTER API routes
|
|
66
67
|
# Important: app.mount() creates a sub-application, so we mount AFTER registering API routes
|