tdfs4ds 0.2.5.3__py3-none-any.whl → 0.2.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +1 -1
- tdfs4ds/feature_store/feature_data_processing.py +7 -7
- tdfs4ds/feature_store/feature_store_management.py +14 -13
- tdfs4ds/genai/__init__.py +4 -2
- tdfs4ds/genai/documentation.py +251 -72
- tdfs4ds/lineage/__init__.py +21 -0
- tdfs4ds/lineage/indexing.py +501 -0
- tdfs4ds/lineage/lineage.py +409 -0
- tdfs4ds/lineage/network.py +446 -0
- tdfs4ds/utils/lineage.py +2 -0
- tdfs4ds/utils/query_management.py +13 -6
- {tdfs4ds-0.2.5.3.dist-info → tdfs4ds-0.2.5.5.dist-info}/METADATA +10 -3
- {tdfs4ds-0.2.5.3.dist-info → tdfs4ds-0.2.5.5.dist-info}/RECORD +15 -11
- {tdfs4ds-0.2.5.3.dist-info → tdfs4ds-0.2.5.5.dist-info}/WHEEL +1 -1
- {tdfs4ds-0.2.5.3.dist-info → tdfs4ds-0.2.5.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, asdict
|
|
4
|
+
from typing import Dict, List, Tuple, Set, Optional, Any
|
|
5
|
+
|
|
6
|
+
from tdfs4ds.lineage.lineage import analyze_sql_query
|
|
7
|
+
from tdfs4ds.lineage.indexing import analyze_teradata_ddl
|
|
8
|
+
|
|
9
|
+
from tdfs4ds import logger_safe
|
|
10
|
+
from tdfs4ds.utils.query_management import execute_query
|
|
11
|
+
import teradataml as tdml
|
|
12
|
+
|
|
13
|
+
from typing import Dict, Any, List, Tuple, Optional
|
|
14
|
+
import plotly.graph_objects as go
|
|
15
|
+
import plotly.io as pio
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class NodeInfo:
    """
    Metadata for one node of the dependency graph built by
    build_teradata_dependency_graph.

    A node is a Teradata object (view or table) discovered while walking
    the lineage of a SQL query.
    """
    name: str  # canonical name, e.g. '"db"."obj"' or '<DEFAULT_DATABASE>."obj"'
    object_type: str  # "view" | "table" | "unknown"
    database: Optional[str] = None  # parsed database part, None when unknown/default
    object: Optional[str] = None  # parsed object part
    ddl: Optional[str] = None  # raw SHOW output; only kept when store_ddl=True (memory)
    table_info: Optional[Dict[str, Any]] = None  # output of analyze_teradata_ddl for tables
    error: Optional[str] = None  # set when SHOW/parse failed for this object
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _require_teradataml():
    """Raise a RuntimeError if the teradataml-backed query helper is unavailable."""
    if execute_query is not None:
        return
    raise RuntimeError(
        "teradataml execute_query is not available. "
        "Make sure teradataml is installed and imported correctly."
    )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _strip_outer_quotes(s: str) -> str:
|
|
39
|
+
s = s.strip()
|
|
40
|
+
if len(s) >= 2 and ((s[0] == s[-1] == '"') or (s[0] == s[-1] == "'")):
|
|
41
|
+
return s[1:-1]
|
|
42
|
+
return s
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _parse_node_name(node_name: str) -> Tuple[Optional[str], str]:
|
|
46
|
+
"""
|
|
47
|
+
Accepts names like:
|
|
48
|
+
- '"sales"."orders"'
|
|
49
|
+
- '<DEFAULT_DATABASE>."orders"'
|
|
50
|
+
- 'sales.orders'
|
|
51
|
+
- 'orders' (rare; treat as object only)
|
|
52
|
+
Returns (database, object).
|
|
53
|
+
"""
|
|
54
|
+
s = node_name.strip()
|
|
55
|
+
|
|
56
|
+
# canonical already (like your analyzer outputs)
|
|
57
|
+
if s.startswith("<DEFAULT_DATABASE>"):
|
|
58
|
+
# format: <DEFAULT_DATABASE>."obj"
|
|
59
|
+
parts = s.split(".", 1)
|
|
60
|
+
obj = parts[1] if len(parts) > 1 else s
|
|
61
|
+
obj = _strip_outer_quotes(obj)
|
|
62
|
+
return None, obj
|
|
63
|
+
|
|
64
|
+
if s.startswith('"'):
|
|
65
|
+
# format: "db"."obj" (assume exactly two quoted identifiers)
|
|
66
|
+
# very small parser: split on "." between quoted segments
|
|
67
|
+
# e.g. '"db"."obj"' -> ['"db"', '"obj"']
|
|
68
|
+
bits = s.split(".")
|
|
69
|
+
if len(bits) >= 2:
|
|
70
|
+
db = _strip_outer_quotes(bits[0])
|
|
71
|
+
obj = _strip_outer_quotes(bits[1])
|
|
72
|
+
return db, obj
|
|
73
|
+
|
|
74
|
+
# unquoted forms
|
|
75
|
+
if "." in s:
|
|
76
|
+
db, obj = s.split(".", 1)
|
|
77
|
+
return db.strip(), obj.strip()
|
|
78
|
+
|
|
79
|
+
return None, s
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _quote_ident(ident: str) -> str:
|
|
83
|
+
# Teradata supports "quoted identifiers"; quoting is safest for mixed case / special chars.
|
|
84
|
+
ident = ident.replace('"', '""')
|
|
85
|
+
return f'"{ident}"'
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _qualified_name_for_show(node_name: str, default_database: Optional[str]) -> str:
    """
    Build a safe, fully quoted name for SHOW VIEW / SHOW TABLE statements.

    - If node_name carries its own database, use it.
    - If it is the <DEFAULT_DATABASE> placeholder (or bare), qualify with
      default_database when provided.
    - Otherwise return the object unqualified and rely on the session's
      default database.
    """
    db, obj = _parse_node_name(node_name)
    if db is not None:
        return f"{_quote_ident(db)}.{_quote_ident(obj)}"
    if default_database:
        return f"{_quote_ident(default_database)}.{_quote_ident(obj)}"
    # no database known: an unqualified name may still work if the session has a default DB
    return _quote_ident(obj)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _fetch_single_column_rows(sql: str) -> str:
    """
    Execute *sql* and return the text of the first column of the first row.

    SHOW VIEW / SHOW TABLE return the DDL as a single cell whose line
    separators are carriage returns; these are normalized to newlines.

    NOTE: despite the historical name, this returns ONE string, not a list —
    the previous ``List[str]`` annotation and docstring were wrong
    (``fetchall()[0][0]`` selects a single cell).

    Raises:
        RuntimeError: if the teradataml query helper is unavailable.
        IndexError: if the query returns no rows.
    """
    _require_teradataml()
    return tdml.execute_sql(sql).fetchall()[0][0].replace('\r', '\n')
|
|
112
|
+
|
|
113
|
+
def _show_view_or_table(node_name: str, default_database: Optional[str]) -> Tuple[str, str]:
    """
    Fetch an object's DDL, trying SHOW VIEW first and then SHOW TABLE.

    Returns:
        (object_type, ddl_text) where object_type is "view" or "table".

    Raises:
        Whatever SHOW TABLE raises when the object is neither a view nor a table.
    """
    qualified = _qualified_name_for_show(node_name, default_database)

    # attempt SHOW VIEW; any failure (or empty DDL) falls through to SHOW TABLE
    view_ddl = None
    try:
        view_ddl = _fetch_single_column_rows(f"SHOW VIEW {qualified};")
    except Exception:
        pass
    if view_ddl:
        return "view", view_ddl

    # last resort: SHOW TABLE — let its exception propagate to the caller
    table_ddl = _fetch_single_column_rows(f"SHOW TABLE {qualified};")
    return "table", table_ddl
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def build_teradata_dependency_graph(
    sql_query: str,
    *,
    default_database: Optional[str] = None,
    max_depth: int = 50,
    store_ddl: bool = False,
) -> Dict[str, Any]:
    """
    Builds a dependency graph by:
      1) analyzing the initial SQL via analyze_sql_query
      2) for each discovered source object:
         - SHOW VIEW and re-run analyze_sql_query on its DDL to expand dependencies
         - or SHOW TABLE and run analyze_teradata_ddl to attach PI/partition metadata

    Graph edges are parent -> child (parent depends on child).

    Args:
        sql_query: SQL text whose lineage should be resolved.
        default_database: used to qualify unqualified object names for SHOW.
        max_depth: recursion cap; nodes beyond it get an error marker.
        store_ddl: keep the raw SHOW output on each node (memory cost).

    Returns:
        {
          "nodes": { node_name: { ...NodeInfo... }, ... },
          "edges": [ (parent, child), ... ],
          "roots": [ ...targets from initial SQL... ],
        }
    """
    # NOTE(review): analyze_sql_query is assumed to return dicts with "target"
    # and "source" name lists — inferred from usage here; confirm in lineage.py.
    initial = analyze_sql_query(sql_query)
    roots = list(initial.get("target", []) or [])
    first_sources = list(initial.get("source", []) or [])

    nodes: Dict[str, NodeInfo] = {}
    edges: List[Tuple[str, str]] = []
    visited: Set[str] = set()  # guards against cycles and repeated expansion

    def ensure_node(name: str) -> NodeInfo:
        # Create a placeholder node on first sight; type is resolved later.
        if name not in nodes:
            db, obj = _parse_node_name(name)
            nodes[name] = NodeInfo(
                name=name,
                object_type="unknown",
                database=db,
                object=obj,
            )
        return nodes[name]

    # If the SQL has targets, connect each target -> each source as first-level edges.
    # If there's no target (pure SELECT), we treat sources as roots to expand.
    if roots:
        for t in roots:
            ensure_node(t).object_type = "view"  # could be table too; we can resolve later if needed
            for s in first_sources:
                ensure_node(s)
                edges.append((t, s))
    else:
        for s in first_sources:
            ensure_node(s)

    def expand_from(parent: Optional[str], obj_name: str, depth: int) -> None:
        # Recursively resolve obj_name via SHOW and append its dependency edges.
        # (parent is currently unused; kept for call-site symmetry.)
        if depth > max_depth:
            ensure_node(obj_name).error = f"max_depth {max_depth} exceeded"
            return

        if obj_name in visited:
            return
        visited.add(obj_name)

        node = ensure_node(obj_name)

        try:
            obj_type, ddl = _show_view_or_table(obj_name, default_database)
            node.object_type = obj_type
            if store_ddl:
                node.ddl = ddl

            if obj_type == "view":
                # a view's DDL is itself SQL: re-analyze it to find its sources
                parsed = analyze_sql_query(ddl)
                children = list(parsed.get("source", []) or [])
                for child in children:
                    ensure_node(child)
                    # edge: view -> dependency
                    edges.append((obj_name, child))
                    expand_from(obj_name, child, depth + 1)

            elif obj_type == "table":
                # attach PI/partition metadata; failures are recorded, not raised
                try:
                    node.table_info = analyze_teradata_ddl(ddl)
                except Exception as e:
                    node.error = f"analyze_teradata_ddl failed: {e}"

            else:
                node.error = "unknown object type"

        except Exception as e:
            # SHOW failed (missing object, permissions, ...): keep the node with its error
            node.error = str(e)

    # Expand:
    if roots:
        # expand the target(s) as well if you want full drill-down starting from produced object
        # but most people expand from the sources; still, expanding targets can be useful
        for t in roots:
            expand_from(None, t, 0)
        for s in first_sources:
            expand_from(None, s, 0)
    else:
        for s in first_sources:
            expand_from(None, s, 0)

    # de-duplicate edges while preserving order
    seen_e = set()
    edges_dedup: List[Tuple[str, str]] = []
    for e in edges:
        if e not in seen_e:
            edges_dedup.append(e)
            seen_e.add(e)

    return {
        "nodes": {k: asdict(v) for k, v in nodes.items()},
        "edges": edges_dedup,
        "roots": roots,
    }
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# ----------------------------
|
|
255
|
+
# Optional: helper to build a "children adjacency" map, if you prefer that form
|
|
256
|
+
def graph_to_adjacency(graph: Dict[str, Any]) -> Dict[str, List[str]]:
    """
    Convert a lineage graph's edge list into a parent -> children adjacency map.

    Children keep the order their edges appear in; parents with no outgoing
    edges are absent from the result.
    """
    adjacency: Dict[str, List[str]] = {}
    for parent, child in graph.get("edges", []):
        children = adjacency.setdefault(parent, [])
        children.append(child)
    return adjacency
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def plot_lineage_sankey(
    graph: Dict[str, Any],
    *,
    title: str = "Lineage Sankey",
    max_label_len: int = 45,
    show_full_name_in_hover: bool = True,
) -> go.Figure:
    """
    Plot an interactive Sankey from a lineage graph like:
      {
        "nodes": {name: {name, object_type, database, object, table_info, error, ...}, ...},
        "edges": [(parent, child), ...],   # parent depends on child
        "roots": [...]
      }

    Hover displays relevant info (no DDL).

    Args:
        graph: output of build_teradata_dependency_graph (nodes as plain dicts).
        title: figure title.
        max_label_len: node labels longer than this are ellipsized.
        show_full_name_in_hover: show the canonical node name in hover text.

    Returns:
        A plotly Figure (views steel-blue, tables sea-green, unknown grey).
    """

    nodes: Dict[str, Dict[str, Any]] = graph.get("nodes", {}) or {}
    edges: List[Tuple[str, str]] = graph.get("edges", []) or []

    # --- helpers ---
    def shorten(s: str, n: int) -> str:
        # ellipsize to at most n characters (including the ellipsis)
        s = str(s)
        return s if len(s) <= n else (s[: n - 1] + "…")

    def format_table_info(ti: Optional[Dict[str, Any]]) -> str:
        # Render PI / partitioning metadata (from analyze_teradata_ddl) as HTML lines.
        if not ti:
            return ""
        pi = ti.get("primary_index_columns") or []
        parts = ti.get("partition_columns") or []
        lvls = ti.get("partitioning_levels") or []

        lines = []
        if pi:
            lines.append(f"PI: {', '.join(pi)}")
        if parts:
            lines.append(f"Partitions: {', '.join(parts)}")
        if lvls:
            kinds = ", ".join(
                [f"L{lvl.get('level')}:{lvl.get('kind')}" for lvl in lvls if isinstance(lvl, dict)]
            )
            if kinds:
                lines.append(f"Partitioning: {kinds}")
        return "<br>".join(lines)

    def hover_for(node_name: str) -> str:
        # Build the full HTML hover card for one node.
        meta = nodes.get(node_name, {}) or {}
        obj_type = meta.get("object_type", "unknown")
        db = meta.get("database")
        obj = meta.get("object")
        err = meta.get("error")

        lines = []
        if show_full_name_in_hover:
            lines.append(f"<b>{node_name}</b>")
        else:
            lines.append(f"<b>{db}.{obj}</b>" if db and obj else f"<b>{node_name}</b>")

        lines.append(f"Type: <b>{obj_type}</b>")
        if db is not None:
            lines.append(f"DB: {db}")
        if obj is not None:
            lines.append(f"Object: {obj}")

        if obj_type == "table":
            ti = meta.get("table_info")
            ti_txt = format_table_info(ti)
            if ti_txt:
                lines.append(ti_txt)

        if err:
            lines.append(f"<span style='color:#b00020'>Error: {err}</span>")

        return "<br>".join(lines)

    # Collect node names seen in edges; also include isolated roots if any
    node_names_set = set()
    for p, c in edges:
        node_names_set.add(p)
        node_names_set.add(c)
    for r in graph.get("roots", []) or []:
        node_names_set.add(r)

    # sorted for a deterministic node ordering (and stable index mapping)
    node_names = sorted(node_names_set)
    idx = {name: i for i, name in enumerate(node_names)}

    # Build labels and hover text
    labels = []
    hovers = []
    node_colors = []

    for name in node_names:
        meta = nodes.get(name, {}) or {}
        obj_type = (meta.get("object_type") or "unknown").lower()

        # Sankey labels should stay readable
        if meta.get("database") and meta.get("object"):
            label = f'{meta["database"]}.{meta["object"]}'
        else:
            label = name
        labels.append(shorten(label, max_label_len))

        hovers.append(hover_for(name))

        # Simple type-based coloring (feel free to tweak)
        if obj_type == "view":
            node_colors.append("rgba(70,130,180,0.75)")  # steelblue-ish
        elif obj_type == "table":
            node_colors.append("rgba(46,139,87,0.75)")  # seagreen-ish
        else:
            node_colors.append("rgba(128,128,128,0.65)")

    # Sankey links (parent -> child). Value=1 per dependency by default.
    sources = [idx[p] for p, _ in edges if p in idx]
    targets = [idx[c] for _, c in edges if c in idx]
    values = [1] * len(sources)

    link_hover = [
        f"<b>{shorten(node_names[s], 120)}</b><br>depends on<br><b>{shorten(node_names[t], 120)}</b>"
        for s, t in zip(sources, targets)
    ]

    fig = go.Figure(
        data=[
            go.Sankey(
                arrangement="snap",
                node=dict(
                    pad=18,
                    thickness=18,
                    label=labels,
                    color=node_colors,
                    customdata=hovers,
                    hovertemplate="%{customdata}<extra></extra>",
                ),
                link=dict(
                    source=sources,
                    target=targets,
                    value=values,
                    customdata=link_hover,
                    hovertemplate="%{customdata}<extra></extra>",
                ),
            )
        ]
    )

    fig.update_layout(
        title=title,
        font=dict(size=12),
        hoverlabel=dict(align="left"),
        margin=dict(l=20, r=20, t=60, b=20),
    )

    return fig
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
|
|
424
|
+
def show_plotly_robust(fig, *, html_path: Optional[str] = None) -> None:
    """
    Display a Plotly figure robustly across environments.

    - Optionally writes an HTML file (done FIRST: the original implementation
      returned early when fig.show() succeeded, so html_path was silently
      ignored on the normal success path)
    - Tries normal fig.show() (Jupyter mime rendering)
    - Falls back to the browser renderer if nbformat is missing

    Args:
        fig: a plotly Figure.
        html_path: if given, also write a standalone HTML file (plotly.js from CDN).
    """
    # Export first so it happens regardless of how (or whether) display works.
    if html_path:
        fig.write_html(html_path, include_plotlyjs="cdn", full_html=True)
        print(f"Wrote HTML to: {html_path}")

    try:
        fig.show()
    except ValueError as e:
        msg = str(e).lower()
        if "nbformat" in msg and "mime type rendering requires" in msg:
            # Fallback: open in browser
            pio.renderers.default = "browser"
            fig.show()
        else:
            raise
|
tdfs4ds/utils/lineage.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import teradataml as tdml
|
|
2
2
|
import functools
|
|
3
3
|
from packaging import version
|
|
4
|
+
from tdfs4ds import logger_safe
|
|
5
|
+
import tdfs4ds
|
|
6
|
+
|
|
4
7
|
def is_version_greater_than(tested_version, base_version="17.20.00.03"):
|
|
5
8
|
"""
|
|
6
9
|
Check if the tested version is greater than the base version.
|
|
@@ -25,6 +28,7 @@ def is_version_greater_than(tested_version, base_version="17.20.00.03"):
|
|
|
25
28
|
|
|
26
29
|
"""
|
|
27
30
|
return version.parse(tested_version) > version.parse(base_version)
|
|
31
|
+
|
|
28
32
|
def execute_query_wrapper(f):
|
|
29
33
|
"""
|
|
30
34
|
Decorator to execute a query. It wraps around the function and adds exception handling.
|
|
@@ -57,13 +61,13 @@ def execute_query_wrapper(f):
|
|
|
57
61
|
try:
|
|
58
62
|
tdml.execute_sql(q)
|
|
59
63
|
except Exception as e:
|
|
60
|
-
print(str(e).
|
|
64
|
+
print(str(e).splitlines()[0])
|
|
61
65
|
print(q)
|
|
62
66
|
else:
|
|
63
67
|
try:
|
|
64
68
|
tdml.execute_sql(query)
|
|
65
69
|
except Exception as e:
|
|
66
|
-
print(str(e).
|
|
70
|
+
print(str(e).splitlines()[0])
|
|
67
71
|
print(query)
|
|
68
72
|
else:
|
|
69
73
|
if type(query) == list:
|
|
@@ -71,13 +75,13 @@ def execute_query_wrapper(f):
|
|
|
71
75
|
try:
|
|
72
76
|
tdml.get_context().execute(q)
|
|
73
77
|
except Exception as e:
|
|
74
|
-
print(str(e).
|
|
78
|
+
print(str(e).splitlines()[0])
|
|
75
79
|
print(q)
|
|
76
80
|
else:
|
|
77
81
|
try:
|
|
78
82
|
tdml.get_context().execute(query)
|
|
79
83
|
except Exception as e:
|
|
80
|
-
print(str(e).
|
|
84
|
+
print(str(e).splitlines()[0])
|
|
81
85
|
print(query)
|
|
82
86
|
return
|
|
83
87
|
|
|
@@ -97,8 +101,11 @@ def execute_query(query, raise_error=False):
|
|
|
97
101
|
"""
|
|
98
102
|
def handle_exception(e, q):
|
|
99
103
|
# Always print error
|
|
100
|
-
|
|
101
|
-
|
|
104
|
+
logger_safe('error', f"Error executing query: {str(e).splitlines()[0]}")
|
|
105
|
+
if tdfs4ds.DEBUG:
|
|
106
|
+
logger_safe('debug', f"Full error: {str(e)}")
|
|
107
|
+
logger_safe('debug', f"Query: {q}")
|
|
108
|
+
|
|
102
109
|
|
|
103
110
|
# Raise exception only if requested
|
|
104
111
|
if raise_error:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: tdfs4ds
|
|
3
|
-
Version: 0.2.5.
|
|
3
|
+
Version: 0.2.5.5
|
|
4
4
|
Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
|
|
5
5
|
Author: Denis Molin
|
|
6
6
|
Requires-Python: >=3.6
|
|
@@ -12,7 +12,14 @@ Requires-Dist: plotly
|
|
|
12
12
|
Requires-Dist: tqdm
|
|
13
13
|
Requires-Dist: networkx
|
|
14
14
|
Requires-Dist: sqlparse
|
|
15
|
-
Requires-Dist:
|
|
15
|
+
Requires-Dist: langchain_openai
|
|
16
|
+
Requires-Dist: nbformat>=4.2.0
|
|
17
|
+
Dynamic: author
|
|
18
|
+
Dynamic: description
|
|
19
|
+
Dynamic: description-content-type
|
|
20
|
+
Dynamic: requires-dist
|
|
21
|
+
Dynamic: requires-python
|
|
22
|
+
Dynamic: summary
|
|
16
23
|
|
|
17
24
|

|
|
18
25
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
tdfs4ds/__init__.py,sha256=
|
|
1
|
+
tdfs4ds/__init__.py,sha256=B084WiycxDqhS6_V-v0v-Sb3VtMJlxKHuegBkwuepJk,70555
|
|
2
2
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
3
3
|
tdfs4ds/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
4
4
|
tdfs4ds/data/logo/tdfs4ds_logo.png,sha256=OCKQnH0gQbRyupwZeiIgo-9c6mdRtjE2E2Zunr_4Ae0,363980
|
|
@@ -9,11 +9,15 @@ tdfs4ds/dataset/dataset.py,sha256=J_fgfsVdR9zSOXrUOqyotqsUD-GlQMGyuld6ueov45w,76
|
|
|
9
9
|
tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22Tsd5k,16638
|
|
10
10
|
tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaUGCnI,209
|
|
11
11
|
tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
|
|
12
|
-
tdfs4ds/feature_store/feature_data_processing.py,sha256=
|
|
12
|
+
tdfs4ds/feature_store/feature_data_processing.py,sha256=4raJaRddlyRaxCATioEU6oHOBKhY1K0fyYKbmWTLj1U,46221
|
|
13
13
|
tdfs4ds/feature_store/feature_query_retrieval.py,sha256=51c6ZNlLFiBIxNPinS8ot8bjWEIb1QV2eVg69yzVF80,35381
|
|
14
|
-
tdfs4ds/feature_store/feature_store_management.py,sha256=
|
|
15
|
-
tdfs4ds/genai/__init__.py,sha256=
|
|
16
|
-
tdfs4ds/genai/documentation.py,sha256=
|
|
14
|
+
tdfs4ds/feature_store/feature_store_management.py,sha256=qsazxRC4jxBwfwNYpRhrDLDBtnq2BfePTQ31vmDFH_o,52190
|
|
15
|
+
tdfs4ds/genai/__init__.py,sha256=Os1NpNPNr1h5-25xt_jckIqImI3jDMxjxUvM7TqEXzE,811
|
|
16
|
+
tdfs4ds/genai/documentation.py,sha256=rcGPupWpVSG8vhGjk_AWcHarvaImM9XEBkxJYiy5SK0,92244
|
|
17
|
+
tdfs4ds/lineage/__init__.py,sha256=NuTK4QDk6WbOVmtDZFLTzcEfp431Ao31uEjOkYfoImg,391
|
|
18
|
+
tdfs4ds/lineage/indexing.py,sha256=th_XRBS-QNKruwO9od9rJG1bzARCD_4Fn2W2wZkoDUg,16768
|
|
19
|
+
tdfs4ds/lineage/lineage.py,sha256=doiJrQst7d0npeuPZG57c-g-G-1AnGuzPURzHGHu2hI,11507
|
|
20
|
+
tdfs4ds/lineage/network.py,sha256=HwUlt1NLQJAAViCaV_1O9bTlY1lDpokSCN44E8QqLU0,14623
|
|
17
21
|
tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
|
|
18
22
|
tdfs4ds/process_store/process_followup.py,sha256=E4jgQahjhVRBbfAW3JXNLId7H5qV8ozRt-6PyAQuPzg,12583
|
|
19
23
|
tdfs4ds/process_store/process_query_administration.py,sha256=AOufkJ6DFUpBiGm-6Q6Dq0Aovw31UGTscZ3Ya0ewS-0,7851
|
|
@@ -22,11 +26,11 @@ tdfs4ds/process_store/process_store_catalog_management.py,sha256=s-2JAaOx-m_hANV
|
|
|
22
26
|
tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
|
|
23
27
|
tdfs4ds/utils/filter_management.py,sha256=rF8v0fws-ZR9og7x2TBqS5VWpOit1cwgF2xNkb_Hw1w,28635
|
|
24
28
|
tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
|
|
25
|
-
tdfs4ds/utils/lineage.py,sha256=
|
|
26
|
-
tdfs4ds/utils/query_management.py,sha256=
|
|
29
|
+
tdfs4ds/utils/lineage.py,sha256=QwmxTvKCxwuyw4Zn7FG2Jpxcy_EcRdTJhPXmPyKOggE,37888
|
|
30
|
+
tdfs4ds/utils/query_management.py,sha256=q6MJ-PNJp4yDuXEtN-MLJ8Cg7h4i1bYFR7abYsh7nak,5068
|
|
27
31
|
tdfs4ds/utils/time_management.py,sha256=g3EJO7I8ERoZ4X7yq5SyDqSE4O9p0BRcv__QPuAxbGA,32243
|
|
28
32
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
29
|
-
tdfs4ds-0.2.5.
|
|
30
|
-
tdfs4ds-0.2.5.
|
|
31
|
-
tdfs4ds-0.2.5.
|
|
32
|
-
tdfs4ds-0.2.5.
|
|
33
|
+
tdfs4ds-0.2.5.5.dist-info/METADATA,sha256=lPNjCTF6FJyGIpSvgTBEmnsgKXYhwZVmyv-l7Qr7eJY,14532
|
|
34
|
+
tdfs4ds-0.2.5.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
35
|
+
tdfs4ds-0.2.5.5.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
36
|
+
tdfs4ds-0.2.5.5.dist-info/RECORD,,
|
|
File without changes
|