tdfs4ds 0.2.4.18__py3-none-any.whl → 0.2.4.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +1 -1
- tdfs4ds/utils/lineage.py +163 -0
- {tdfs4ds-0.2.4.18.dist-info → tdfs4ds-0.2.4.19.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.18.dist-info → tdfs4ds-0.2.4.19.dist-info}/RECORD +6 -6
- {tdfs4ds-0.2.4.18.dist-info → tdfs4ds-0.2.4.19.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.18.dist-info → tdfs4ds-0.2.4.19.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
tdfs4ds/utils/lineage.py
CHANGED
|
@@ -4,6 +4,7 @@ import teradataml as tdml
|
|
|
4
4
|
import tdfs4ds
|
|
5
5
|
import tqdm
|
|
6
6
|
import networkx as nx
|
|
7
|
+
import sqlparse
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
def query_change_case(query, case):
|
|
@@ -630,3 +631,165 @@ def get_ddl(view_name, schema_name, object_type='view'):
|
|
|
630
631
|
# Replace carriage returns with newlines for consistent formatting
|
|
631
632
|
return ddl.replace('\r', '\n')
|
|
632
633
|
|
|
634
|
+
from datetime import datetime
|
|
635
|
+
import sqlparse
|
|
636
|
+
import re
|
|
637
|
+
import os
|
|
638
|
+
|
|
639
|
+
# Any run of characters outside this set is collapsed to "_" to build HTML anchor ids.
_UNSAFE_ANCHOR_CHARS = re.compile(r'[^A-Za-z0-9_]+')

# Maps each accepted ``sort_by`` value to the DataFrame columns used for ordering.
_PROCESS_REPORT_SORT_COLUMNS = {
    "database": ["DB", "VW"],
    "view_name": ["VW"],
    "database,view_name": ["DB", "VW"],
}

# Stylesheet embedded verbatim in the <style> element of the HTML report.
_PROCESS_REPORT_CSS = """
body {
    margin: 0;
    font-family: Arial, sans-serif;
}
.sidebar {
    position: fixed;
    top: 0;
    left: 0;
    width: 280px;
    height: 100%;
    background: #f4f4f4;
    border-right: 1px solid #ccc;
    padding: 20px;
    overflow-y: auto;
}
.sidebar h2 {
    font-size: 18px;
    margin-top: 0;
}
.sidebar ul {
    list-style: none;
    padding-left: 15px;
}
.sidebar li {
    margin-bottom: 4px;
}
.sidebar a {
    text-decoration: none;
    color: #007BFF;
    font-size: 14px;
}
.sidebar a:hover {
    text-decoration: underline;
}
.content {
    margin-left: 300px;
    padding: 20px;
}
pre {
    background: #f8f8f8;
    border: 1px solid #ddd;
    padding: 10px;
    overflow-x: auto;
}
"""


def _split_view_name(full_name):
    """Split a (possibly double-quoted) ``database.view`` name into ``(database, view)``.

    The split happens on the last dot, so a name without any dot yields an
    empty database instead of raising on tuple unpacking.
    """
    db, _, vw = full_name.replace('"', '').rpartition('.')
    return db, vw


def generate_process_report(format="html", output_file=None, collapsible=False, sort_by="view_name"):
    """
    Generate a process catalog report with sidebar index grouped by database,
    with sub-items for Entity, Features, and DDL.

    The processes are read from ``tdfs4ds.process_catalog()`` and restricted to
    the rows whose ``DATA_DOMAIN`` equals ``tdfs4ds.DATA_DOMAIN``. For every
    remaining process the view DDL is fetched with ``SHOW VIEW`` and pretty
    printed with ``sqlparse``.

    Args:
        format: Report format. Only ``"html"`` produces content; any other
            value emits a warning and writes an empty file (kept for backward
            compatibility with the previous behaviour).
        output_file: Destination path. When ``None`` the name
            ``report_<data_domain>_<YYYYmmdd_HHMMSS>.<format>`` is used,
            relative to the current working directory.
        collapsible: When true, each DDL is wrapped in a ``<details>`` element
            so it can be folded in the browser.
        sort_by: ``"view_name"``, ``"database"`` or ``"database,view_name"``
            (the last two are equivalent). Any other value, including a falsy
            one, leaves the catalog order untouched.

    Returns:
        None. The report is written to ``output_file`` and its absolute path
        is printed.
    """
    # Function-scope imports: these names are not part of the module's
    # existing import block.
    import warnings
    from html import escape

    is_html = format == "html"
    if not is_html:
        warnings.warn(
            f"Unsupported report format {format!r}: only 'html' is rendered, "
            "the generated file will be empty.",
            stacklevel=2,
        )

    processes = tdfs4ds.process_catalog()
    processes = processes[processes.DATA_DOMAIN == tdfs4ds.DATA_DOMAIN].to_pandas()

    # Build the helper columns from plain lists so that an empty catalog
    # yields empty columns instead of failing on ``zip(*[])`` unpacking.
    view_names = list(processes["VIEW_NAME"])
    split_names = [_split_view_name(name) for name in view_names]
    processes["DB"] = [db for db, _ in split_names]
    processes["VW"] = [vw for _, vw in split_names]
    processes["ANCHOR"] = [_UNSAFE_ANCHOR_CHARS.sub('_', name) for name in view_names]

    sort_columns = _PROCESS_REPORT_SORT_COLUMNS.get(sort_by) if isinstance(sort_by, str) else None
    if sort_columns:
        processes = processes.sort_values(sort_columns)

    # Take a single clock reading so the file name and the displayed
    # timestamp can never disagree.
    now = datetime.now()
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    readable_ts = now.strftime("%Y-%m-%d %H:%M:%S")
    data_domain = tdfs4ds.DATA_DOMAIN

    if output_file is None:
        output_file = f"report_{data_domain}_{timestamp}.{format}"

    report_parts = []

    if is_html:
        # Every catalog-provided value is HTML-escaped before interpolation:
        # DDL text routinely contains '<', '>' and '&'.
        domain_html = escape(str(data_domain), quote=False)

        report_parts.append(
            "<html>\n"
            "<head>\n"
            # The file is written as UTF-8; declare it so browsers decode it correctly.
            '<meta charset="utf-8">\n'
            f"<title>Process Report - {domain_html}</title>\n"
            f"<style>{_PROCESS_REPORT_CSS}</style>\n"
            "</head>\n"
            "<body>\n"
            '<div class="sidebar">\n'
            "<h2>Teradata Feature Store</h2>"
        )

        # Build structured index: group by database, with sub-items
        for db, group in processes.groupby("DB"):
            report_parts.append(f"<h3>DB: {escape(str(db), quote=False)}</h3><ul>")
            for vw, anchor in zip(group["VW"], group["ANCHOR"]):
                report_parts.append(
                    f'<li><a href="#{anchor}">{escape(str(vw), quote=False)}</a>'
                    "<ul>"
                    f'<li><a href="#{anchor}_entity">Entity</a></li>'
                    f'<li><a href="#{anchor}_features">Features</a></li>'
                    f'<li><a href="#{anchor}_ddl">DDL</a></li>'
                    "</ul></li>"
                )
            report_parts.append("</ul>")

        report_parts.append('</div>\n<div class="content">')

        # Header
        report_parts.append(f"<h1>Process Catalog Report - {domain_html}</h1>")
        report_parts.append(f"<p><em>Generated on {readable_ts}</em></p>")

        for _, row in processes.iterrows():
            view_name = row['VIEW_NAME']
            anchor = row["ANCHOR"]
            title = escape(f'{row["DB"]}.{row["VW"]}', quote=False)
            entity_list = row['ENTITY_ID'].split(',')
            features_list = row['FEATURE_NAMES'].split(',')

            ddl_raw = tdml.execute_sql(f"SHOW VIEW {view_name}").fetchall()[0][0]
            ddl = escape(
                sqlparse.format(ddl_raw, reindent=True, keyword_case="upper"),
                quote=False,
            )

            section = [f'<h2 id="{anchor}">{title} ({domain_html})</h2>']

            section.append(f'<h3 id="{anchor}_entity">Entity</h3><ul>')
            section.extend(f"<li>{escape(item, quote=False)}</li>" for item in entity_list)
            section.append("</ul>")

            section.append(
                f'<h3 id="{anchor}_features">Features ({len(features_list)} total)</h3><ul>'
            )
            section.extend(f"<li>{escape(item, quote=False)}</li>" for item in features_list)
            section.append("</ul>")

            section.append(f'<h3 id="{anchor}_ddl">DDL</h3>')
            ddl_block = f"<pre><code>{ddl}</code></pre>"
            if collapsible:
                ddl_block = f"<details><summary>Show/Hide DDL</summary>{ddl_block}</details>"
            section.append(ddl_block)

            report_parts.append("\n".join(section))

        report_parts.append("</div></body></html>")

    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(report_parts))

    print(f"✅ Report generated: {os.path.abspath(output_file)}")
|
|
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=_H_VK1ezxvUSLKuIq9WKdNwZuu_iWjNcBJ9IwXYkpDo,64168
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
@@ -25,11 +25,11 @@ tdfs4ds/process_store/process_store_catalog_management.py,sha256=H135RRTYn-pyWIq
|
|
|
25
25
|
tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
|
|
26
26
|
tdfs4ds/utils/filter_management.py,sha256=7D47N_hnTSUVOkaV2XuKrlUFMxzWjDsCBvRYsH4lXdU,11073
|
|
27
27
|
tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
|
|
28
|
-
tdfs4ds/utils/lineage.py,sha256=
|
|
28
|
+
tdfs4ds/utils/lineage.py,sha256=XvoiNyrVrsVhuSZTAJrCNjEZAQ4YVsoe61aIl2fBKzk,34757
|
|
29
29
|
tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
|
|
30
30
|
tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
|
|
31
31
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
32
|
-
tdfs4ds-0.2.4.
|
|
33
|
-
tdfs4ds-0.2.4.
|
|
34
|
-
tdfs4ds-0.2.4.
|
|
35
|
-
tdfs4ds-0.2.4.
|
|
32
|
+
tdfs4ds-0.2.4.19.dist-info/METADATA,sha256=5ri5CNAhzx8igDa4PpsfwMQPeJSxhNsaM_c7_B8IYIs,11944
|
|
33
|
+
tdfs4ds-0.2.4.19.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
34
|
+
tdfs4ds-0.2.4.19.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
35
|
+
tdfs4ds-0.2.4.19.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|