tdfs4ds 0.2.4.19__py3-none-any.whl → 0.2.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +1 -1
- tdfs4ds/utils/lineage.py +190 -105
- {tdfs4ds-0.2.4.19.dist-info → tdfs4ds-0.2.4.20.dist-info}/METADATA +5 -1
- {tdfs4ds-0.2.4.19.dist-info → tdfs4ds-0.2.4.20.dist-info}/RECORD +6 -6
- {tdfs4ds-0.2.4.19.dist-info → tdfs4ds-0.2.4.20.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.19.dist-info → tdfs4ds-0.2.4.20.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
tdfs4ds/utils/lineage.py
CHANGED
|
@@ -631,17 +631,27 @@ def get_ddl(view_name, schema_name, object_type='view'):
|
|
|
631
631
|
# Replace carriage returns with newlines for consistent formatting
|
|
632
632
|
return ddl.replace('\r', '\n')
|
|
633
633
|
|
|
634
|
-
from datetime import datetime
|
|
635
|
-
import sqlparse
|
|
636
|
-
import re
|
|
637
634
|
import os
|
|
638
|
-
|
|
639
|
-
|
|
635
|
+
import datetime
|
|
636
|
+
import sqlparse
|
|
637
|
+
import tdfs4ds
|
|
638
|
+
import importlib.resources as pkg_resources
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def generate_process_report(
|
|
642
|
+
format="html",
|
|
643
|
+
output_file=None,
|
|
644
|
+
collapsible=False,
|
|
645
|
+
sort_by="view_name",
|
|
646
|
+
theme_mode="light",
|
|
647
|
+
company_name=None,
|
|
648
|
+
company_logo_url=None
|
|
649
|
+
):
|
|
640
650
|
"""
|
|
641
|
-
Generate a process catalog report
|
|
642
|
-
with sub-items for Entity, Features, and DDL.
|
|
651
|
+
Generate a process catalog report styled after the Teradata website.
|
|
643
652
|
"""
|
|
644
653
|
|
|
654
|
+
# Retrieve processes
|
|
645
655
|
processes = tdfs4ds.process_catalog()
|
|
646
656
|
processes = processes[processes.DATA_DOMAIN == tdfs4ds.DATA_DOMAIN].to_pandas()
|
|
647
657
|
processes['VIEW'] = processes['VIEW_NAME'].apply(lambda x: x.split('.')[1].replace('"', ""))
|
|
@@ -650,9 +660,9 @@ def generate_process_report(format="html", output_file=None, collapsible=False,
|
|
|
650
660
|
db, vw = full_name.replace('"', '').split('.')
|
|
651
661
|
return db, vw
|
|
652
662
|
|
|
653
|
-
processes["DB"], processes["VW"] = zip(*processes["VIEW_NAME"].map(split_view_name))
|
|
654
|
-
|
|
655
663
|
if sort_by:
|
|
664
|
+
processes = processes.copy()
|
|
665
|
+
processes["DB"], processes["VW"] = zip(*processes["VIEW_NAME"].map(split_view_name))
|
|
656
666
|
if sort_by == "database":
|
|
657
667
|
processes = processes.sort_values(["DB", "VW"])
|
|
658
668
|
elif sort_by == "view_name":
|
|
@@ -660,123 +670,187 @@ def generate_process_report(format="html", output_file=None, collapsible=False,
|
|
|
660
670
|
elif sort_by == "database,view_name":
|
|
661
671
|
processes = processes.sort_values(["DB", "VW"])
|
|
662
672
|
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
673
|
+
# Timestamp and output file
|
|
674
|
+
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
675
|
+
if not output_file:
|
|
676
|
+
output_file = f"process_report_{tdfs4ds.DATA_DOMAIN}_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.html"
|
|
677
|
+
|
|
678
|
+
# Theme
|
|
679
|
+
if theme_mode == "dark":
|
|
680
|
+
bg_color = "#121212"
|
|
681
|
+
text_color = "#f5f5f5"
|
|
682
|
+
sidebar_bg = "#1e1e1e"
|
|
683
|
+
border_color = "#333"
|
|
684
|
+
pre_bg = "#222"
|
|
685
|
+
teradata_logo_file = "teradata_sym_rgb_wht_rev.png"
|
|
686
|
+
else:
|
|
687
|
+
bg_color = "#ffffff"
|
|
688
|
+
text_color = "#000000"
|
|
689
|
+
sidebar_bg = "#f8f9fa"
|
|
690
|
+
border_color = "#e1e1e1"
|
|
691
|
+
pre_bg = "#f4f6f8"
|
|
692
|
+
teradata_logo_file = "teradata_sym_rgb_pos.png"
|
|
693
|
+
|
|
694
|
+
# Load Teradata logo
|
|
695
|
+
with pkg_resources.path("tdfs4ds.data.logo", teradata_logo_file) as p:
|
|
696
|
+
teradata_logo_path = str(p)
|
|
697
|
+
with pkg_resources.path("tdfs4ds.data.logo", "tdfs4ds_logo.png") as p: # ensure you add this logo to package
|
|
698
|
+
tdfs4ds_logo_path = str(p)
|
|
699
|
+
|
|
700
|
+
# HTML Report
|
|
670
701
|
report_parts = []
|
|
671
702
|
|
|
672
703
|
if format == "html":
|
|
704
|
+
report_parts.append("<html><head>")
|
|
705
|
+
report_parts.append("<meta charset='utf-8'>")
|
|
706
|
+
report_parts.append("<title>Process Catalog Report</title>")
|
|
673
707
|
report_parts.append(f"""
|
|
674
|
-
<
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
708
|
+
<style>
|
|
709
|
+
body {{
|
|
710
|
+
font-family: "Segoe UI", Arial, sans-serif;
|
|
711
|
+
margin: 0;
|
|
712
|
+
display: flex;
|
|
713
|
+
background: {bg_color};
|
|
714
|
+
color: {text_color};
|
|
715
|
+
height: 100vh;
|
|
716
|
+
overflow: hidden;
|
|
717
|
+
}}
|
|
718
|
+
.sidebar {{
|
|
719
|
+
width: 260px;
|
|
720
|
+
background: {sidebar_bg};
|
|
721
|
+
display: flex;
|
|
722
|
+
flex-direction: column;
|
|
723
|
+
border-right: 1px solid {border_color};
|
|
724
|
+
box-shadow: 2px 0 8px rgba(0,0,0,0.05);
|
|
725
|
+
}}
|
|
726
|
+
.sidebar-header {{
|
|
727
|
+
padding: 20px;
|
|
728
|
+
text-align: center;
|
|
729
|
+
border-bottom: 1px solid {border_color};
|
|
730
|
+
}}
|
|
731
|
+
.sidebar-header img {{
|
|
732
|
+
height: 35px;
|
|
733
|
+
max-width: 90%;
|
|
734
|
+
margin: 10px 0;
|
|
735
|
+
}}
|
|
736
|
+
.sidebar-content {{
|
|
737
|
+
flex: 1;
|
|
738
|
+
overflow-y: auto;
|
|
739
|
+
padding: 20px;
|
|
740
|
+
}}
|
|
741
|
+
.content-wrapper {{
|
|
742
|
+
flex: 1;
|
|
743
|
+
display: flex;
|
|
744
|
+
flex-direction: column;
|
|
745
|
+
height: 100vh;
|
|
746
|
+
}}
|
|
747
|
+
.header {{
|
|
748
|
+
flex: 0 0 auto;
|
|
749
|
+
background: #007CBA;
|
|
750
|
+
color: white;
|
|
751
|
+
padding: 20px;
|
|
752
|
+
}}
|
|
753
|
+
.header h1 {{
|
|
754
|
+
margin: 0 0 10px;
|
|
755
|
+
}}
|
|
756
|
+
.header p {{
|
|
757
|
+
margin: 5px 0;
|
|
758
|
+
font-size: 0.95em;
|
|
759
|
+
}}
|
|
760
|
+
.content {{
|
|
761
|
+
flex: 1;
|
|
762
|
+
padding: 30px;
|
|
763
|
+
overflow-y: auto;
|
|
764
|
+
}}
|
|
765
|
+
.footer {{
|
|
766
|
+
flex: 0 0 auto;
|
|
767
|
+
padding: 15px;
|
|
768
|
+
border-top: 1px solid {border_color};
|
|
769
|
+
font-size: 0.9em;
|
|
770
|
+
text-align: center;
|
|
771
|
+
color: #666;
|
|
772
|
+
display: flex;
|
|
773
|
+
justify-content: center;
|
|
774
|
+
align-items: center;
|
|
775
|
+
gap: 20px;
|
|
776
|
+
}}
|
|
777
|
+
.footer img {{
|
|
778
|
+
height: 25px;
|
|
779
|
+
}}
|
|
780
|
+
pre {{
|
|
781
|
+
background: {pre_bg};
|
|
782
|
+
padding: 12px;
|
|
783
|
+
border-radius: 6px;
|
|
784
|
+
overflow-x: auto;
|
|
785
|
+
font-size: 0.9em;
|
|
786
|
+
}}
|
|
787
|
+
ul {{ list-style-type: none; padding-left: 15px; }}
|
|
788
|
+
a {{ text-decoration: none; color: #007CBA; }}
|
|
789
|
+
a:hover {{ text-decoration: underline; }}
|
|
790
|
+
h2, h3 {{ color: #007CBA; }}
|
|
791
|
+
</style>
|
|
727
792
|
""")
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
report_parts.append(f'<li><a href="#{
|
|
793
|
+
report_parts.append("</head><body>")
|
|
794
|
+
|
|
795
|
+
# Sidebar
|
|
796
|
+
report_parts.append("<div class='sidebar'>")
|
|
797
|
+
report_parts.append("<div class='sidebar-header'>")
|
|
798
|
+
if company_logo_url:
|
|
799
|
+
report_parts.append(f'<img src="{company_logo_url}" alt="Company logo"><br>')
|
|
800
|
+
report_parts.append(f'<img src="{teradata_logo_path}" alt="Teradata logo">')
|
|
801
|
+
report_parts.append("</div>") # end sidebar-header
|
|
802
|
+
|
|
803
|
+
# Sidebar content (index)
|
|
804
|
+
report_parts.append("<div class='sidebar-content'>")
|
|
805
|
+
report_parts.append("<h2>Process Catalog Index</h2><ul id='indexList'>")
|
|
806
|
+
for db, db_group in processes.groupby("DB"):
|
|
807
|
+
report_parts.append(f"<li><h3>DB: {db}</h3><ul>")
|
|
808
|
+
for _, row in db_group.iterrows():
|
|
809
|
+
view_id = f"{row['DB']}_{row['VW']}"
|
|
810
|
+
report_parts.append(f'<li><a href="#{view_id}">{row["VW"]}</a><ul>')
|
|
811
|
+
report_parts.append(f'<li><a href="#{view_id}_entity">Entity</a></li>')
|
|
812
|
+
report_parts.append(f'<li><a href="#{view_id}_features">Features</a></li>')
|
|
813
|
+
report_parts.append(f'<li><a href="#{view_id}_ddl">DDL</a></li>')
|
|
746
814
|
report_parts.append("</ul></li>")
|
|
747
|
-
report_parts.append("</ul>")
|
|
815
|
+
report_parts.append("</ul></li>")
|
|
816
|
+
report_parts.append("</ul></div></div>") # close sidebar-content & sidebar
|
|
748
817
|
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
<div class="content">
|
|
752
|
-
""")
|
|
818
|
+
# Main content wrapper
|
|
819
|
+
report_parts.append("<div class='content-wrapper'>")
|
|
753
820
|
|
|
754
821
|
# Header
|
|
755
|
-
report_parts.append(
|
|
756
|
-
report_parts.append(
|
|
822
|
+
report_parts.append("<div class='header'>")
|
|
823
|
+
report_parts.append("<h1>Process Catalog Report</h1>")
|
|
824
|
+
report_parts.append("<p>Data Domain: Customer Transaction Analytics Time Management | "
|
|
825
|
+
f"Generated on {timestamp}</p>")
|
|
826
|
+
report_parts.append("<p>Powered by <strong>Teradata</strong> and <strong>tdfs4ds</strong></p>")
|
|
827
|
+
report_parts.append("</div>") # end header
|
|
828
|
+
|
|
829
|
+
# Content
|
|
830
|
+
report_parts.append("<div class='content'>")
|
|
757
831
|
|
|
832
|
+
# Sections per process
|
|
758
833
|
for _, row in processes.iterrows():
|
|
759
834
|
view_name = row['VIEW_NAME']
|
|
760
|
-
db, vw =
|
|
835
|
+
db, vw = split_view_name(view_name)
|
|
836
|
+
view_id = f"{db}_{vw}"
|
|
761
837
|
entity_list = row['ENTITY_ID'].split(',')
|
|
762
838
|
features_list = row['FEATURE_NAMES'].split(',')
|
|
763
|
-
|
|
764
|
-
ddl_raw = tdml.execute_sql(f"SHOW VIEW {view_name}").fetchall()[0][0]
|
|
839
|
+
process_id = row['PROCESS_ID']
|
|
840
|
+
ddl_raw = tdfs4ds.tdml.execute_sql(f"SHOW VIEW {view_name}").fetchall()[0][0]
|
|
765
841
|
ddl = sqlparse.format(ddl_raw, reindent=True, keyword_case="upper")
|
|
766
|
-
safe_id = re.sub(r'[^A-Za-z0-9_]+', '_', view_name)
|
|
767
842
|
|
|
768
843
|
if format == "html":
|
|
769
|
-
section = [f'<h2 id="{
|
|
770
|
-
|
|
771
|
-
section.append(f'<h3 id="{
|
|
844
|
+
section = [f'<h2 id="{view_id}">{view_name}</h2>']
|
|
845
|
+
section.append(f"<p><strong>PROCESS_ID:</strong> {process_id}</p>")
|
|
846
|
+
section.append(f'<h3 id="{view_id}_entity">Entity</h3><ul>')
|
|
772
847
|
section.extend([f"<li>{t}</li>" for t in entity_list])
|
|
773
848
|
section.append("</ul>")
|
|
774
|
-
|
|
775
|
-
section.append(f'<h3 id="{safe_id}_features">Features ({len(features_list)} total)</h3><ul>')
|
|
849
|
+
section.append(f'<h3 id="{view_id}_features">Features ({len(features_list)} total)</h3><ul>')
|
|
776
850
|
section.extend([f"<li>{t}</li>" for t in features_list])
|
|
777
851
|
section.append("</ul>")
|
|
852
|
+
section.append(f'<h3 id="{view_id}_ddl">DDL</h3>')
|
|
778
853
|
|
|
779
|
-
section.append(f'<h3 id="{safe_id}_ddl">DDL</h3>')
|
|
780
854
|
if collapsible:
|
|
781
855
|
section.append("<details><summary>Show/Hide DDL</summary><pre><code>")
|
|
782
856
|
section.append(ddl)
|
|
@@ -787,9 +861,20 @@ def generate_process_report(format="html", output_file=None, collapsible=False,
|
|
|
787
861
|
report_parts.append("\n".join(section))
|
|
788
862
|
|
|
789
863
|
if format == "html":
|
|
864
|
+
report_parts.append("</div>") # end content
|
|
865
|
+
report_parts.append(
|
|
866
|
+
f"<div class='footer'>"
|
|
867
|
+
f'<a href="https://www.teradata.com" target="_blank">'
|
|
868
|
+
f'<img src="{teradata_logo_path}" alt="Teradata logo"></a>'
|
|
869
|
+
f'<a href="https://pypi.org/project/tdfs4ds/" target="_blank">'
|
|
870
|
+
f'<img src="{tdfs4ds_logo_path}" alt="tdfs4ds logo"></a>'
|
|
871
|
+
f"<span>© 2025 . Generated using tdfs4ds on Teradata.</span>"
|
|
872
|
+
f"</div>"
|
|
873
|
+
)
|
|
790
874
|
report_parts.append("</div></body></html>")
|
|
791
875
|
|
|
876
|
+
# Write output
|
|
792
877
|
with open(output_file, "w", encoding="utf-8") as f:
|
|
793
878
|
f.write("\n".join(report_parts))
|
|
794
879
|
|
|
795
|
-
|
|
880
|
+
return output_file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: tdfs4ds
|
|
3
|
-
Version: 0.2.4.
|
|
3
|
+
Version: 0.2.4.20
|
|
4
4
|
Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
|
|
5
5
|
Author: Denis Molin
|
|
6
6
|
Requires-Python: >=3.6
|
|
@@ -13,6 +13,10 @@ Requires-Dist: tqdm
|
|
|
13
13
|
Requires-Dist: networkx
|
|
14
14
|
Requires-Dist: sqlparse
|
|
15
15
|
|
|
16
|
+
<p align="center">
|
|
17
|
+
<img src="https://github.com/denismolin/tdfs4ds/blob/main/tdfs4ds_logo.png" alt="tdfs4ds logo" width="200"/>
|
|
18
|
+
</p>
|
|
19
|
+
|
|
16
20
|
# tdfs4ds : A Feature Store Library for Data Scientists working with Clearscape Analytics
|
|
17
21
|
|
|
18
22
|
The tdfs library is a Python package designed for managing and utilizing Feature Stores in a Teradata Database. With a set of easy-to-use functions, tdfs enables the efficient creation, registration, and storage of features. It also simplifies the process of preparing feature data for ingestion, building datasets for data analysis, and obtaining already existing features.
|
|
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=oGMGP13DYQowGbWRIP8xJ2RnhHqnVk5AD-t33mHrx0o,64168
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
@@ -25,11 +25,11 @@ tdfs4ds/process_store/process_store_catalog_management.py,sha256=H135RRTYn-pyWIq
|
|
|
25
25
|
tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
|
|
26
26
|
tdfs4ds/utils/filter_management.py,sha256=7D47N_hnTSUVOkaV2XuKrlUFMxzWjDsCBvRYsH4lXdU,11073
|
|
27
27
|
tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
|
|
28
|
-
tdfs4ds/utils/lineage.py,sha256=
|
|
28
|
+
tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,37839
|
|
29
29
|
tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
|
|
30
30
|
tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
|
|
31
31
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
32
|
-
tdfs4ds-0.2.4.
|
|
33
|
-
tdfs4ds-0.2.4.
|
|
34
|
-
tdfs4ds-0.2.4.
|
|
35
|
-
tdfs4ds-0.2.4.
|
|
32
|
+
tdfs4ds-0.2.4.20.dist-info/METADATA,sha256=niFkaOR8VVaVVYY318GZ9PwX4wjgf53Awzt_kCUWY84,12084
|
|
33
|
+
tdfs4ds-0.2.4.20.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
34
|
+
tdfs4ds-0.2.4.20.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
35
|
+
tdfs4ds-0.2.4.20.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|