tdfs4ds 0.2.4.18__py3-none-any.whl → 0.2.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +1 -1
- tdfs4ds/utils/lineage.py +248 -0
- {tdfs4ds-0.2.4.18.dist-info → tdfs4ds-0.2.4.20.dist-info}/METADATA +5 -1
- {tdfs4ds-0.2.4.18.dist-info → tdfs4ds-0.2.4.20.dist-info}/RECORD +6 -6
- {tdfs4ds-0.2.4.18.dist-info → tdfs4ds-0.2.4.20.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.18.dist-info → tdfs4ds-0.2.4.20.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
tdfs4ds/utils/lineage.py
CHANGED
|
@@ -4,6 +4,7 @@ import teradataml as tdml
|
|
|
4
4
|
import tdfs4ds
|
|
5
5
|
import tqdm
|
|
6
6
|
import networkx as nx
|
|
7
|
+
import sqlparse
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
def query_change_case(query, case):
|
|
@@ -630,3 +631,250 @@ def get_ddl(view_name, schema_name, object_type='view'):
|
|
|
630
631
|
# Replace carriage returns with newlines for consistent formatting
|
|
631
632
|
return ddl.replace('\r', '\n')
|
|
632
633
|
|
|
634
|
+
import os
|
|
635
|
+
import datetime
|
|
636
|
+
import sqlparse
|
|
637
|
+
import tdfs4ds
|
|
638
|
+
import importlib.resources as pkg_resources
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def _split_view_name(full_name):
    """Split a ``"DB"."VIEW"`` style name into ``(database, view)`` without quotes."""
    # Split on the first dot only, so a dot inside the view part does not
    # break the two-value unpacking below.
    db, vw = full_name.replace('"', '').split('.', 1)
    return db, vw


def _split_csv(value):
    """Return the stripped, non-empty items of a comma-separated string."""
    if not isinstance(value, str):
        # A missing catalog value (None / NaN) is rendered as an empty list
        # instead of raising AttributeError on ``.split``.
        return []
    return [item.strip() for item in value.split(',') if item.strip()]


def _logo_path(filename):
    """Return the filesystem path of a logo shipped in ``tdfs4ds.data.logo``."""
    # NOTE(review): the path is used after the context manager exits, which is
    # only valid when the package is installed unpacked on disk (the usual
    # wheel install) - confirm if zipped installs must be supported.
    with pkg_resources.path("tdfs4ds.data.logo", filename) as p:
        return str(p)


def _report_stylesheet(bg_color, text_color, sidebar_bg, border_color, pre_bg):
    """Return the ``<style>`` element of the report for the given theme colours."""
    return f"""
<style>
body {{
    font-family: "Segoe UI", Arial, sans-serif;
    margin: 0;
    display: flex;
    background: {bg_color};
    color: {text_color};
    height: 100vh;
    overflow: hidden;
}}
.sidebar {{
    width: 260px;
    background: {sidebar_bg};
    display: flex;
    flex-direction: column;
    border-right: 1px solid {border_color};
    box-shadow: 2px 0 8px rgba(0,0,0,0.05);
}}
.sidebar-header {{
    padding: 20px;
    text-align: center;
    border-bottom: 1px solid {border_color};
}}
.sidebar-header img {{
    height: 35px;
    max-width: 90%;
    margin: 10px 0;
}}
.sidebar-content {{
    flex: 1;
    overflow-y: auto;
    padding: 20px;
}}
.content-wrapper {{
    flex: 1;
    display: flex;
    flex-direction: column;
    height: 100vh;
}}
.header {{
    flex: 0 0 auto;
    background: #007CBA;
    color: white;
    padding: 20px;
}}
.header h1 {{
    margin: 0 0 10px;
}}
.header p {{
    margin: 5px 0;
    font-size: 0.95em;
}}
.content {{
    flex: 1;
    padding: 30px;
    overflow-y: auto;
}}
.footer {{
    flex: 0 0 auto;
    padding: 15px;
    border-top: 1px solid {border_color};
    font-size: 0.9em;
    text-align: center;
    color: #666;
    display: flex;
    justify-content: center;
    align-items: center;
    gap: 20px;
}}
.footer img {{
    height: 25px;
}}
pre {{
    background: {pre_bg};
    padding: 12px;
    border-radius: 6px;
    overflow-x: auto;
    font-size: 0.9em;
}}
ul {{ list-style-type: none; padding-left: 15px; }}
a {{ text-decoration: none; color: #007CBA; }}
a:hover {{ text-decoration: underline; }}
h2, h3 {{ color: #007CBA; }}
</style>
"""


def _render_sidebar(processes, teradata_logo_path, company_logo_url):
    """Return the HTML fragments of the sidebar (logos + per-database index)."""
    from html import escape

    parts = ["<div class='sidebar'>", "<div class='sidebar-header'>"]
    if company_logo_url:
        parts.append(f'<img src="{escape(company_logo_url, quote=True)}" alt="Company logo"><br>')
    parts.append(f'<img src="{escape(teradata_logo_path, quote=True)}" alt="Teradata logo">')
    parts.append("</div>")  # end sidebar-header

    parts.append("<div class='sidebar-content'>")
    parts.append("<h2>Process Catalog Index</h2><ul id='indexList'>")
    for db, db_group in processes.groupby("DB"):
        parts.append(f"<li><h3>DB: {escape(str(db))}</h3><ul>")
        for _, row in db_group.iterrows():
            # The same id is emitted on the matching <h2>/<h3> of the section.
            view_id = escape(f"{row['DB']}_{row['VW']}", quote=True)
            parts.append(f'<li><a href="#{view_id}">{escape(str(row["VW"]))}</a><ul>')
            parts.append(f'<li><a href="#{view_id}_entity">Entity</a></li>')
            parts.append(f'<li><a href="#{view_id}_features">Features</a></li>')
            parts.append(f'<li><a href="#{view_id}_ddl">DDL</a></li>')
            parts.append("</ul></li>")
        parts.append("</ul></li>")
    parts.append("</ul></div></div>")  # close sidebar-content & sidebar
    return parts


def _render_process_section(row, ddl, collapsible):
    """Return the HTML of one process: id, entity, features and formatted DDL."""
    from html import escape

    view_id = escape(f"{row['DB']}_{row['VW']}", quote=True)
    entity_list = _split_csv(row['ENTITY_ID'])
    features_list = _split_csv(row['FEATURE_NAMES'])

    section = [f'<h2 id="{view_id}">{escape(str(row["VIEW_NAME"]))}</h2>']
    section.append(f"<p><strong>PROCESS_ID:</strong> {escape(str(row['PROCESS_ID']))}</p>")
    section.append(f'<h3 id="{view_id}_entity">Entity</h3><ul>')
    section.extend(f"<li>{escape(t)}</li>" for t in entity_list)
    section.append("</ul>")
    section.append(f'<h3 id="{view_id}_features">Features ({len(features_list)} total)</h3><ul>')
    section.extend(f"<li>{escape(t)}</li>" for t in features_list)
    section.append("</ul>")
    section.append(f'<h3 id="{view_id}_ddl">DDL</h3>')

    # The DDL routinely contains '<', '>' and '&' (comparisons, comments), so
    # it must be escaped before being placed inside <pre><code>.
    ddl_block = f"<pre><code>{escape(ddl)}</code></pre>"
    if collapsible:
        ddl_block = f"<details><summary>Show/Hide DDL</summary>{ddl_block}</details>"
    section.append(ddl_block)
    return "\n".join(section)


def generate_process_report(
    format="html",
    output_file=None,
    collapsible=False,
    sort_by="view_name",
    theme_mode="light",
    company_name=None,
    company_logo_url=None
):
    """
    Generate a process catalog report styled after the Teradata website.

    The report lists every process of the current ``tdfs4ds.DATA_DOMAIN``
    with its process id, entity columns, feature names and the DDL of its
    view (fetched with ``SHOW VIEW`` and pretty-printed with ``sqlparse``).

    Parameters
    ----------
    format : str
        Output format. Only ``"html"`` is supported.
    output_file : str or None
        Destination path. When empty, a name of the form
        ``process_report_<DATA_DOMAIN>_<YYYYmmdd_HHMMSS>.html`` is used.
    collapsible : bool
        When true, each DDL is wrapped in a ``<details>`` element.
    sort_by : str or None
        ``"view_name"``, ``"database"`` or ``"database,view_name"``; any
        other value (including ``None``) keeps the catalog order.
    theme_mode : str
        ``"dark"`` selects the dark palette; anything else the light one.
    company_name : str or None
        Optional name shown in the footer copyright notice.
    company_logo_url : str or None
        Optional logo URL displayed above the Teradata logo in the sidebar.

    Returns
    -------
    str
        The path of the written report.

    Raises
    ------
    ValueError
        If ``format`` is not ``"html"``.
    """
    from html import escape

    # Fail early: previously an unsupported format still queried the database
    # for every view and then silently wrote an empty ``.html`` file.
    if format != "html":
        raise ValueError(f"Unsupported report format: {format!r} (only 'html' is supported)")

    # Retrieve the processes of the current data domain.
    processes = tdfs4ds.process_catalog()
    processes = processes[processes.DATA_DOMAIN == tdfs4ds.DATA_DOMAIN].to_pandas().copy()

    # DB / VW are needed by the index and the sections whatever ``sort_by``
    # is, so they are always computed. Building the columns from a list also
    # works for an empty catalog.
    names = [_split_view_name(name) for name in processes["VIEW_NAME"]]
    processes["DB"] = [db for db, _ in names]
    processes["VW"] = [vw for _, vw in names]

    sort_columns = {
        "database": ["DB", "VW"],
        "view_name": ["VW"],
        "database,view_name": ["DB", "VW"],
    }.get(sort_by)
    if sort_columns:
        processes = processes.sort_values(sort_columns)

    # One clock reading, so the displayed timestamp and the file name agree.
    now = datetime.datetime.now()
    timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
    if not output_file:
        output_file = f"process_report_{tdfs4ds.DATA_DOMAIN}_{now.strftime('%Y%m%d_%H%M%S')}.html"

    # Theme palette and the matching Teradata logo variant.
    if theme_mode == "dark":
        palette = dict(
            bg_color="#121212",
            text_color="#f5f5f5",
            sidebar_bg="#1e1e1e",
            border_color="#333",
            pre_bg="#222",
        )
        teradata_logo_file = "teradata_sym_rgb_wht_rev.png"
    else:
        palette = dict(
            bg_color="#ffffff",
            text_color="#000000",
            sidebar_bg="#f8f9fa",
            border_color="#e1e1e1",
            pre_bg="#f4f6f8",
        )
        teradata_logo_file = "teradata_sym_rgb_pos.png"

    teradata_logo_path = _logo_path(teradata_logo_file)
    tdfs4ds_logo_path = _logo_path("tdfs4ds_logo.png")  # must be shipped in the package

    report_parts = [
        "<html><head>",
        "<meta charset='utf-8'>",
        "<title>Process Catalog Report</title>",
        _report_stylesheet(**palette),
        "</head><body>",
    ]

    report_parts.extend(_render_sidebar(processes, teradata_logo_path, company_logo_url))

    # Main content wrapper and header.
    report_parts.append("<div class='content-wrapper'>")
    report_parts.append("<div class='header'>")
    report_parts.append("<h1>Process Catalog Report</h1>")
    # Show the data domain actually reported on, not a hard-coded label.
    report_parts.append(
        f"<p>Data Domain: {escape(str(tdfs4ds.DATA_DOMAIN))} | "
        f"Generated on {timestamp}</p>"
    )
    report_parts.append("<p>Powered by <strong>Teradata</strong> and <strong>tdfs4ds</strong></p>")
    report_parts.append("</div>")  # end header

    # One section per process.
    report_parts.append("<div class='content'>")
    for _, row in processes.iterrows():
        ddl_raw = tdfs4ds.tdml.execute_sql(f"SHOW VIEW {row['VIEW_NAME']}").fetchall()[0][0]
        ddl = sqlparse.format(ddl_raw, reindent=True, keyword_case="upper")
        report_parts.append(_render_process_section(row, ddl, collapsible))
    report_parts.append("</div>")  # end content

    # Footer; the company name (when given) fills the copyright notice.
    owner = f" {escape(str(company_name))}" if company_name else ""
    report_parts.append(
        "<div class='footer'>"
        '<a href="https://www.teradata.com" target="_blank">'
        f'<img src="{escape(teradata_logo_path, quote=True)}" alt="Teradata logo"></a>'
        '<a href="https://pypi.org/project/tdfs4ds/" target="_blank">'
        f'<img src="{escape(tdfs4ds_logo_path, quote=True)}" alt="tdfs4ds logo"></a>'
        f"<span>© {now.year}{owner}. Generated using tdfs4ds on Teradata.</span>"
        "</div>"
    )
    report_parts.append("</div></body></html>")

    # Write output
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(report_parts))

    return output_file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: tdfs4ds
|
|
3
|
-
Version: 0.2.4.
|
|
3
|
+
Version: 0.2.4.20
|
|
4
4
|
Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
|
|
5
5
|
Author: Denis Molin
|
|
6
6
|
Requires-Python: >=3.6
|
|
@@ -13,6 +13,10 @@ Requires-Dist: tqdm
|
|
|
13
13
|
Requires-Dist: networkx
|
|
14
14
|
Requires-Dist: sqlparse
|
|
15
15
|
|
|
16
|
+
<p align="center">
|
|
17
|
+
<img src="https://raw.githubusercontent.com/denismolin/tdfs4ds/main/tdfs4ds_logo.png" alt="tdfs4ds logo" width="200"/>
|
|
18
|
+
</p>
|
|
19
|
+
|
|
16
20
|
# tdfs4ds : A Feature Store Library for Data Scientists working with Clearscape Analytics
|
|
17
21
|
|
|
18
22
|
The tdfs library is a Python package designed for managing and utilizing Feature Stores in a Teradata Database. With a set of easy-to-use functions, tdfs enables the efficient creation, registration, and storage of features. It also simplifies the process of preparing feature data for ingestion, building datasets for data analysis, and obtaining already existing features.
|
|
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=oGMGP13DYQowGbWRIP8xJ2RnhHqnVk5AD-t33mHrx0o,64168
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
@@ -25,11 +25,11 @@ tdfs4ds/process_store/process_store_catalog_management.py,sha256=H135RRTYn-pyWIq
|
|
|
25
25
|
tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
|
|
26
26
|
tdfs4ds/utils/filter_management.py,sha256=7D47N_hnTSUVOkaV2XuKrlUFMxzWjDsCBvRYsH4lXdU,11073
|
|
27
27
|
tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
|
|
28
|
-
tdfs4ds/utils/lineage.py,sha256=
|
|
28
|
+
tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,37839
|
|
29
29
|
tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
|
|
30
30
|
tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
|
|
31
31
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
32
|
-
tdfs4ds-0.2.4.
|
|
33
|
-
tdfs4ds-0.2.4.
|
|
34
|
-
tdfs4ds-0.2.4.
|
|
35
|
-
tdfs4ds-0.2.4.
|
|
32
|
+
tdfs4ds-0.2.4.20.dist-info/METADATA,sha256=niFkaOR8VVaVVYY318GZ9PwX4wjgf53Awzt_kCUWY84,12084
|
|
33
|
+
tdfs4ds-0.2.4.20.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
34
|
+
tdfs4ds-0.2.4.20.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
35
|
+
tdfs4ds-0.2.4.20.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|