tdfs4ds 0.2.4.18__py3-none-any.whl → 0.2.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tdfs4ds/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = '0.2.4.18'
1
+ __version__ = '0.2.4.20'
2
2
  import logging
3
3
  # Setup the logger
4
4
  logging.basicConfig(
tdfs4ds/utils/lineage.py CHANGED
@@ -4,6 +4,7 @@ import teradataml as tdml
4
4
  import tdfs4ds
5
5
  import tqdm
6
6
  import networkx as nx
7
+ import sqlparse
7
8
 
8
9
 
9
10
  def query_change_case(query, case):
@@ -630,3 +631,250 @@ def get_ddl(view_name, schema_name, object_type='view'):
630
631
  # Replace carriage returns with newlines for consistent formatting
631
632
  return ddl.replace('\r', '\n')
632
633
 
634
+ import os
635
+ import datetime
636
+ import sqlparse
637
+ import tdfs4ds
638
+ import importlib.resources as pkg_resources
639
+
640
+
641
def generate_process_report(
    format="html",
    output_file=None,
    collapsible=False,
    sort_by="view_name",
    theme_mode="light",
    company_name=None,
    company_logo_url=None
):
    """
    Generate a process catalog report styled after the Teradata website.

    The report lists every process registered in ``tdfs4ds.process_catalog()``
    for the current ``tdfs4ds.DATA_DOMAIN``. It has a sidebar index grouped by
    database and, for each process view, its entity columns, its feature
    names and its DDL formatted with ``sqlparse``.

    Args:
        format (str): Output format. Only ``"html"`` is supported.
        output_file (str, optional): Destination path. When falsy, a name of
            the form ``process_report_<DATA_DOMAIN>_<YYYYmmdd_HHMMSS>.html``
            is used (relative to the current working directory).
        collapsible (bool): When true, each DDL block is wrapped in a
            ``<details>`` element so it can be folded.
        sort_by (str, optional): ``"view_name"`` sorts the sections by view
            name; ``"database"`` and ``"database,view_name"`` both sort by
            database then view name. Any other value keeps the catalog order.
        theme_mode (str): ``"dark"`` selects the dark palette; any other
            value selects the light palette.
        company_name (str, optional): Shown in the footer copyright notice
            when provided.
        company_logo_url (str, optional): When provided, an ``<img>`` with
            this URL is placed above the Teradata logo in the sidebar.

    Returns:
        str: The path of the file that was written.

    Raises:
        ValueError: If ``format`` is not ``"html"``.
    """
    # Local import so this block does not depend on the module's import
    # header already providing ``html``.
    import html

    # Fail fast: no other format has a renderer, and continuing would run a
    # SHOW VIEW query per process only to write an empty file.
    if format != "html":
        raise ValueError(
            f"Unsupported report format {format!r}; only 'html' is supported."
        )

    def esc(value):
        """Escape a value for safe use in HTML text and attribute values."""
        return html.escape(str(value), quote=True)

    def split_view_name(full_name):
        """Split a ``"db"."view"`` identifier into unquoted (db, view)."""
        db, vw = full_name.replace('"', '').split('.')
        return db, vw

    # Retrieve the processes of the current data domain.
    processes = tdfs4ds.process_catalog()
    processes = processes[processes.DATA_DOMAIN == tdfs4ds.DATA_DOMAIN].to_pandas()
    processes = processes.copy()

    # DB / VW are needed by the sidebar whatever ``sort_by`` is, so they are
    # always computed. Plain lists (rather than ``zip(*...)`` unpacking) keep
    # this working when the catalog is empty.
    name_pairs = [split_view_name(name) for name in processes["VIEW_NAME"]]
    processes["DB"] = [db for db, _ in name_pairs]
    processes["VW"] = [vw for _, vw in name_pairs]

    if sort_by in ("database", "database,view_name"):
        processes = processes.sort_values(["DB", "VW"])
    elif sort_by == "view_name":
        processes = processes.sort_values(["VW"])

    # One clock reading so the header timestamp and the default file name
    # always agree.
    now = datetime.datetime.now()
    timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
    if not output_file:
        output_file = (
            f"process_report_{tdfs4ds.DATA_DOMAIN}_{now.strftime('%Y%m%d_%H%M%S')}.html"
        )

    # Theme palette
    if theme_mode == "dark":
        bg_color = "#121212"
        text_color = "#f5f5f5"
        sidebar_bg = "#1e1e1e"
        border_color = "#333"
        pre_bg = "#222"
        teradata_logo_file = "teradata_sym_rgb_wht_rev.png"
    else:
        bg_color = "#ffffff"
        text_color = "#000000"
        sidebar_bg = "#f8f9fa"
        border_color = "#e1e1e1"
        pre_bg = "#f4f6f8"
        teradata_logo_file = "teradata_sym_rgb_pos.png"

    # Resolve the packaged logos to file-system paths.
    # NOTE(review): the paths are used after the context managers exit; for a
    # package imported from a zip the extracted temporary file may no longer
    # exist at that point - confirm the package is always installed unpacked.
    with pkg_resources.path("tdfs4ds.data.logo", teradata_logo_file) as p:
        teradata_logo_path = str(p)
    with pkg_resources.path("tdfs4ds.data.logo", "tdfs4ds_logo.png") as p:
        tdfs4ds_logo_path = str(p)

    report_parts = []

    # ---- <head> and stylesheet -------------------------------------------
    report_parts.append("<html><head>")
    report_parts.append("<meta charset='utf-8'>")
    report_parts.append("<title>Process Catalog Report</title>")
    report_parts.append(f"""
<style>
body {{
    font-family: "Segoe UI", Arial, sans-serif;
    margin: 0;
    display: flex;
    background: {bg_color};
    color: {text_color};
    height: 100vh;
    overflow: hidden;
}}
.sidebar {{
    width: 260px;
    background: {sidebar_bg};
    display: flex;
    flex-direction: column;
    border-right: 1px solid {border_color};
    box-shadow: 2px 0 8px rgba(0,0,0,0.05);
}}
.sidebar-header {{
    padding: 20px;
    text-align: center;
    border-bottom: 1px solid {border_color};
}}
.sidebar-header img {{
    height: 35px;
    max-width: 90%;
    margin: 10px 0;
}}
.sidebar-content {{
    flex: 1;
    overflow-y: auto;
    padding: 20px;
}}
.content-wrapper {{
    flex: 1;
    display: flex;
    flex-direction: column;
    height: 100vh;
}}
.header {{
    flex: 0 0 auto;
    background: #007CBA;
    color: white;
    padding: 20px;
}}
.header h1 {{
    margin: 0 0 10px;
}}
.header p {{
    margin: 5px 0;
    font-size: 0.95em;
}}
.content {{
    flex: 1;
    padding: 30px;
    overflow-y: auto;
}}
.footer {{
    flex: 0 0 auto;
    padding: 15px;
    border-top: 1px solid {border_color};
    font-size: 0.9em;
    text-align: center;
    color: #666;
    display: flex;
    justify-content: center;
    align-items: center;
    gap: 20px;
}}
.footer img {{
    height: 25px;
}}
pre {{
    background: {pre_bg};
    padding: 12px;
    border-radius: 6px;
    overflow-x: auto;
    font-size: 0.9em;
}}
ul {{ list-style-type: none; padding-left: 15px; }}
a {{ text-decoration: none; color: #007CBA; }}
a:hover {{ text-decoration: underline; }}
h2, h3 {{ color: #007CBA; }}
</style>
""")
    report_parts.append("</head><body>")

    # ---- Sidebar: logos and index ------------------------------------------
    report_parts.append("<div class='sidebar'>")
    report_parts.append("<div class='sidebar-header'>")
    if company_logo_url:
        report_parts.append(
            f'<img src="{esc(company_logo_url)}" alt="Company logo"><br>'
        )
    report_parts.append(f'<img src="{esc(teradata_logo_path)}" alt="Teradata logo">')
    report_parts.append("</div>")  # end sidebar-header

    report_parts.append("<div class='sidebar-content'>")
    report_parts.append("<h2>Process Catalog Index</h2><ul id='indexList'>")
    for db, db_group in processes.groupby("DB"):
        report_parts.append(f"<li><h3>DB: {esc(db)}</h3><ul>")
        for _, row in db_group.iterrows():
            vw = row["VW"]
            # Must match the ids emitted for the sections in the main content.
            view_id = esc(f"{row['DB']}_{vw}")
            report_parts.append(f'<li><a href="#{view_id}">{esc(vw)}</a><ul>')
            report_parts.append(f'<li><a href="#{view_id}_entity">Entity</a></li>')
            report_parts.append(f'<li><a href="#{view_id}_features">Features</a></li>')
            report_parts.append(f'<li><a href="#{view_id}_ddl">DDL</a></li>')
            report_parts.append("</ul></li>")
        report_parts.append("</ul></li>")
    report_parts.append("</ul></div></div>")  # close sidebar-content & sidebar

    # ---- Main content --------------------------------------------------------
    report_parts.append("<div class='content-wrapper'>")

    report_parts.append("<div class='header'>")
    report_parts.append("<h1>Process Catalog Report</h1>")
    # Show the data domain the catalog was actually filtered on.
    report_parts.append(
        f"<p>Data Domain: {esc(tdfs4ds.DATA_DOMAIN)} | Generated on {timestamp}</p>"
    )
    report_parts.append(
        "<p>Powered by <strong>Teradata</strong> and <strong>tdfs4ds</strong></p>"
    )
    report_parts.append("</div>")  # end header

    report_parts.append("<div class='content'>")

    # One section per process view.
    for _, row in processes.iterrows():
        view_name = row['VIEW_NAME']
        view_id = esc(f"{row['DB']}_{row['VW']}")
        entity_list = row['ENTITY_ID'].split(',')
        features_list = row['FEATURE_NAMES'].split(',')
        process_id = row['PROCESS_ID']

        # NOTE(review): only the first cell of the first result row is used -
        # confirm SHOW VIEW never returns the DDL over several rows.
        ddl_raw = tdfs4ds.tdml.execute_sql(f"SHOW VIEW {view_name}").fetchall()[0][0]
        ddl = sqlparse.format(ddl_raw, reindent=True, keyword_case="upper")
        # SQL routinely contains <, > and &; escape it so the browser shows
        # the DDL verbatim instead of parsing it as markup.
        ddl_block = f"<pre><code>{esc(ddl)}</code></pre>"
        if collapsible:
            ddl_block = (
                f"<details><summary>Show/Hide DDL</summary>{ddl_block}</details>"
            )

        section = [f'<h2 id="{view_id}">{esc(view_name)}</h2>']
        section.append(f"<p><strong>PROCESS_ID:</strong> {esc(process_id)}</p>")
        section.append(f'<h3 id="{view_id}_entity">Entity</h3><ul>')
        section.extend(f"<li>{esc(item)}</li>" for item in entity_list)
        section.append("</ul>")
        section.append(
            f'<h3 id="{view_id}_features">Features ({len(features_list)} total)</h3><ul>'
        )
        section.extend(f"<li>{esc(item)}</li>" for item in features_list)
        section.append("</ul>")
        section.append(f'<h3 id="{view_id}_ddl">DDL</h3>')
        section.append(ddl_block)

        report_parts.append("\n".join(section))

    report_parts.append("</div>")  # end content

    # ---- Footer --------------------------------------------------------------
    # The copyright notice carries the company name when one is given and
    # always uses the generation year.
    owner = f" {esc(company_name)}" if company_name else ""
    report_parts.append(
        "<div class='footer'>"
        '<a href="https://www.teradata.com" target="_blank">'
        f'<img src="{esc(teradata_logo_path)}" alt="Teradata logo"></a>'
        '<a href="https://pypi.org/project/tdfs4ds/" target="_blank">'
        f'<img src="{esc(tdfs4ds_logo_path)}" alt="tdfs4ds logo"></a>'
        f"<span>© {now.year}{owner}. Generated using tdfs4ds on Teradata.</span>"
        "</div>"
    )
    report_parts.append("</div></body></html>")

    # Write output
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(report_parts))

    return output_file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tdfs4ds
3
- Version: 0.2.4.18
3
+ Version: 0.2.4.20
4
4
  Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
5
5
  Author: Denis Molin
6
6
  Requires-Python: >=3.6
@@ -13,6 +13,10 @@ Requires-Dist: tqdm
13
13
  Requires-Dist: networkx
14
14
  Requires-Dist: sqlparse
15
15
 
16
+ <p align="center">
17
+ <img src="https://github.com/denismolin/tdfs4ds/blob/main/tdfs4ds_logo.png?raw=true" alt="tdfs4ds logo" width="200"/>
18
+ </p>
19
+
16
20
  # tdfs4ds : A Feature Store Library for Data Scientists working with Clearscape Analytics
17
21
 
18
22
  The tdfs library is a Python package designed for managing and utilizing Feature Stores in a Teradata Database. With a set of easy-to-use functions, tdfs enables the efficient creation, registration, and storage of features. It also simplifies the process of preparing feature data for ingestion, building datasets for data analysis, and obtaining already existing features.
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
2
2
  tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
3
3
  tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
4
4
  tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
5
- tdfs4ds/__init__.py,sha256=pDeTVUVWwzAj5A7GxZy6KTKJkAYuzm5NbmKwhFOFa7I,64168
5
+ tdfs4ds/__init__.py,sha256=oGMGP13DYQowGbWRIP8xJ2RnhHqnVk5AD-t33mHrx0o,64168
6
6
  tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
7
7
  tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
8
8
  tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
@@ -25,11 +25,11 @@ tdfs4ds/process_store/process_store_catalog_management.py,sha256=H135RRTYn-pyWIq
25
25
  tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
26
26
  tdfs4ds/utils/filter_management.py,sha256=7D47N_hnTSUVOkaV2XuKrlUFMxzWjDsCBvRYsH4lXdU,11073
27
27
  tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
28
- tdfs4ds/utils/lineage.py,sha256=LI-5pG7D8lO3-YFa9qA6CrEackiYugV23_Vz9IpF5xw,28670
28
+ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,37839
29
29
  tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
30
30
  tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
31
31
  tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
32
- tdfs4ds-0.2.4.18.dist-info/METADATA,sha256=I3XSCDdFlXlJBYpj4B5liyIU99fkAGWstaaZGvQYDdg,11944
33
- tdfs4ds-0.2.4.18.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
34
- tdfs4ds-0.2.4.18.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
35
- tdfs4ds-0.2.4.18.dist-info/RECORD,,
32
+ tdfs4ds-0.2.4.20.dist-info/METADATA,sha256=niFkaOR8VVaVVYY318GZ9PwX4wjgf53Awzt_kCUWY84,12084
33
+ tdfs4ds-0.2.4.20.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
34
+ tdfs4ds-0.2.4.20.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
35
+ tdfs4ds-0.2.4.20.dist-info/RECORD,,