tdfs4ds 0.2.4.19__py3-none-any.whl → 0.2.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tdfs4ds/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = '0.2.4.19'
1
+ __version__ = '0.2.4.20'
2
2
  import logging
3
3
  # Setup the logger
4
4
  logging.basicConfig(
tdfs4ds/utils/lineage.py CHANGED
@@ -631,17 +631,27 @@ def get_ddl(view_name, schema_name, object_type='view'):
631
631
  # Replace carriage returns with newlines for consistent formatting
632
632
  return ddl.replace('\r', '\n')
633
633
 
634
- from datetime import datetime
635
- import sqlparse
636
- import re
637
634
  import os
638
-
639
- def generate_process_report(format="html", output_file=None, collapsible=False, sort_by="view_name"):
635
+ import datetime
636
+ import sqlparse
637
+ import tdfs4ds
638
+ import importlib.resources as pkg_resources
639
+
640
+
641
+ def generate_process_report(
642
+ format="html",
643
+ output_file=None,
644
+ collapsible=False,
645
+ sort_by="view_name",
646
+ theme_mode="light",
647
+ company_name=None,
648
+ company_logo_url=None
649
+ ):
640
650
  """
641
- Generate a process catalog report with sidebar index grouped by database,
642
- with sub-items for Entity, Features, and DDL.
651
+ Generate a process catalog report styled after the Teradata website.
643
652
  """
644
653
 
654
+ # Retrieve processes
645
655
  processes = tdfs4ds.process_catalog()
646
656
  processes = processes[processes.DATA_DOMAIN == tdfs4ds.DATA_DOMAIN].to_pandas()
647
657
  processes['VIEW'] = processes['VIEW_NAME'].apply(lambda x: x.split('.')[1].replace('"', ""))
@@ -650,9 +660,9 @@ def generate_process_report(format="html", output_file=None, collapsible=False,
650
660
  db, vw = full_name.replace('"', '').split('.')
651
661
  return db, vw
652
662
 
653
- processes["DB"], processes["VW"] = zip(*processes["VIEW_NAME"].map(split_view_name))
654
-
655
663
  if sort_by:
664
+ processes = processes.copy()
665
+ processes["DB"], processes["VW"] = zip(*processes["VIEW_NAME"].map(split_view_name))
656
666
  if sort_by == "database":
657
667
  processes = processes.sort_values(["DB", "VW"])
658
668
  elif sort_by == "view_name":
@@ -660,123 +670,187 @@ def generate_process_report(format="html", output_file=None, collapsible=False,
660
670
  elif sort_by == "database,view_name":
661
671
  processes = processes.sort_values(["DB", "VW"])
662
672
 
663
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
664
- readable_ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
665
- data_domain = tdfs4ds.DATA_DOMAIN
666
-
667
- if output_file is None:
668
- output_file = f"report_{data_domain}_{timestamp}.{ 'html' if format=='html' else format }"
669
-
673
+ # Timestamp and output file
674
+ timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
675
+ if not output_file:
676
+ output_file = f"process_report_{tdfs4ds.DATA_DOMAIN}_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.html"
677
+
678
+ # Theme
679
+ if theme_mode == "dark":
680
+ bg_color = "#121212"
681
+ text_color = "#f5f5f5"
682
+ sidebar_bg = "#1e1e1e"
683
+ border_color = "#333"
684
+ pre_bg = "#222"
685
+ teradata_logo_file = "teradata_sym_rgb_wht_rev.png"
686
+ else:
687
+ bg_color = "#ffffff"
688
+ text_color = "#000000"
689
+ sidebar_bg = "#f8f9fa"
690
+ border_color = "#e1e1e1"
691
+ pre_bg = "#f4f6f8"
692
+ teradata_logo_file = "teradata_sym_rgb_pos.png"
693
+
694
+ # Load Teradata logo
695
+ with pkg_resources.path("tdfs4ds.data.logo", teradata_logo_file) as p:
696
+ teradata_logo_path = str(p)
697
+ with pkg_resources.path("tdfs4ds.data.logo", "tdfs4ds_logo.png") as p: # ensure you add this logo to package
698
+ tdfs4ds_logo_path = str(p)
699
+
700
+ # HTML Report
670
701
  report_parts = []
671
702
 
672
703
  if format == "html":
704
+ report_parts.append("<html><head>")
705
+ report_parts.append("<meta charset='utf-8'>")
706
+ report_parts.append("<title>Process Catalog Report</title>")
673
707
  report_parts.append(f"""
674
- <html>
675
- <head>
676
- <title>Process Report - {data_domain}</title>
677
- <style>
678
- body {{
679
- margin: 0;
680
- font-family: Arial, sans-serif;
681
- }}
682
- .sidebar {{
683
- position: fixed;
684
- top: 0;
685
- left: 0;
686
- width: 280px;
687
- height: 100%;
688
- background: #f4f4f4;
689
- border-right: 1px solid #ccc;
690
- padding: 20px;
691
- overflow-y: auto;
692
- }}
693
- .sidebar h2 {{
694
- font-size: 18px;
695
- margin-top: 0;
696
- }}
697
- .sidebar ul {{
698
- list-style: none;
699
- padding-left: 15px;
700
- }}
701
- .sidebar li {{
702
- margin-bottom: 4px;
703
- }}
704
- .sidebar a {{
705
- text-decoration: none;
706
- color: #007BFF;
707
- font-size: 14px;
708
- }}
709
- .sidebar a:hover {{
710
- text-decoration: underline;
711
- }}
712
- .content {{
713
- margin-left: 300px;
714
- padding: 20px;
715
- }}
716
- pre {{
717
- background: #f8f8f8;
718
- border: 1px solid #ddd;
719
- padding: 10px;
720
- overflow-x: auto;
721
- }}
722
- </style>
723
- </head>
724
- <body>
725
- <div class="sidebar">
726
- <h2>Teradata Feature Store</h2>
708
+ <style>
709
+ body {{
710
+ font-family: "Segoe UI", Arial, sans-serif;
711
+ margin: 0;
712
+ display: flex;
713
+ background: {bg_color};
714
+ color: {text_color};
715
+ height: 100vh;
716
+ overflow: hidden;
717
+ }}
718
+ .sidebar {{
719
+ width: 260px;
720
+ background: {sidebar_bg};
721
+ display: flex;
722
+ flex-direction: column;
723
+ border-right: 1px solid {border_color};
724
+ box-shadow: 2px 0 8px rgba(0,0,0,0.05);
725
+ }}
726
+ .sidebar-header {{
727
+ padding: 20px;
728
+ text-align: center;
729
+ border-bottom: 1px solid {border_color};
730
+ }}
731
+ .sidebar-header img {{
732
+ height: 35px;
733
+ max-width: 90%;
734
+ margin: 10px 0;
735
+ }}
736
+ .sidebar-content {{
737
+ flex: 1;
738
+ overflow-y: auto;
739
+ padding: 20px;
740
+ }}
741
+ .content-wrapper {{
742
+ flex: 1;
743
+ display: flex;
744
+ flex-direction: column;
745
+ height: 100vh;
746
+ }}
747
+ .header {{
748
+ flex: 0 0 auto;
749
+ background: #007CBA;
750
+ color: white;
751
+ padding: 20px;
752
+ }}
753
+ .header h1 {{
754
+ margin: 0 0 10px;
755
+ }}
756
+ .header p {{
757
+ margin: 5px 0;
758
+ font-size: 0.95em;
759
+ }}
760
+ .content {{
761
+ flex: 1;
762
+ padding: 30px;
763
+ overflow-y: auto;
764
+ }}
765
+ .footer {{
766
+ flex: 0 0 auto;
767
+ padding: 15px;
768
+ border-top: 1px solid {border_color};
769
+ font-size: 0.9em;
770
+ text-align: center;
771
+ color: #666;
772
+ display: flex;
773
+ justify-content: center;
774
+ align-items: center;
775
+ gap: 20px;
776
+ }}
777
+ .footer img {{
778
+ height: 25px;
779
+ }}
780
+ pre {{
781
+ background: {pre_bg};
782
+ padding: 12px;
783
+ border-radius: 6px;
784
+ overflow-x: auto;
785
+ font-size: 0.9em;
786
+ }}
787
+ ul {{ list-style-type: none; padding-left: 15px; }}
788
+ a {{ text-decoration: none; color: #007CBA; }}
789
+ a:hover {{ text-decoration: underline; }}
790
+ h2, h3 {{ color: #007CBA; }}
791
+ </style>
727
792
  """)
728
-
729
- # Build structured index: group by database, with sub-items
730
- grouped = processes.groupby("DB")
731
- for db, group in grouped:
732
- report_parts.append(f"<h3>DB: {db}</h3><ul>")
733
- for _, row in group.iterrows():
734
- view_name = row['VIEW_NAME']
735
- vw = row["VW"]
736
- safe_id = re.sub(r'[^A-Za-z0-9_]+', '_', view_name)
737
-
738
- # Main view link
739
- report_parts.append(f'<li><a href="#{safe_id}">{vw}</a>')
740
-
741
- # Sub-links: entity, features, ddl
742
- report_parts.append("<ul>")
743
- report_parts.append(f'<li><a href="#{safe_id}_entity">Entity</a></li>')
744
- report_parts.append(f'<li><a href="#{safe_id}_features">Features</a></li>')
745
- report_parts.append(f'<li><a href="#{safe_id}_ddl">DDL</a></li>')
793
+ report_parts.append("</head><body>")
794
+
795
+ # Sidebar
796
+ report_parts.append("<div class='sidebar'>")
797
+ report_parts.append("<div class='sidebar-header'>")
798
+ if company_logo_url:
799
+ report_parts.append(f'<img src="{company_logo_url}" alt="Company logo"><br>')
800
+ report_parts.append(f'<img src="{teradata_logo_path}" alt="Teradata logo">')
801
+ report_parts.append("</div>") # end sidebar-header
802
+
803
+ # Sidebar content (index)
804
+ report_parts.append("<div class='sidebar-content'>")
805
+ report_parts.append("<h2>Process Catalog Index</h2><ul id='indexList'>")
806
+ for db, db_group in processes.groupby("DB"):
807
+ report_parts.append(f"<li><h3>DB: {db}</h3><ul>")
808
+ for _, row in db_group.iterrows():
809
+ view_id = f"{row['DB']}_{row['VW']}"
810
+ report_parts.append(f'<li><a href="#{view_id}">{row["VW"]}</a><ul>')
811
+ report_parts.append(f'<li><a href="#{view_id}_entity">Entity</a></li>')
812
+ report_parts.append(f'<li><a href="#{view_id}_features">Features</a></li>')
813
+ report_parts.append(f'<li><a href="#{view_id}_ddl">DDL</a></li>')
746
814
  report_parts.append("</ul></li>")
747
- report_parts.append("</ul>")
815
+ report_parts.append("</ul></li>")
816
+ report_parts.append("</ul></div></div>") # close sidebar-content & sidebar
748
817
 
749
- report_parts.append("""
750
- </div>
751
- <div class="content">
752
- """)
818
+ # Main content wrapper
819
+ report_parts.append("<div class='content-wrapper'>")
753
820
 
754
821
  # Header
755
- report_parts.append(f"<h1>Process Catalog Report - {data_domain}</h1>")
756
- report_parts.append(f"<p><em>Generated on {readable_ts}</em></p>")
822
+ report_parts.append("<div class='header'>")
823
+ report_parts.append("<h1>Process Catalog Report</h1>")
824
+ report_parts.append("<p>Data Domain: Customer Transaction Analytics Time Management | "
825
+ f"Generated on {timestamp}</p>")
826
+ report_parts.append("<p>Powered by <strong>Teradata</strong> and <strong>tdfs4ds</strong></p>")
827
+ report_parts.append("</div>") # end header
828
+
829
+ # Content
830
+ report_parts.append("<div class='content'>")
757
831
 
832
+ # Sections per process
758
833
  for _, row in processes.iterrows():
759
834
  view_name = row['VIEW_NAME']
760
- db, vw = row["DB"], row["VW"]
835
+ db, vw = split_view_name(view_name)
836
+ view_id = f"{db}_{vw}"
761
837
  entity_list = row['ENTITY_ID'].split(',')
762
838
  features_list = row['FEATURE_NAMES'].split(',')
763
-
764
- ddl_raw = tdml.execute_sql(f"SHOW VIEW {view_name}").fetchall()[0][0]
839
+ process_id = row['PROCESS_ID']
840
+ ddl_raw = tdfs4ds.tdml.execute_sql(f"SHOW VIEW {view_name}").fetchall()[0][0]
765
841
  ddl = sqlparse.format(ddl_raw, reindent=True, keyword_case="upper")
766
- safe_id = re.sub(r'[^A-Za-z0-9_]+', '_', view_name)
767
842
 
768
843
  if format == "html":
769
- section = [f'<h2 id="{safe_id}">{db}.{vw} ({data_domain})</h2>']
770
-
771
- section.append(f'<h3 id="{safe_id}_entity">Entity</h3><ul>')
844
+ section = [f'<h2 id="{view_id}">{view_name}</h2>']
845
+ section.append(f"<p><strong>PROCESS_ID:</strong> {process_id}</p>")
846
+ section.append(f'<h3 id="{view_id}_entity">Entity</h3><ul>')
772
847
  section.extend([f"<li>{t}</li>" for t in entity_list])
773
848
  section.append("</ul>")
774
-
775
- section.append(f'<h3 id="{safe_id}_features">Features ({len(features_list)} total)</h3><ul>')
849
+ section.append(f'<h3 id="{view_id}_features">Features ({len(features_list)} total)</h3><ul>')
776
850
  section.extend([f"<li>{t}</li>" for t in features_list])
777
851
  section.append("</ul>")
852
+ section.append(f'<h3 id="{view_id}_ddl">DDL</h3>')
778
853
 
779
- section.append(f'<h3 id="{safe_id}_ddl">DDL</h3>')
780
854
  if collapsible:
781
855
  section.append("<details><summary>Show/Hide DDL</summary><pre><code>")
782
856
  section.append(ddl)
@@ -787,9 +861,20 @@ def generate_process_report(format="html", output_file=None, collapsible=False,
787
861
  report_parts.append("\n".join(section))
788
862
 
789
863
  if format == "html":
864
+ report_parts.append("</div>") # end content
865
+ report_parts.append(
866
+ f"<div class='footer'>"
867
+ f'<a href="https://www.teradata.com" target="_blank">'
868
+ f'<img src="{teradata_logo_path}" alt="Teradata logo"></a>'
869
+ f'<a href="https://pypi.org/project/tdfs4ds/" target="_blank">'
870
+ f'<img src="{tdfs4ds_logo_path}" alt="tdfs4ds logo"></a>'
871
+ f"<span>© 2025 . Generated using tdfs4ds on Teradata.</span>"
872
+ f"</div>"
873
+ )
790
874
  report_parts.append("</div></body></html>")
791
875
 
876
+ # Write output
792
877
  with open(output_file, "w", encoding="utf-8") as f:
793
878
  f.write("\n".join(report_parts))
794
879
 
795
- print(f"✅ Report generated: {os.path.abspath(output_file)}")
880
+ return output_file
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tdfs4ds
3
- Version: 0.2.4.19
3
+ Version: 0.2.4.20
4
4
  Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
5
5
  Author: Denis Molin
6
6
  Requires-Python: >=3.6
@@ -13,6 +13,10 @@ Requires-Dist: tqdm
13
13
  Requires-Dist: networkx
14
14
  Requires-Dist: sqlparse
15
15
 
16
+ <p align="center">
17
+ <img src="https://github.com/denismolin/tdfs4ds/blob/main/tdfs4ds_logo.png?raw=true" alt="tdfs4ds logo" width="200"/>
18
+ </p>
19
+
16
20
  # tdfs4ds: A Feature Store Library for Data Scientists working with ClearScape Analytics
17
21
 
18
22
  The tdfs4ds library is a Python package designed for managing and utilizing Feature Stores in a Teradata Database. With a set of easy-to-use functions, tdfs4ds enables the efficient creation, registration, and storage of features. It also simplifies the process of preparing feature data for ingestion, building datasets for data analysis, and retrieving existing features.
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
2
2
  tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
3
3
  tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
4
4
  tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
5
- tdfs4ds/__init__.py,sha256=_H_VK1ezxvUSLKuIq9WKdNwZuu_iWjNcBJ9IwXYkpDo,64168
5
+ tdfs4ds/__init__.py,sha256=oGMGP13DYQowGbWRIP8xJ2RnhHqnVk5AD-t33mHrx0o,64168
6
6
  tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
7
7
  tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
8
8
  tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
@@ -25,11 +25,11 @@ tdfs4ds/process_store/process_store_catalog_management.py,sha256=H135RRTYn-pyWIq
25
25
  tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
26
26
  tdfs4ds/utils/filter_management.py,sha256=7D47N_hnTSUVOkaV2XuKrlUFMxzWjDsCBvRYsH4lXdU,11073
27
27
  tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
28
- tdfs4ds/utils/lineage.py,sha256=XvoiNyrVrsVhuSZTAJrCNjEZAQ4YVsoe61aIl2fBKzk,34757
28
+ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,37839
29
29
  tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
30
30
  tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
31
31
  tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
32
- tdfs4ds-0.2.4.19.dist-info/METADATA,sha256=5ri5CNAhzx8igDa4PpsfwMQPeJSxhNsaM_c7_B8IYIs,11944
33
- tdfs4ds-0.2.4.19.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
34
- tdfs4ds-0.2.4.19.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
35
- tdfs4ds-0.2.4.19.dist-info/RECORD,,
32
+ tdfs4ds-0.2.4.20.dist-info/METADATA,sha256=niFkaOR8VVaVVYY318GZ9PwX4wjgf53Awzt_kCUWY84,12084
33
+ tdfs4ds-0.2.4.20.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
34
+ tdfs4ds-0.2.4.20.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
35
+ tdfs4ds-0.2.4.20.dist-info/RECORD,,