tdfs4ds 0.2.4.24__py3-none-any.whl → 0.2.4.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tdfs4ds/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = '0.2.4.24'
1
+ __version__ = '0.2.4.25'
2
2
  import logging
3
3
  # Setup the logger
4
4
  logging.basicConfig(
@@ -43,7 +43,7 @@ class Dataset:
43
43
  return getattr(self.df, item)
44
44
  raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")
45
45
 
46
- def _retrieve_entities_and_features(self):
46
+ def _retrieve_entities_and_features_old(self):
47
47
 
48
48
  if self._get_dataset_type() == 'snapshot':
49
49
 
@@ -75,6 +75,63 @@ class Dataset:
75
75
  logger.error(f"not implemented yet for dataset type: {self._get_dataset_type()}")
76
76
  raise
77
77
 
78
def _retrieve_entities_and_features(self):
    """Split the dataset columns into entity columns and feature columns.

    The dataset DDL (from ``self._get_ddl()``) is scanned for the
    per-feature subqueries of the form
    ``SEQUENCED VALIDTIME SELECT ... B1.FEATURE_VALUE AS <name> FROM <db>.<view> B1
    WHERE (FEATURE_ID = <id> AND FEATURE_VERSION = '<version>')``.
    Every match describes one feature; every dataframe column that is not
    a matched feature is reported as an entity column.

    Returns:
        tuple: ``(entity, features)`` where

        * ``entity`` maps each non-feature column name (as spelled in
          ``self.df.columns``) to its entry in the column-type mapping;
        * ``features`` maps each upper-cased feature name to a dict with
          the keys ``'id'`` (int), ``'version'`` (str), ``'type'``
          (upper-cased column type), ``'database'`` and ``'view'``
          (both upper-cased).

    Raises:
        NotImplementedError: if the dataset type is not ``'snapshot'``.
            (``NotImplementedError`` is a ``RuntimeError`` subclass, so
            callers that caught the ``RuntimeError`` produced by the
            previous bare ``raise`` keep working.)
        KeyError: if a feature found in the DDL has no matching column,
            even case-insensitively, in the column-type mapping.
    """
    import re

    dataset_type = self._get_dataset_type()
    if dataset_type != 'snapshot':
        message = f"not implemented yet for dataset type: {dataset_type}"
        logger.error(message)
        # A bare `raise` here has no active exception to re-raise and would
        # only surface as an uninformative RuntimeError.
        raise NotImplementedError(message)

    ddl = self._get_ddl()

    # Column types from the materialized dataframe.
    # NOTE(review): presumably a mapping of column name -> SQL type string;
    # it is indexed by column name and `.upper()` is applied to its values.
    columns_types = get_feature_types_sql_format(self.df)

    # Upper-cased column name -> actual dataframe column name, used when the
    # alias in the DDL differs only in case from the dataframe column.
    columns_by_upper = {}
    for column in self.df.columns:
        columns_by_upper.setdefault(column.upper(), column)

    # Regex to capture each feature subquery:
    #   - grabs feature name alias, FEATURE_ID, FEATURE_VERSION
    #   - grabs database and view/table (quoted or unquoted)
    pattern = re.compile(
        r"""
        SEQUENCED\s+VALIDTIME\s+SELECT
        .*?                                  # anything before the feature value
        B1\.FEATURE_VALUE\s+AS\s+(?P<fname>[A-Za-z_][\w]*)   # AS <feature_name>
        \s+FROM\s+
        (?:
            "(?P<dbq>[^"]+)"\."(?P<viewq>[^"]+)"             # "DB"."VIEW"
            |
            (?P<db>[A-Za-z_]\w*)\.(?P<view>[A-Za-z_]\w*)     # DB.VIEW
        )
        \s+B1\s+WHERE\s*\(
        \s*FEATURE_ID\s*=\s*(?P<fid>\d+)\s+
        AND\s+FEATURE_VERSION\s*=\s*'(?P<fver>[^']+)'
        \s*\)
        """,
        re.IGNORECASE | re.DOTALL | re.VERBOSE
    )

    features = {}
    for m in pattern.finditer(ddl):
        fname = m.group('fname')
        try:
            ftype = columns_types[fname]
        except KeyError:
            # The pattern is case-insensitive and features are keyed in upper
            # case, so resolve the column type case-insensitively as well.
            # A KeyError still propagates when no column matches at all.
            ftype = columns_types[columns_by_upper[fname.upper()]]
        features[fname.upper()] = {
            'id': int(m.group('fid')),
            'version': m.group('fver'),
            'type': ftype.upper(),
            'database': (m.group('dbq') or m.group('db') or '').upper(),
            'view': (m.group('viewq') or m.group('view') or '').upper(),
        }

    # Anything in the dataframe that isn't a feature column is an entity column.
    entity = {
        column: columns_types[column]
        for column in self.df.columns
        if column.upper() not in features
    }

    return entity, features
134
+
78
135
  def _get_dataset_type(self):
      """Return the dataset type identifier; this implementation always returns 'snapshot'."""
      return 'snapshot'
80
137
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tdfs4ds
3
- Version: 0.2.4.24
3
+ Version: 0.2.4.25
4
4
  Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
5
5
  Author: Denis Molin
6
6
  Requires-Python: >=3.6
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
2
2
  tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
3
3
  tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
4
4
  tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
5
- tdfs4ds/__init__.py,sha256=vKZp_W-fEms4vEvXDyUSdBA4o5-YWU8cx16tq57jNPY,64168
5
+ tdfs4ds/__init__.py,sha256=SLBlY2M4Z7IyvMYnd8wRWUeXJeWJhDYLD7FdWXII7lI,64168
6
6
  tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
7
7
  tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
8
8
  tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
@@ -13,7 +13,7 @@ tdfs4ds/data/logo/tdfs4ds_logo.png,sha256=OCKQnH0gQbRyupwZeiIgo-9c6mdRtjE2E2Zunr
13
13
  tdfs4ds/data/logo/teradata_sym_rgb_pos.png,sha256=Zq-QzLb04PIQ4iN8C6ssaLuNVVI1Q_TqBkFx_f7aNOI,8052
14
14
  tdfs4ds/data/logo/teradata_sym_rgb_wht_rev.png,sha256=ETznIUnS38vlHek_CzjmcjnpthfCATCp2Ww0Dx8Th3Q,7803
15
15
  tdfs4ds/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- tdfs4ds/dataset/dataset.py,sha256=caiQwT-RtdPe5MDtsynWMm1n12OxftgMp7_BR9SCHKw,5360
16
+ tdfs4ds/dataset/dataset.py,sha256=J_fgfsVdR9zSOXrUOqyotqsUD-GlQMGyuld6ueov45w,7603
17
17
  tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22Tsd5k,16638
18
18
  tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaUGCnI,209
19
19
  tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
@@ -32,7 +32,7 @@ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,3783
32
32
  tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
33
33
  tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
34
34
  tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
35
- tdfs4ds-0.2.4.24.dist-info/METADATA,sha256=et6II_3-JvvDjxw4JJWNEMGtpMtGPpF70MKHFy3K6RM,14326
36
- tdfs4ds-0.2.4.24.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
37
- tdfs4ds-0.2.4.24.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
38
- tdfs4ds-0.2.4.24.dist-info/RECORD,,
35
+ tdfs4ds-0.2.4.25.dist-info/METADATA,sha256=u4jn-r_y-KkfxsVcBRAQ07OQw7vkSUEwvtNp0BS2WKs,14326
36
+ tdfs4ds-0.2.4.25.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
37
+ tdfs4ds-0.2.4.25.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
38
+ tdfs4ds-0.2.4.25.dist-info/RECORD,,