tdfs4ds 0.2.4.24__py3-none-any.whl → 0.2.4.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +1 -1
- tdfs4ds/dataset/dataset.py +58 -1
- {tdfs4ds-0.2.4.24.dist-info → tdfs4ds-0.2.4.25.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.24.dist-info → tdfs4ds-0.2.4.25.dist-info}/RECORD +6 -6
- {tdfs4ds-0.2.4.24.dist-info → tdfs4ds-0.2.4.25.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.24.dist-info → tdfs4ds-0.2.4.25.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
tdfs4ds/dataset/dataset.py
CHANGED
|
@@ -43,7 +43,7 @@ class Dataset:
|
|
|
43
43
|
return getattr(self.df, item)
|
|
44
44
|
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")
|
|
45
45
|
|
|
46
|
-
def
|
|
46
|
+
def _retrieve_entities_and_features_old(self):
|
|
47
47
|
|
|
48
48
|
if self._get_dataset_type() == 'snapshot':
|
|
49
49
|
|
|
@@ -75,6 +75,63 @@ class Dataset:
|
|
|
75
75
|
logger.error(f"not implemented yet for dataset type: {self._get_dataset_type()}")
|
|
76
76
|
raise
|
|
77
77
|
|
|
78
|
+
def _retrieve_entities_and_features(self):
|
|
79
|
+
if self._get_dataset_type() != 'snapshot':
|
|
80
|
+
logger.error(f"not implemented yet for dataset type: {self._get_dataset_type()}")
|
|
81
|
+
raise
|
|
82
|
+
|
|
83
|
+
import re
|
|
84
|
+
|
|
85
|
+
ddl = self._get_ddl()
|
|
86
|
+
|
|
87
|
+
# Column types from the materialized dataframe
|
|
88
|
+
columns_types = get_feature_types_sql_format(self.df)
|
|
89
|
+
|
|
90
|
+
# Regex to capture each feature subquery:
|
|
91
|
+
# - grabs feature name alias, FEATURE_ID, FEATURE_VERSION
|
|
92
|
+
# - grabs database and view/table (quoted or unquoted)
|
|
93
|
+
pattern = re.compile(
|
|
94
|
+
r"""
|
|
95
|
+
SEQUENCED\s+VALIDTIME\s+SELECT
|
|
96
|
+
.*? # anything before the feature value
|
|
97
|
+
B1\.FEATURE_VALUE\s+AS\s+(?P<fname>[A-Za-z_][\w]*) # AS <feature_name>
|
|
98
|
+
\s+FROM\s+
|
|
99
|
+
(?:
|
|
100
|
+
"(?P<dbq>[^"]+)"\."(?P<viewq>[^"]+)" # "DB"."VIEW"
|
|
101
|
+
|
|
|
102
|
+
(?P<db>[A-Za-z_]\w*)\.(?P<view>[A-Za-z_]\w*) # DB.VIEW
|
|
103
|
+
)
|
|
104
|
+
\s+B1\s+WHERE\s*\(
|
|
105
|
+
\s*FEATURE_ID\s*=\s*(?P<fid>\d+)\s+
|
|
106
|
+
AND\s+FEATURE_VERSION\s*=\s*'(?P<fver>[^']+)'
|
|
107
|
+
\s*\)
|
|
108
|
+
""",
|
|
109
|
+
re.IGNORECASE | re.DOTALL | re.VERBOSE
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
features = {}
|
|
113
|
+
for m in pattern.finditer(ddl):
|
|
114
|
+
fname = m.group('fname')
|
|
115
|
+
fid = int(m.group('fid'))
|
|
116
|
+
fver = m.group('fver')
|
|
117
|
+
db = (m.group('dbq') or m.group('db') or '').upper()
|
|
118
|
+
view = (m.group('viewq') or m.group('view') or '').upper()
|
|
119
|
+
ftype = columns_types[fname].upper()
|
|
120
|
+
features[fname.upper()] = {
|
|
121
|
+
'id': fid,
|
|
122
|
+
'version': fver,
|
|
123
|
+
'type': ftype,
|
|
124
|
+
'database': db,
|
|
125
|
+
'view': view
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
# Anything in the dataframe that isn't a feature column is an entity column.
|
|
129
|
+
feature_names_upper = set(features.keys())
|
|
130
|
+
entity_names = [c for c in self.df.columns if c.upper() not in feature_names_upper]
|
|
131
|
+
entity = {n: columns_types[n] for n in entity_names}
|
|
132
|
+
|
|
133
|
+
return entity, features
|
|
134
|
+
|
|
78
135
|
def _get_dataset_type(self):
|
|
79
136
|
return 'snapshot'
|
|
80
137
|
|
|
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=SLBlY2M4Z7IyvMYnd8wRWUeXJeWJhDYLD7FdWXII7lI,64168
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
@@ -13,7 +13,7 @@ tdfs4ds/data/logo/tdfs4ds_logo.png,sha256=OCKQnH0gQbRyupwZeiIgo-9c6mdRtjE2E2Zunr
|
|
|
13
13
|
tdfs4ds/data/logo/teradata_sym_rgb_pos.png,sha256=Zq-QzLb04PIQ4iN8C6ssaLuNVVI1Q_TqBkFx_f7aNOI,8052
|
|
14
14
|
tdfs4ds/data/logo/teradata_sym_rgb_wht_rev.png,sha256=ETznIUnS38vlHek_CzjmcjnpthfCATCp2Ww0Dx8Th3Q,7803
|
|
15
15
|
tdfs4ds/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
-
tdfs4ds/dataset/dataset.py,sha256=
|
|
16
|
+
tdfs4ds/dataset/dataset.py,sha256=J_fgfsVdR9zSOXrUOqyotqsUD-GlQMGyuld6ueov45w,7603
|
|
17
17
|
tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22Tsd5k,16638
|
|
18
18
|
tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaUGCnI,209
|
|
19
19
|
tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
|
|
@@ -32,7 +32,7 @@ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,3783
|
|
|
32
32
|
tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
|
|
33
33
|
tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
|
|
34
34
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
35
|
-
tdfs4ds-0.2.4.
|
|
36
|
-
tdfs4ds-0.2.4.
|
|
37
|
-
tdfs4ds-0.2.4.
|
|
38
|
-
tdfs4ds-0.2.4.
|
|
35
|
+
tdfs4ds-0.2.4.25.dist-info/METADATA,sha256=u4jn-r_y-KkfxsVcBRAQ07OQw7vkSUEwvtNp0BS2WKs,14326
|
|
36
|
+
tdfs4ds-0.2.4.25.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
37
|
+
tdfs4ds-0.2.4.25.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
38
|
+
tdfs4ds-0.2.4.25.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|