tdfs4ds 0.2.4.15__py3-none-any.whl → 0.2.4.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +52 -75
- {tdfs4ds-0.2.4.15.dist-info → tdfs4ds-0.2.4.16.dist-info}/METADATA +2 -2
- {tdfs4ds-0.2.4.15.dist-info → tdfs4ds-0.2.4.16.dist-info}/RECORD +5 -5
- {tdfs4ds-0.2.4.15.dist-info → tdfs4ds-0.2.4.16.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.15.dist-info → tdfs4ds-0.2.4.16.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
__version__ = '0.2.4.15'
|
|
1
|
+
__version__ = '0.2.4.16'
|
|
2
2
|
import logging
|
|
3
3
|
# Setup the logger
|
|
4
4
|
logging.basicConfig(
|
|
@@ -165,99 +165,76 @@ def connect(
|
|
|
165
165
|
followup_name = tdfs4ds.FOLLOW_UP_NAME,
|
|
166
166
|
feature_catalog_name_view = tdfs4ds.FEATURE_CATALOG_NAME_VIEW,
|
|
167
167
|
process_catalog_name_view = tdfs4ds.PROCESS_CATALOG_NAME_VIEW,
|
|
168
|
-
dataset_catalog_name = tdfs4ds.DATASET_CATALOG_NAME
|
|
168
|
+
dataset_catalog_name = tdfs4ds.DATASET_CATALOG_NAME,
|
|
169
|
+
create_if_missing = False # New argument
|
|
169
170
|
):
|
|
170
|
-
"""
|
|
171
|
-
Configures the database environment by setting schema names and checking the existence of specified catalog tables.
|
|
172
|
-
|
|
173
|
-
This function initializes the database schema and verifies the presence of the feature catalog, process catalog,
|
|
174
|
-
data distribution tables, and the filter manager. It updates the module-level configuration for these names if they exist.
|
|
175
|
-
If any of the specified tables or manager do not exist, it raises an assertion error.
|
|
176
|
-
|
|
177
|
-
Parameters:
|
|
178
|
-
- database (str): The name of the database schema to use. Defaults to tdfs4ds.SCHEMA.
|
|
179
|
-
- feature_catalog_name (str, optional): The name of the feature catalog table. Defaults to tdfs4ds.FEATURE_CATALOG_NAME.
|
|
180
|
-
- process_catalog_name (str, optional): The name of the process catalog table. Defaults to tdfs4ds.PROCESS_CATALOG_NAME.
|
|
181
|
-
- data_distribution_name (str, optional): The name of the data distribution table. Defaults to tdfs4ds.DATA_DISTRIBUTION_NAME.
|
|
182
|
-
- filter_manager_name (str, optional): The name of the filter manager. Defaults to tdfs4ds.FILTER_MANAGER_NAME.
|
|
183
|
-
|
|
184
|
-
Steps Performed:
|
|
185
|
-
1. Set the database schema to the provided 'database' name.
|
|
186
|
-
2. Retrieve the list of tables in the specified schema and check for the existence of the feature catalog, process catalog,
|
|
187
|
-
data distribution tables, and filter manager.
|
|
188
|
-
3. Update the module-level names for these tables and manager if they exist.
|
|
189
|
-
4. Raise an assertion error if any of the specified tables or manager do not exist, specifying which are missing.
|
|
190
|
-
|
|
191
|
-
Returns:
|
|
192
|
-
None
|
|
193
|
-
|
|
194
|
-
Raises:
|
|
195
|
-
AssertionError: An error indicating which of the feature catalog, process catalog, data distribution table, or filter manager do not exist.
|
|
196
|
-
"""
|
|
197
171
|
if database is not None:
|
|
198
172
|
tdfs4ds.SCHEMA = database
|
|
199
173
|
else:
|
|
200
|
-
|
|
174
|
+
raise ValueError("database parameter is None.")
|
|
201
175
|
|
|
202
176
|
tables = [x.lower() for x in list(tdml.db_list_tables(schema_name=tdfs4ds.SCHEMA, object_type='table').TableName.values)]
|
|
177
|
+
|
|
203
178
|
feature_exists = feature_catalog_name.lower() in tables
|
|
204
179
|
process_exists = process_catalog_name.lower() in tables
|
|
205
180
|
distrib_exists = data_distribution_name.lower() in tables
|
|
206
181
|
filter_manager_exists = filter_manager_name.lower() in tables
|
|
207
182
|
followup_name_exists = followup_name.lower() in tables
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
if followup_name_exists:
|
|
211
|
-
tdfs4ds.FOLLOW_UP_NAME = followup_name
|
|
212
|
-
else:
|
|
213
|
-
tdfs4ds.process_store.process_followup.follow_up_table_creation()
|
|
214
|
-
tdfs4ds.FOLLOW_UP_NAME = followup_name
|
|
215
|
-
|
|
216
|
-
if feature_exists and process_exists and distrib_exists and filter_manager_exists:
|
|
217
|
-
tdfs4ds.FEATURE_CATALOG_NAME = feature_catalog_name
|
|
218
|
-
tdfs4ds.PROCESS_CATALOG_NAME = process_catalog_name
|
|
219
|
-
tdfs4ds.DATA_DISTRIBUTION_NAME = data_distribution_name
|
|
220
|
-
tdfs4ds.FILTER_MANAGER_NAME = filter_manager_name
|
|
221
|
-
tdfs4ds.PROCESS_CATALOG_NAME_VIEW = process_catalog_name_view
|
|
222
|
-
tdfs4ds.FEATURE_CATALOG_NAME_VIEW = feature_catalog_name_view
|
|
223
|
-
|
|
224
183
|
|
|
225
|
-
|
|
226
|
-
if
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
184
|
+
if not (feature_exists and process_exists and distrib_exists and filter_manager_exists):
|
|
185
|
+
if not create_if_missing:
|
|
186
|
+
return False # Feature store does not exist
|
|
187
|
+
else:
|
|
188
|
+
# Create the missing components
|
|
189
|
+
if not feature_exists:
|
|
190
|
+
tdfs4ds.feature_store.feature_store_management.feature_store_catalog_creation()
|
|
191
|
+
if not process_exists:
|
|
192
|
+
tdfs4ds.process_store.process_store_catalog_management.process_store_catalog_creation()
|
|
193
|
+
if not distrib_exists:
|
|
194
|
+
tdfs4ds.data_distribution.data_distribution_catalog_creation()
|
|
195
|
+
if not filter_manager_exists:
|
|
196
|
+
tdfs4ds.filter_manager.filter_manager_catalog_creation()
|
|
197
|
+
|
|
198
|
+
# Follow-up table handling
|
|
199
|
+
if not followup_name_exists:
|
|
200
|
+
tdfs4ds.process_store.process_followup.follow_up_table_creation()
|
|
201
|
+
tdfs4ds.FOLLOW_UP_NAME = followup_name
|
|
202
|
+
|
|
203
|
+
# Set catalog names
|
|
204
|
+
tdfs4ds.FEATURE_CATALOG_NAME = feature_catalog_name
|
|
205
|
+
tdfs4ds.PROCESS_CATALOG_NAME = process_catalog_name
|
|
206
|
+
tdfs4ds.DATA_DISTRIBUTION_NAME = data_distribution_name
|
|
207
|
+
tdfs4ds.FILTER_MANAGER_NAME = filter_manager_name
|
|
208
|
+
tdfs4ds.PROCESS_CATALOG_NAME_VIEW = process_catalog_name_view
|
|
209
|
+
tdfs4ds.FEATURE_CATALOG_NAME_VIEW = feature_catalog_name_view
|
|
210
|
+
|
|
211
|
+
process_list = tdml.DataFrame(tdml.in_schema(database, process_catalog_name))
|
|
212
|
+
if 'ENTITY_NULL_SUBSTITUTE' not in process_list.columns:
|
|
213
|
+
print('ENTITY_NULL_SUBSTITUTE column does not exist in the existing process catalog')
|
|
214
|
+
print('upgrade to the latest DDL')
|
|
215
|
+
tdfs4ds.process_store.process_store_catalog_management.upgrade_process_catalog()
|
|
230
216
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
else:
|
|
234
|
-
missing = []
|
|
235
|
-
if not feature_exists:
|
|
236
|
-
missing.append("feature catalog")
|
|
237
|
-
if not process_exists:
|
|
238
|
-
missing.append("process catalog")
|
|
239
|
-
if not distrib_exists:
|
|
240
|
-
missing.append("data distribution table")
|
|
241
|
-
if not filter_manager_exists:
|
|
242
|
-
missing.append("filter manager")
|
|
243
|
-
assert False, f"""{', '.join(missing)} {'do' if len(missing) > 1 else 'does'} not exist.
|
|
244
|
-
Please run setup to create the missing table ou speficy the correct name in the arguments.
|
|
245
|
-
type help(tdfs4ds.connect) for more information."""
|
|
217
|
+
tdfs4ds.feature_store.feature_store_management.feature_store_catalog_view_creation()
|
|
218
|
+
tdfs4ds.process_store.process_store_catalog_management.process_store_catalog_view_creation()
|
|
246
219
|
|
|
247
|
-
|
|
248
|
-
return 'PERIOD' in tdfs4ds.utils.lineage.get_ddl(view_name=tdfs4ds.DATA_DISTRIBUTION_NAME,
|
|
249
|
-
schema_name=tdfs4ds.SCHEMA, object_type='table')
|
|
250
|
-
|
|
220
|
+
# Dataset catalog setup
|
|
251
221
|
tdfs4ds.DATASET_CATALOG_NAME = dataset_catalog_name
|
|
252
222
|
dataset_catalog = DatasetCatalog(schema_name=database, name=tdfs4ds.DATASET_CATALOG_NAME)
|
|
253
223
|
if not dataset_catalog._exists():
|
|
254
224
|
dataset_catalog.create_catalog()
|
|
255
225
|
|
|
256
|
-
if
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
226
|
+
# Check if distribution is temporal
|
|
227
|
+
def is_data_distribution_temporal():
|
|
228
|
+
return 'PERIOD' in tdfs4ds.utils.lineage.get_ddl(
|
|
229
|
+
view_name=tdfs4ds.DATA_DISTRIBUTION_NAME,
|
|
230
|
+
schema_name=tdfs4ds.SCHEMA,
|
|
231
|
+
object_type='table'
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
tdfs4ds.DATA_DISTRIBUTION_TEMPORAL = is_data_distribution_temporal()
|
|
235
|
+
|
|
236
|
+
return True # Feature store exists or was created
|
|
237
|
+
|
|
261
238
|
|
|
262
239
|
|
|
263
240
|
def feature_catalog():
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: tdfs4ds
|
|
3
|
-
Version: 0.2.4.15
|
|
3
|
+
Version: 0.2.4.16
|
|
4
4
|
Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
|
|
5
5
|
Author: Denis Molin
|
|
6
6
|
Requires-Python: >=3.6
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
|
-
Requires-Dist: teradataml
|
|
8
|
+
Requires-Dist: teradataml>=17.20
|
|
9
9
|
Requires-Dist: pandas
|
|
10
10
|
Requires-Dist: numpy
|
|
11
11
|
Requires-Dist: plotly
|
|
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=tgwWXaw1AlYeTSLOEkn4Q35xtSvITL-tK0KAevVvUcA,64168
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
@@ -29,7 +29,7 @@ tdfs4ds/utils/lineage.py,sha256=LI-5pG7D8lO3-YFa9qA6CrEackiYugV23_Vz9IpF5xw,2867
|
|
|
29
29
|
tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
|
|
30
30
|
tdfs4ds/utils/time_management.py,sha256=rVxtIXcFtQih2UabAtos4DK-j9MPqzYVieIz_SvySZE,9241
|
|
31
31
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
32
|
-
tdfs4ds-0.2.4.
|
|
33
|
-
tdfs4ds-0.2.4.15.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
34
|
-
tdfs4ds-0.2.4.15.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
35
|
-
tdfs4ds-0.2.4.15.dist-info/RECORD,,
|
|
32
|
+
tdfs4ds-0.2.4.16.dist-info/METADATA,sha256=tD09svo_gk0JMYgg1aI3L8GbzZHjsKKeCFVw7b21D98,11944
|
|
33
|
+
tdfs4ds-0.2.4.16.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
34
|
+
tdfs4ds-0.2.4.16.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
35
|
+
tdfs4ds-0.2.4.16.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|