tdfs4ds 0.2.4.15__py3-none-any.whl → 0.2.4.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tdfs4ds/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = '0.2.4.15'
1
+ __version__ = '0.2.4.16'
2
2
  import logging
3
3
  # Setup the logger
4
4
  logging.basicConfig(
@@ -165,99 +165,76 @@ def connect(
165
165
  followup_name = tdfs4ds.FOLLOW_UP_NAME,
166
166
  feature_catalog_name_view = tdfs4ds.FEATURE_CATALOG_NAME_VIEW,
167
167
  process_catalog_name_view = tdfs4ds.PROCESS_CATALOG_NAME_VIEW,
168
- dataset_catalog_name = tdfs4ds.DATASET_CATALOG_NAME
168
+ dataset_catalog_name = tdfs4ds.DATASET_CATALOG_NAME,
169
+ create_if_missing = False # New argument
169
170
  ):
170
- """
171
- Configures the database environment by setting schema names and checking the existence of specified catalog tables.
172
-
173
- This function initializes the database schema and verifies the presence of the feature catalog, process catalog,
174
- data distribution tables, and the filter manager. It updates the module-level configuration for these names if they exist.
175
- If any of the specified tables or manager do not exist, it raises an assertion error.
176
-
177
- Parameters:
178
- - database (str): The name of the database schema to use. Defaults to tdfs4ds.SCHEMA.
179
- - feature_catalog_name (str, optional): The name of the feature catalog table. Defaults to tdfs4ds.FEATURE_CATALOG_NAME.
180
- - process_catalog_name (str, optional): The name of the process catalog table. Defaults to tdfs4ds.PROCESS_CATALOG_NAME.
181
- - data_distribution_name (str, optional): The name of the data distribution table. Defaults to tdfs4ds.DATA_DISTRIBUTION_NAME.
182
- - filter_manager_name (str, optional): The name of the filter manager. Defaults to tdfs4ds.FILTER_MANAGER_NAME.
183
-
184
- Steps Performed:
185
- 1. Set the database schema to the provided 'database' name.
186
- 2. Retrieve the list of tables in the specified schema and check for the existence of the feature catalog, process catalog,
187
- data distribution tables, and filter manager.
188
- 3. Update the module-level names for these tables and manager if they exist.
189
- 4. Raise an assertion error if any of the specified tables or manager do not exist, specifying which are missing.
190
-
191
- Returns:
192
- None
193
-
194
- Raises:
195
- AssertionError: An error indicating which of the feature catalog, process catalog, data distribution table, or filter manager do not exist.
196
- """
197
171
  if database is not None:
198
172
  tdfs4ds.SCHEMA = database
199
173
  else:
200
- assert False, "database parameter is None."
174
+ raise ValueError("database parameter is None.")
201
175
 
202
176
  tables = [x.lower() for x in list(tdml.db_list_tables(schema_name=tdfs4ds.SCHEMA, object_type='table').TableName.values)]
177
+
203
178
  feature_exists = feature_catalog_name.lower() in tables
204
179
  process_exists = process_catalog_name.lower() in tables
205
180
  distrib_exists = data_distribution_name.lower() in tables
206
181
  filter_manager_exists = filter_manager_name.lower() in tables
207
182
  followup_name_exists = followup_name.lower() in tables
208
-
209
-
210
- if followup_name_exists:
211
- tdfs4ds.FOLLOW_UP_NAME = followup_name
212
- else:
213
- tdfs4ds.process_store.process_followup.follow_up_table_creation()
214
- tdfs4ds.FOLLOW_UP_NAME = followup_name
215
-
216
- if feature_exists and process_exists and distrib_exists and filter_manager_exists:
217
- tdfs4ds.FEATURE_CATALOG_NAME = feature_catalog_name
218
- tdfs4ds.PROCESS_CATALOG_NAME = process_catalog_name
219
- tdfs4ds.DATA_DISTRIBUTION_NAME = data_distribution_name
220
- tdfs4ds.FILTER_MANAGER_NAME = filter_manager_name
221
- tdfs4ds.PROCESS_CATALOG_NAME_VIEW = process_catalog_name_view
222
- tdfs4ds.FEATURE_CATALOG_NAME_VIEW = feature_catalog_name_view
223
-
224
183
 
225
- process_list = tdml.DataFrame(tdml.in_schema(database, process_catalog_name))
226
- if 'ENTITY_NULL_SUBSTITUTE' not in process_list.columns:
227
- print('ENTITY_NULL_SUBSTITUTE column does not exist in the existing process catalog')
228
- print('upgrade to the latest DDL')
229
- tdfs4ds.process_store.process_store_catalog_management.upgrade_process_catalog()
184
+ if not (feature_exists and process_exists and distrib_exists and filter_manager_exists):
185
+ if not create_if_missing:
186
+ return False # Feature store does not exist
187
+ else:
188
+ # Create the missing components
189
+ if not feature_exists:
190
+ tdfs4ds.feature_store.feature_store_management.feature_store_catalog_creation()
191
+ if not process_exists:
192
+ tdfs4ds.process_store.process_store_catalog_management.process_store_catalog_creation()
193
+ if not distrib_exists:
194
+ tdfs4ds.data_distribution.data_distribution_catalog_creation()
195
+ if not filter_manager_exists:
196
+ tdfs4ds.filter_manager.filter_manager_catalog_creation()
197
+
198
+ # Follow-up table handling
199
+ if not followup_name_exists:
200
+ tdfs4ds.process_store.process_followup.follow_up_table_creation()
201
+ tdfs4ds.FOLLOW_UP_NAME = followup_name
202
+
203
+ # Set catalog names
204
+ tdfs4ds.FEATURE_CATALOG_NAME = feature_catalog_name
205
+ tdfs4ds.PROCESS_CATALOG_NAME = process_catalog_name
206
+ tdfs4ds.DATA_DISTRIBUTION_NAME = data_distribution_name
207
+ tdfs4ds.FILTER_MANAGER_NAME = filter_manager_name
208
+ tdfs4ds.PROCESS_CATALOG_NAME_VIEW = process_catalog_name_view
209
+ tdfs4ds.FEATURE_CATALOG_NAME_VIEW = feature_catalog_name_view
210
+
211
+ process_list = tdml.DataFrame(tdml.in_schema(database, process_catalog_name))
212
+ if 'ENTITY_NULL_SUBSTITUTE' not in process_list.columns:
213
+ print('ENTITY_NULL_SUBSTITUTE column does not exist in the existing process catalog')
214
+ print('upgrade to the latest DDL')
215
+ tdfs4ds.process_store.process_store_catalog_management.upgrade_process_catalog()
230
216
 
231
- tdfs4ds.feature_store.feature_store_management.feature_store_catalog_view_creation()
232
- tdfs4ds.process_store.process_store_catalog_management.process_store_catalog_view_creation()
233
- else:
234
- missing = []
235
- if not feature_exists:
236
- missing.append("feature catalog")
237
- if not process_exists:
238
- missing.append("process catalog")
239
- if not distrib_exists:
240
- missing.append("data distribution table")
241
- if not filter_manager_exists:
242
- missing.append("filter manager")
243
- assert False, f"""{', '.join(missing)} {'do' if len(missing) > 1 else 'does'} not exist.
244
- Please run setup to create the missing table ou speficy the correct name in the arguments.
245
- type help(tdfs4ds.connect) for more information."""
217
+ tdfs4ds.feature_store.feature_store_management.feature_store_catalog_view_creation()
218
+ tdfs4ds.process_store.process_store_catalog_management.process_store_catalog_view_creation()
246
219
 
247
- def is_data_distribution_temporal():
248
- return 'PERIOD' in tdfs4ds.utils.lineage.get_ddl(view_name=tdfs4ds.DATA_DISTRIBUTION_NAME,
249
- schema_name=tdfs4ds.SCHEMA, object_type='table')
250
-
220
+ # Dataset catalog setup
251
221
  tdfs4ds.DATASET_CATALOG_NAME = dataset_catalog_name
252
222
  dataset_catalog = DatasetCatalog(schema_name=database, name=tdfs4ds.DATASET_CATALOG_NAME)
253
223
  if not dataset_catalog._exists():
254
224
  dataset_catalog.create_catalog()
255
225
 
256
- if is_data_distribution_temporal():
257
- tdfs4ds.DATA_DISTRIBUTION_TEMPORAL = True
258
- else:
259
- tdfs4ds.DATA_DISTRIBUTION_TEMPORAL = False
260
- return
226
+ # Check if distribution is temporal
227
+ def is_data_distribution_temporal():
228
+ return 'PERIOD' in tdfs4ds.utils.lineage.get_ddl(
229
+ view_name=tdfs4ds.DATA_DISTRIBUTION_NAME,
230
+ schema_name=tdfs4ds.SCHEMA,
231
+ object_type='table'
232
+ )
233
+
234
+ tdfs4ds.DATA_DISTRIBUTION_TEMPORAL = is_data_distribution_temporal()
235
+
236
+ return True # Feature store exists or was created
237
+
261
238
 
262
239
 
263
240
  def feature_catalog():
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tdfs4ds
3
- Version: 0.2.4.15
3
+ Version: 0.2.4.16
4
4
  Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
5
5
  Author: Denis Molin
6
6
  Requires-Python: >=3.6
7
7
  Description-Content-Type: text/markdown
8
- Requires-Dist: teradataml >=17.20
8
+ Requires-Dist: teradataml>=17.20
9
9
  Requires-Dist: pandas
10
10
  Requires-Dist: numpy
11
11
  Requires-Dist: plotly
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
2
2
  tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
3
3
  tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
4
4
  tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
5
- tdfs4ds/__init__.py,sha256=iMvZ-8xpVJmDd1XYF_OmiBiuNbuKEh0gCT99hgJp778,65845
5
+ tdfs4ds/__init__.py,sha256=tgwWXaw1AlYeTSLOEkn4Q35xtSvITL-tK0KAevVvUcA,64168
6
6
  tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
7
7
  tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
8
8
  tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
@@ -29,7 +29,7 @@ tdfs4ds/utils/lineage.py,sha256=LI-5pG7D8lO3-YFa9qA6CrEackiYugV23_Vz9IpF5xw,2867
29
29
  tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
30
30
  tdfs4ds/utils/time_management.py,sha256=rVxtIXcFtQih2UabAtos4DK-j9MPqzYVieIz_SvySZE,9241
31
31
  tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
32
- tdfs4ds-0.2.4.15.dist-info/METADATA,sha256=Hzu1GmQ0jQ9i2kwusgrpBN3CcXDnsZxK42qALwBkVRM,11945
33
- tdfs4ds-0.2.4.15.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
34
- tdfs4ds-0.2.4.15.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
35
- tdfs4ds-0.2.4.15.dist-info/RECORD,,
32
+ tdfs4ds-0.2.4.16.dist-info/METADATA,sha256=tD09svo_gk0JMYgg1aI3L8GbzZHjsKKeCFVw7b21D98,11944
33
+ tdfs4ds-0.2.4.16.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
34
+ tdfs4ds-0.2.4.16.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
35
+ tdfs4ds-0.2.4.16.dist-info/RECORD,,