tdfs4ds 0.2.4.31__py3-none-any.whl → 0.2.4.33__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,7 @@ def list_processes():
         return tdml.DataFrame(tdml.in_schema(tdfs4ds.SCHEMA, tdfs4ds.PROCESS_CATALOG_NAME_VIEW))
     except Exception as e:
         print(str(e))
-        print(query)
+        print(tdml.DataFrame(tdml.in_schema(tdfs4ds.SCHEMA, tdfs4ds.PROCESS_CATALOG_NAME_VIEW)).show_query())

 def list_processes_feature_split():
     """
@@ -3,6 +3,7 @@ import tdfs4ds
 from tdfs4ds.utils.query_management import execute_query_wrapper
 import uuid
 import json
+from tdfs4ds import logger,logger_safe

 @execute_query_wrapper
 def register_process_view(view_name, entity_id, feature_names, metadata={}, entity_null_substitute = {}, **kwargs):
@@ -74,80 +75,91 @@ def _register_process_view_merge(view_name, entity_id, feature_names, metadata={
     - Requires 'tdml' module for DataFrame operations and 'uuid' for generating unique identifiers.
     """

-
-    # Handling the case where the view name is provided as a DataFrame
-    if type(view_name) == tdml.dataframe.dataframe.DataFrame:
+    # Handle teradataml DataFrame input
+    if isinstance(view_name, tdml.dataframe.dataframe.DataFrame):
         try:
             view_name = view_name._table_name
-        except:
-            print(
-                'create your teradata dataframe using tdml.DataFrame(<view name>). Crystallize your view if needed')
+        except Exception:
+            logger_safe(
+                "error",
+                "Invalid DataFrame for view registration. Use: tdml.DataFrame(<table/view>). Crystallize if needed."
+            )
            raise

+    # Prevent using temporary teradataml views
     if view_name.split('.')[1].startswith('ml__'):
-        tdfs4ds.logger.error('Your dataframe is a temporary teradataml dataframe. Please crystallize your view first.')
-        raise ValueError("Invalid process view name: it starts with 'ml__'. Please consider view crystallization")
-
-    # Get filter manager:
+        logger_safe(
+            "error",
+            "Invalid view name '%s': starts with 'ml__'. Please crystallize your view first.",
+            view_name
+        )
+        raise ValueError("Invalid process view name: temporary teradataml views are not allowed.")
+
+    # Get optional arguments
     filtermanager = kwargs.get('filtermanager', None)
-    if filtermanager is None:
-        query_upsert_filtermanager = None
-
-    # Get data distribution related inputs:
-    primary_index = kwargs.get('primary_index', [e for e in entity_id.keys()])
+    query_upsert_filtermanager = None
+    primary_index = kwargs.get('primary_index', list(entity_id.keys()))
     partitioning = kwargs.get('partitioning', '').replace("'", '"')

     if primary_index is None:
-        primary_index = [e for e in entity_id.keys()]
+        primary_index = list(entity_id.keys())

+    feature_names = ','.join(feature_names)

+    # Validtime period
+    end_period_ = '9999-01-01 00:00:00' if tdfs4ds.END_PERIOD == 'UNTIL_CHANGED' else tdfs4ds.END_PERIOD
+    validtime_statement = (
+        'CURRENT VALIDTIME'
+        if tdfs4ds.FEATURE_STORE_TIME is None
+        else f"VALIDTIME PERIOD '({tdfs4ds.FEATURE_STORE_TIME},{end_period_})'"
+    )

-    # Joining the feature names into a comma-separated string
-    feature_names = ','.join(feature_names)
+    logger_safe("info", "Registering process view: %s", view_name)

-    # Setting the end period for the view
-    if tdfs4ds.END_PERIOD == 'UNTIL_CHANGED':
-        end_period_ = '9999-01-01 00:00:00'
-    else:
-        end_period_ = tdfs4ds.END_PERIOD
+    # Check if view already exists in catalog
+    query_process_id = f"""
+    SEL PROCESS_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME_VIEW}
+    WHERE view_name = '{view_name}'
+    """
+    process_id_result = tdml.execute_sql(query_process_id).fetchall()

-    if tdfs4ds.FEATURE_STORE_TIME == None:
-        validtime_statement = 'CURRENT VALIDTIME'
-    else:
-        validtime_statement = f"VALIDTIME PERIOD '({tdfs4ds.FEATURE_STORE_TIME},{end_period_})'"
+    if process_id_result:
+        process_id = process_id_result[0][0]
+        logger_safe("info", "Updating existing process_id=%s", process_id)

+        query_feature_version = f"""
+        SEL PROCESS_VERSION FROM {tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME_VIEW}
+        WHERE view_name = '{view_name}'
+        """
+        feature_version = tdml.execute_sql(query_feature_version).fetchall()[0][0]

-    query_process_id = f"SEL PROCESS_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME_VIEW} WHERE view_name = '{view_name}'"
-    process_id = tdml.execute_sql(query_process_id).fetchall()
-    if len(process_id)>0:
-        process_id = process_id[0][0]
-        query_feature_version = f"SEL PROCESS_VERSION FROM {tdfs4ds.SCHEMA}.{tdfs4ds.PROCESS_CATALOG_NAME_VIEW} WHERE view_name = '{view_name}'"
-        feature_version = tdml.execute_sql(query_feature_version).fetchall()[0][0]
-        query_primary_index = f"SEL FOR_PRIMARY_INDEX, FOR_DATA_PARTITIONING FROM {tdfs4ds.SCHEMA}.{tdfs4ds.DATA_DISTRIBUTION_NAME} WHERE process_id = '{process_id}'"
-        query_primary_index_res = tdml.execute_sql(query_primary_index).fetchall()
-        if len(query_primary_index_res)>0:
-            FOR_PRIMARY_INDEX, FOR_DATA_PARTITIONING = tdml.execute_sql(query_primary_index).fetchall()[0]
+        query_primary_index = f"""
+        SEL FOR_PRIMARY_INDEX, FOR_DATA_PARTITIONING
+        FROM {tdfs4ds.SCHEMA}.{tdfs4ds.DATA_DISTRIBUTION_NAME}
+        WHERE process_id = '{process_id}'
+        """
+        dist_res = tdml.execute_sql(query_primary_index).fetchall()
+        if dist_res:
+            FOR_PRIMARY_INDEX, FOR_DATA_PARTITIONING = dist_res[0]
         else:
-            raise ValueError(f"""
-            There is not information on primary index and partitioning for process: {process_id}.
-            The working date is: {validtime_statement}
-            The content of the distribution table is:
-            {print(tdml.DataFrame.from_query(f"SEL * FROM {tdfs4ds.SCHEMA}.{tdfs4ds.DATA_DISTRIBUTION_NAME} WHERE process_id = '{process_id}'"))}
-            """)
+            logger_safe(
+                "error",
+                "Missing data distribution info for existing process %s. Check distribution table.",
+                process_id
+            )
+            raise ValueError("Missing distribution info.")
     else:
-        # Generating a unique process identifier
         process_id = str(uuid.uuid4())
         feature_version = 1
         FOR_PRIMARY_INDEX = ",".join(primary_index)
         FOR_DATA_PARTITIONING = partitioning
+        logger_safe("info", "Generated new process_id=%s", process_id)

-    # Create a comma-separated string of entity IDs
-    entity_id_list = list(entity_id.keys())
-    entity_id_list.sort()
-    ENTITY_ID__ = ','.join([k for k in entity_id_list])
+    # Build entity_id string
+    ENTITY_ID__ = ','.join(sorted(entity_id.keys()))
+    logger_safe("debug", "Entity IDs: %s", ENTITY_ID__)
+    logger_safe("debug", "Feature names: %s", feature_names)

-    print('feature_version :',feature_version)
-    print('int(feature_version) :', int(feature_version))
     if tdfs4ds.FEATURE_STORE_TIME == None:


@@ -402,16 +414,16 @@ def _register_process_view_merge(view_name, entity_id, feature_names, metadata={
     """


-    # Logging the process registration
-    print(f'register process with id : {process_id}')
-    print(f"to run the process again just type : run(process_id='{process_id}')")
-    print(f"to update your dataset : dataset = run(process_id='{process_id}',return_dataset=True)")
+    logger_safe("info", "Process registered: process_id=%s", process_id)
+    logger_safe("info", "To rerun: run(process_id='%s')", process_id)
+    logger_safe("info", "To build dataset: dataset = run(process_id='%s', return_dataset=True)", process_id)

-    #print('query_insert_dist', query_upsert_dist)
+    # Return queries
     if kwargs.get('with_process_id'):
         return query_upsert, process_id, query_upsert_dist, query_upsert_filtermanager
     else:
         return query_upsert, query_upsert_dist, query_upsert_filtermanager
+
 @execute_query_wrapper
 def _register_process_view_update_insert(view_name, entity_id, feature_names, metadata={}, entity_null_substitute={}, **kwargs):
     """