tdfs4ds 0.2.4.23__py3-none-any.whl → 0.2.4.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tdfs4ds/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- __version__ = '0.2.4.23'
1
+ __version__ = '0.2.4.24'
2
2
  import logging
3
3
  # Setup the logger
4
4
  logging.basicConfig(
@@ -743,6 +743,9 @@ def GetTheLargestFeatureID():
743
743
  return feature_id
744
744
 
745
745
 
746
+ import pandas as pd
747
+ import uuid
748
+
746
749
  def GetAlreadyExistingFeatureNames(feature_name, entity_id):
747
750
  """
748
751
  Retrieves a list of already existing feature names from the feature catalog table in a Teradata database.
@@ -751,8 +754,6 @@ def GetAlreadyExistingFeatureNames(feature_name, entity_id):
751
754
  Parameters:
752
755
  - feature_name (str/list): The name(s) of the feature(s) to check. Can be a single feature name or a list of names.
753
756
  - entity_id (dict): A dictionary representing the entity ID, where keys are used to identify the entity.
754
- - schema (str): The schema name where the feature catalog table is located.
755
- - table_name (str, optional): The name of the feature catalog table. Defaults to 'FS_FEATURE_CATALOG'.
756
757
 
757
758
  Returns:
758
759
  list: A list of feature names that already exist in the feature catalog table and match the given feature name and entity ID.
@@ -761,40 +762,54 @@ def GetAlreadyExistingFeatureNames(feature_name, entity_id):
761
762
  - The function creates a temporary table in the database to facilitate the comparison.
762
763
  - It assumes that the feature catalog table exists and is accessible in the specified schema.
763
764
  - The function is designed for use with Teradata databases and assumes appropriate database access.
764
-
765
- Example Usage:
766
- >>> entity_id_dict = {'customer_id': 'INTEGER'}
767
- >>> existing_features = GetAlreadyExistingFeatureNames('age', entity_id_dict)
768
- >>> print(f"Existing features: {existing_features}")
769
765
  """
770
766
 
771
- # Create a temporary DataFrame with the feature name.
772
- list_entity = list(entity_id.keys())
773
- list_entity.sort()
767
+ # Ensure feature_name is a list
768
+ if isinstance(feature_name, str):
769
+ feature_name = [feature_name]
770
+
771
+ # Create a temporary DataFrame with the feature name(s)
772
+ list_entity = sorted(entity_id.keys())
774
773
  df = pd.DataFrame({
775
774
  'FEATURE_NAME': feature_name,
776
775
  'DATA_DOMAIN': tdfs4ds.DATA_DOMAIN,
777
- 'ENTITY_NAME': ','.join([k for k in list_entity])
776
+ 'ENTITY_NAME': ','.join(list_entity)
778
777
  })
779
778
 
780
- # Define a temporary table name.
781
- tmp_name = 'tdfs__fgjnojnsmdoignmosnig'
782
-
783
- # Copy the temporary DataFrame to a temporary table in the Teradata database.
784
- tdml.copy_to_sql(df, schema_name=tdfs4ds.SCHEMA, table_name=tmp_name, if_exists='replace',
785
- types={'FEATURE_NAME': tdml.VARCHAR(length=255, charset='LATIN')})
779
+ # Generate a unique temporary table name
780
+ tmp_name = f"tdfs_tmp_{uuid.uuid4().hex[:12]}"
786
781
 
787
- # Execute a SQL query to get the feature names that exist in both the temporary table and the feature catalog table.
788
- existing_features = list(tdml.DataFrame.from_query(f"""
789
- SEL A.FEATURE_NAME
790
- FROM {tdfs4ds.SCHEMA}.{tmp_name} A
791
- INNER JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
792
- ON A.FEATURE_NAME = B.FEATURE_NAME
793
- AND A.ENTITY_NAME = B.ENTITY_NAME
794
- AND A.DATA_DOMAIN = B.DATA_DOMAIN
795
- """).to_pandas().FEATURE_NAME.values)
782
+ try:
783
+ # Copy the temporary DataFrame to a temporary table in the Teradata database
784
+ tdml.copy_to_sql(
785
+ df,
786
+ schema_name=tdfs4ds.SCHEMA,
787
+ table_name=tmp_name,
788
+ if_exists='replace',
789
+ types={'FEATURE_NAME': tdml.VARCHAR(length=255, charset='LATIN')}
790
+ )
791
+
792
+ # Execute a SQL query to get the feature names that exist in both the temporary table and the feature catalog table
793
+ query = f"""
794
+ SEL A.FEATURE_NAME
795
+ FROM {tdfs4ds.SCHEMA}.{tmp_name} A
796
+ INNER JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
797
+ ON A.FEATURE_NAME = B.FEATURE_NAME
798
+ AND A.ENTITY_NAME = B.ENTITY_NAME
799
+ AND A.DATA_DOMAIN = B.DATA_DOMAIN
800
+ """
801
+ existing_features = list(
802
+ tdml.DataFrame.from_query(query).to_pandas().FEATURE_NAME.values
803
+ )
804
+
805
+ finally:
806
+ # Clean up: drop the temporary table
807
+ try:
808
+ tdml.execute(f"DROP TABLE {tdfs4ds.SCHEMA}.{tmp_name};")
809
+ except Exception as e:
810
+ # Ignore if already dropped or not found
811
+ pass
796
812
 
797
- # Return the list of existing features.
798
813
  return existing_features
799
814
 
800
815
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tdfs4ds
3
- Version: 0.2.4.23
3
+ Version: 0.2.4.24
4
4
  Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
5
5
  Author: Denis Molin
6
6
  Requires-Python: >=3.6
@@ -156,12 +156,15 @@ Here is the structure of the package:
156
156
  │ └── Function: outstanding_amounts_dataset
157
157
  │ └── Function: upload_outstanding_amounts_dataset
158
158
  ├── __init__.py
159
- │ └── Function: _build_time_series
160
159
  │ └── Function: _upload_features
160
+ │ └── Function: augment_source_with_features
161
161
  │ └── Function: build_dataset
162
- │ └── Function: build_dataset_time_series
162
+ │ └── Function: build_dataset_opt
163
163
  │ └── Function: connect
164
+ │ └── Function: dataset_catalog
164
165
  │ └── Function: feature_catalog
166
+ │ └── Function: get_dataset_entity
167
+ │ └── Function: get_dataset_features
165
168
  │ └── Function: process_catalog
166
169
  │ └── Function: roll_out
167
170
  │ └── Function: run
@@ -169,6 +172,13 @@ Here is the structure of the package:
169
172
  │ └── Function: upload_features
170
173
  │ └── Function: upload_tdstone2_scores
171
174
  └── data
175
+ └── logo
176
+ └── dataset
177
+ ├── dataset.py
178
+ │ └── Class: Dataset
179
+ ├── dataset_catalog.py
180
+ │ └── Class: DatasetCatalog
181
+ ├── __init__.py
172
182
  └── feature_store
173
183
  ├── entity_management.py
174
184
  │ └── Function: register_entity
@@ -177,56 +187,89 @@ Here is the structure of the package:
177
187
  ├── feature_data_processing.py
178
188
  │ └── Function: _store_feature_merge
179
189
  │ └── Function: _store_feature_update_insert
190
+ │ └── Function: apply_collect_stats
191
+ │ └── Function: generate_collect_stats
192
+ │ └── Function: generate_on_clause
193
+ │ └── Function: get_feature_id_and_conversion
180
194
  │ └── Function: prepare_feature_ingestion
181
195
  │ └── Function: prepare_feature_ingestion_tdstone2
182
196
  │ └── Function: store_feature
183
197
  ├── feature_query_retrieval.py
198
+ │ └── Function: generate_uuid_from_string
199
+ │ └── Function: get_available_entity_id_records
184
200
  │ └── Function: get_available_features
185
201
  │ └── Function: get_entity_tables
202
+ │ └── Function: get_feature_location
186
203
  │ └── Function: get_feature_store_content
187
204
  │ └── Function: get_feature_store_table_name
188
205
  │ └── Function: get_feature_versions
189
206
  │ └── Function: get_list_entity
190
207
  │ └── Function: get_list_features
191
208
  │ └── Function: list_features
209
+ │ └── Function: write_where_clause_filter
192
210
  ├── feature_store_management.py
193
211
  │ └── Function: GetAlreadyExistingFeatureNames
194
212
  │ └── Function: GetTheLargestFeatureID
195
213
  │ └── Function: Gettdtypes
214
+ │ └── Function: _register_features_merge
215
+ │ └── Function: _register_features_update_insert
196
216
  │ └── Function: delete_feature
197
217
  │ └── Function: feature_store_catalog_creation
218
+ │ └── Function: feature_store_catalog_view_creation
198
219
  │ └── Function: feature_store_table_creation
199
220
  │ └── Function: register_features
200
221
  │ └── Function: remove_feature
201
222
  │ └── Function: tdstone2_Gettdtypes
202
223
  ├── __init__.py
203
224
  └── process_store
225
+ ├── process_followup.py
226
+ │ └── Function: follow_up_report
227
+ │ └── Function: follow_up_table_creation
228
+ │ └── Function: followup_close
229
+ │ └── Function: followup_open
204
230
  ├── process_query_administration.py
205
231
  │ └── Function: get_process_id
206
232
  │ └── Function: list_processes
233
+ │ └── Function: list_processes_feature_split
207
234
  │ └── Function: remove_process
208
235
  ├── process_registration_management.py
236
+ │ └── Function: _register_process_view_merge
237
+ │ └── Function: _register_process_view_update_insert
209
238
  │ └── Function: register_process_tdstone
210
239
  │ └── Function: register_process_view
211
240
  ├── process_store_catalog_management.py
212
241
  │ └── Function: process_store_catalog_creation
242
+ │ └── Function: process_store_catalog_view_creation
243
+ │ └── Function: upgrade_process_catalog
213
244
  ├── __init__.py
214
245
  └── utils
246
+ ├── filter_management.py
247
+ │ └── Function: get_hidden_table_name
248
+ │ └── Class: FilterManager
215
249
  ├── info.py
250
+ │ └── Function: extract_partition_content
251
+ │ └── Function: generate_partitioning_clause
216
252
  │ └── Function: get_column_types
217
253
  │ └── Function: get_column_types_simple
254
+ │ └── Function: get_feature_types_sql_format
255
+ │ └── Function: seconds_to_dhms
256
+ │ └── Function: update_varchar_length
218
257
  ├── lineage.py
219
258
  │ └── Function: _analyze_sql_query
220
259
  │ └── Function: analyze_sql_query
221
260
  │ └── Function: crystallize_view
261
+ │ └── Function: generate_process_report
222
262
  │ └── Function: generate_view_dependency_network
223
263
  │ └── Function: generate_view_dependency_network_fs
224
264
  │ └── Function: get_ddl
265
+ │ └── Function: query_change_case
266
+ │ └── Function: query_replace
225
267
  ├── query_management.py
226
268
  │ └── Function: execute_query
227
269
  │ └── Function: execute_query_wrapper
228
270
  │ └── Function: is_version_greater_than
229
271
  ├── time_management.py
272
+ │ └── Function: get_hidden_table_name
230
273
  │ └── Class: TimeManager
231
274
  ├── visualization.py
232
275
  │ └── Function: display_table
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
2
2
  tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
3
3
  tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
4
4
  tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
5
- tdfs4ds/__init__.py,sha256=FNpQfJ5wgZez8WqMl25csWVISP_Pk38ytayVUQG-l3I,64168
5
+ tdfs4ds/__init__.py,sha256=vKZp_W-fEms4vEvXDyUSdBA4o5-YWU8cx16tq57jNPY,64168
6
6
  tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
7
7
  tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
8
8
  tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
@@ -19,7 +19,7 @@ tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaU
19
19
  tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
20
20
  tdfs4ds/feature_store/feature_data_processing.py,sha256=rvpnFrV6Tmg8C6xcSQLT_lrFYqZsdSzFXmS-4suK9qg,42847
21
21
  tdfs4ds/feature_store/feature_query_retrieval.py,sha256=zuHRZhL6-qyLpPS7mWgRy1WingSN5iibkbi53Q7jfAs,33834
22
- tdfs4ds/feature_store/feature_store_management.py,sha256=e_hBsGhtqA6vHBu2Mhy4URkYe4SFaHijXXdqqWr-3tg,56154
22
+ tdfs4ds/feature_store/feature_store_management.py,sha256=bVJbGZx1zj8Ph3q_qb939bxXSqsDl6e6SZw_GGzJ0qk,56238
23
23
  tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
24
24
  tdfs4ds/process_store/process_followup.py,sha256=PvLcU7meg3ljBlPfuez3qwTVqpHHhVJxYxGqjgiHE8E,7265
25
25
  tdfs4ds/process_store/process_query_administration.py,sha256=DsIt97cBoJ7NcpQzbQt55eUFNgXGdOMm5Hh2aX5v0PY,7762
@@ -32,7 +32,7 @@ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,3783
32
32
  tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
33
33
  tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
34
34
  tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
35
- tdfs4ds-0.2.4.23.dist-info/METADATA,sha256=scT7tts-iJvQ9OGkpgJ9vQJcbl55VdX8kiHfB-9AfAU,12028
36
- tdfs4ds-0.2.4.23.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
37
- tdfs4ds-0.2.4.23.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
38
- tdfs4ds-0.2.4.23.dist-info/RECORD,,
35
+ tdfs4ds-0.2.4.24.dist-info/METADATA,sha256=et6II_3-JvvDjxw4JJWNEMGtpMtGPpF70MKHFy3K6RM,14326
36
+ tdfs4ds-0.2.4.24.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
37
+ tdfs4ds-0.2.4.24.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
38
+ tdfs4ds-0.2.4.24.dist-info/RECORD,,