tdfs4ds 0.2.4.22__py3-none-any.whl → 0.2.4.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +1 -1
- tdfs4ds/data/logo/tdfs4ds_logo.png +0 -0
- tdfs4ds/data/logo/teradata_sym_rgb_pos.png +0 -0
- tdfs4ds/data/logo/teradata_sym_rgb_wht_rev.png +0 -0
- tdfs4ds/feature_store/feature_store_management.py +42 -27
- {tdfs4ds-0.2.4.22.dist-info → tdfs4ds-0.2.4.24.dist-info}/METADATA +46 -3
- {tdfs4ds-0.2.4.22.dist-info → tdfs4ds-0.2.4.24.dist-info}/RECORD +9 -6
- {tdfs4ds-0.2.4.22.dist-info → tdfs4ds-0.2.4.24.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.22.dist-info → tdfs4ds-0.2.4.24.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -743,6 +743,9 @@ def GetTheLargestFeatureID():
|
|
|
743
743
|
return feature_id
|
|
744
744
|
|
|
745
745
|
|
|
746
|
+
import pandas as pd
|
|
747
|
+
import uuid
|
|
748
|
+
|
|
746
749
|
def GetAlreadyExistingFeatureNames(feature_name, entity_id):
|
|
747
750
|
"""
|
|
748
751
|
Retrieves a list of already existing feature names from the feature catalog table in a Teradata database.
|
|
@@ -751,8 +754,6 @@ def GetAlreadyExistingFeatureNames(feature_name, entity_id):
|
|
|
751
754
|
Parameters:
|
|
752
755
|
- feature_name (str/list): The name(s) of the feature(s) to check. Can be a single feature name or a list of names.
|
|
753
756
|
- entity_id (dict): A dictionary representing the entity ID, where keys are used to identify the entity.
|
|
754
|
-
- schema (str): The schema name where the feature catalog table is located.
|
|
755
|
-
- table_name (str, optional): The name of the feature catalog table. Defaults to 'FS_FEATURE_CATALOG'.
|
|
756
757
|
|
|
757
758
|
Returns:
|
|
758
759
|
list: A list of feature names that already exist in the feature catalog table and match the given feature name and entity ID.
|
|
@@ -761,40 +762,54 @@ def GetAlreadyExistingFeatureNames(feature_name, entity_id):
|
|
|
761
762
|
- The function creates a temporary table in the database to facilitate the comparison.
|
|
762
763
|
- It assumes that the feature catalog table exists and is accessible in the specified schema.
|
|
763
764
|
- The function is designed for use with Teradata databases and assumes appropriate database access.
|
|
764
|
-
|
|
765
|
-
Example Usage:
|
|
766
|
-
>>> entity_id_dict = {'customer_id': 'INTEGER'}
|
|
767
|
-
>>> existing_features = GetAlreadyExistingFeatureNames('age', entity_id_dict)
|
|
768
|
-
>>> print(f"Existing features: {existing_features}")
|
|
769
765
|
"""
|
|
770
766
|
|
|
771
|
-
#
|
|
772
|
-
|
|
773
|
-
|
|
767
|
+
# Ensure feature_name is a list
|
|
768
|
+
if isinstance(feature_name, str):
|
|
769
|
+
feature_name = [feature_name]
|
|
770
|
+
|
|
771
|
+
# Create a temporary DataFrame with the feature name(s)
|
|
772
|
+
list_entity = sorted(entity_id.keys())
|
|
774
773
|
df = pd.DataFrame({
|
|
775
774
|
'FEATURE_NAME': feature_name,
|
|
776
775
|
'DATA_DOMAIN': tdfs4ds.DATA_DOMAIN,
|
|
777
|
-
'ENTITY_NAME': ','.join(
|
|
776
|
+
'ENTITY_NAME': ','.join(list_entity)
|
|
778
777
|
})
|
|
779
778
|
|
|
780
|
-
#
|
|
781
|
-
tmp_name =
|
|
782
|
-
|
|
783
|
-
# Copy the temporary DataFrame to a temporary table in the Teradata database.
|
|
784
|
-
tdml.copy_to_sql(df, schema_name=tdfs4ds.SCHEMA, table_name=tmp_name, if_exists='replace',
|
|
785
|
-
types={'FEATURE_NAME': tdml.VARCHAR(length=255, charset='LATIN')})
|
|
779
|
+
# Generate a unique temporary table name
|
|
780
|
+
tmp_name = f"tdfs_tmp_{uuid.uuid4().hex[:12]}"
|
|
786
781
|
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
782
|
+
try:
|
|
783
|
+
# Copy the temporary DataFrame to a temporary table in the Teradata database
|
|
784
|
+
tdml.copy_to_sql(
|
|
785
|
+
df,
|
|
786
|
+
schema_name=tdfs4ds.SCHEMA,
|
|
787
|
+
table_name=tmp_name,
|
|
788
|
+
if_exists='replace',
|
|
789
|
+
types={'FEATURE_NAME': tdml.VARCHAR(length=255, charset='LATIN')}
|
|
790
|
+
)
|
|
791
|
+
|
|
792
|
+
# Execute a SQL query to get the feature names that exist in both the temporary table and the feature catalog table
|
|
793
|
+
query = f"""
|
|
794
|
+
SEL A.FEATURE_NAME
|
|
795
|
+
FROM {tdfs4ds.SCHEMA}.{tmp_name} A
|
|
796
|
+
INNER JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW} B
|
|
797
|
+
ON A.FEATURE_NAME = B.FEATURE_NAME
|
|
798
|
+
AND A.ENTITY_NAME = B.ENTITY_NAME
|
|
799
|
+
AND A.DATA_DOMAIN = B.DATA_DOMAIN
|
|
800
|
+
"""
|
|
801
|
+
existing_features = list(
|
|
802
|
+
tdml.DataFrame.from_query(query).to_pandas().FEATURE_NAME.values
|
|
803
|
+
)
|
|
804
|
+
|
|
805
|
+
finally:
|
|
806
|
+
# Clean up: drop the temporary table
|
|
807
|
+
try:
|
|
808
|
+
tdml.execute(f"DROP TABLE {tdfs4ds.SCHEMA}.{tmp_name};")
|
|
809
|
+
except Exception as e:
|
|
810
|
+
# Ignore if already dropped or not found
|
|
811
|
+
pass
|
|
796
812
|
|
|
797
|
-
# Return the list of existing features.
|
|
798
813
|
return existing_features
|
|
799
814
|
|
|
800
815
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: tdfs4ds
|
|
3
|
-
Version: 0.2.4.22
|
|
3
|
+
Version: 0.2.4.24
|
|
4
4
|
Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
|
|
5
5
|
Author: Denis Molin
|
|
6
6
|
Requires-Python: >=3.6
|
|
@@ -156,12 +156,15 @@ Here is the structure of the package:
|
|
|
156
156
|
│ └── Function: outstanding_amounts_dataset
|
|
157
157
|
│ └── Function: upload_outstanding_amounts_dataset
|
|
158
158
|
├── __init__.py
|
|
159
|
-
│ └── Function: _build_time_series
|
|
160
159
|
│ └── Function: _upload_features
|
|
160
|
+
│ └── Function: augment_source_with_features
|
|
161
161
|
│ └── Function: build_dataset
|
|
162
|
-
│ └── Function:
|
|
162
|
+
│ └── Function: build_dataset_opt
|
|
163
163
|
│ └── Function: connect
|
|
164
|
+
│ └── Function: dataset_catalog
|
|
164
165
|
│ └── Function: feature_catalog
|
|
166
|
+
│ └── Function: get_dataset_entity
|
|
167
|
+
│ └── Function: get_dataset_features
|
|
165
168
|
│ └── Function: process_catalog
|
|
166
169
|
│ └── Function: roll_out
|
|
167
170
|
│ └── Function: run
|
|
@@ -169,6 +172,13 @@ Here is the structure of the package:
|
|
|
169
172
|
│ └── Function: upload_features
|
|
170
173
|
│ └── Function: upload_tdstone2_scores
|
|
171
174
|
└── data
|
|
175
|
+
└── logo
|
|
176
|
+
└── dataset
|
|
177
|
+
├── dataset.py
|
|
178
|
+
│ └── Class: Dataset
|
|
179
|
+
├── dataset_catalog.py
|
|
180
|
+
│ └── Class: DatasetCatalog
|
|
181
|
+
├── __init__.py
|
|
172
182
|
└── feature_store
|
|
173
183
|
├── entity_management.py
|
|
174
184
|
│ └── Function: register_entity
|
|
@@ -177,56 +187,89 @@ Here is the structure of the package:
|
|
|
177
187
|
├── feature_data_processing.py
|
|
178
188
|
│ └── Function: _store_feature_merge
|
|
179
189
|
│ └── Function: _store_feature_update_insert
|
|
190
|
+
│ └── Function: apply_collect_stats
|
|
191
|
+
│ └── Function: generate_collect_stats
|
|
192
|
+
│ └── Function: generate_on_clause
|
|
193
|
+
│ └── Function: get_feature_id_and_conversion
|
|
180
194
|
│ └── Function: prepare_feature_ingestion
|
|
181
195
|
│ └── Function: prepare_feature_ingestion_tdstone2
|
|
182
196
|
│ └── Function: store_feature
|
|
183
197
|
├── feature_query_retrieval.py
|
|
198
|
+
│ └── Function: generate_uuid_from_string
|
|
199
|
+
│ └── Function: get_available_entity_id_records
|
|
184
200
|
│ └── Function: get_available_features
|
|
185
201
|
│ └── Function: get_entity_tables
|
|
202
|
+
│ └── Function: get_feature_location
|
|
186
203
|
│ └── Function: get_feature_store_content
|
|
187
204
|
│ └── Function: get_feature_store_table_name
|
|
188
205
|
│ └── Function: get_feature_versions
|
|
189
206
|
│ └── Function: get_list_entity
|
|
190
207
|
│ └── Function: get_list_features
|
|
191
208
|
│ └── Function: list_features
|
|
209
|
+
│ └── Function: write_where_clause_filter
|
|
192
210
|
├── feature_store_management.py
|
|
193
211
|
│ └── Function: GetAlreadyExistingFeatureNames
|
|
194
212
|
│ └── Function: GetTheLargestFeatureID
|
|
195
213
|
│ └── Function: Gettdtypes
|
|
214
|
+
│ └── Function: _register_features_merge
|
|
215
|
+
│ └── Function: _register_features_update_insert
|
|
196
216
|
│ └── Function: delete_feature
|
|
197
217
|
│ └── Function: feature_store_catalog_creation
|
|
218
|
+
│ └── Function: feature_store_catalog_view_creation
|
|
198
219
|
│ └── Function: feature_store_table_creation
|
|
199
220
|
│ └── Function: register_features
|
|
200
221
|
│ └── Function: remove_feature
|
|
201
222
|
│ └── Function: tdstone2_Gettdtypes
|
|
202
223
|
├── __init__.py
|
|
203
224
|
└── process_store
|
|
225
|
+
├── process_followup.py
|
|
226
|
+
│ └── Function: follow_up_report
|
|
227
|
+
│ └── Function: follow_up_table_creation
|
|
228
|
+
│ └── Function: followup_close
|
|
229
|
+
│ └── Function: followup_open
|
|
204
230
|
├── process_query_administration.py
|
|
205
231
|
│ └── Function: get_process_id
|
|
206
232
|
│ └── Function: list_processes
|
|
233
|
+
│ └── Function: list_processes_feature_split
|
|
207
234
|
│ └── Function: remove_process
|
|
208
235
|
├── process_registration_management.py
|
|
236
|
+
│ └── Function: _register_process_view_merge
|
|
237
|
+
│ └── Function: _register_process_view_update_insert
|
|
209
238
|
│ └── Function: register_process_tdstone
|
|
210
239
|
│ └── Function: register_process_view
|
|
211
240
|
├── process_store_catalog_management.py
|
|
212
241
|
│ └── Function: process_store_catalog_creation
|
|
242
|
+
│ └── Function: process_store_catalog_view_creation
|
|
243
|
+
│ └── Function: upgrade_process_catalog
|
|
213
244
|
├── __init__.py
|
|
214
245
|
└── utils
|
|
246
|
+
├── filter_management.py
|
|
247
|
+
│ └── Function: get_hidden_table_name
|
|
248
|
+
│ └── Class: FilterManager
|
|
215
249
|
├── info.py
|
|
250
|
+
│ └── Function: extract_partition_content
|
|
251
|
+
│ └── Function: generate_partitioning_clause
|
|
216
252
|
│ └── Function: get_column_types
|
|
217
253
|
│ └── Function: get_column_types_simple
|
|
254
|
+
│ └── Function: get_feature_types_sql_format
|
|
255
|
+
│ └── Function: seconds_to_dhms
|
|
256
|
+
│ └── Function: update_varchar_length
|
|
218
257
|
├── lineage.py
|
|
219
258
|
│ └── Function: _analyze_sql_query
|
|
220
259
|
│ └── Function: analyze_sql_query
|
|
221
260
|
│ └── Function: crystallize_view
|
|
261
|
+
│ └── Function: generate_process_report
|
|
222
262
|
│ └── Function: generate_view_dependency_network
|
|
223
263
|
│ └── Function: generate_view_dependency_network_fs
|
|
224
264
|
│ └── Function: get_ddl
|
|
265
|
+
│ └── Function: query_change_case
|
|
266
|
+
│ └── Function: query_replace
|
|
225
267
|
├── query_management.py
|
|
226
268
|
│ └── Function: execute_query
|
|
227
269
|
│ └── Function: execute_query_wrapper
|
|
228
270
|
│ └── Function: is_version_greater_than
|
|
229
271
|
├── time_management.py
|
|
272
|
+
│ └── Function: get_hidden_table_name
|
|
230
273
|
│ └── Class: TimeManager
|
|
231
274
|
├── visualization.py
|
|
232
275
|
│ └── Function: display_table
|
|
@@ -2,13 +2,16 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=vKZp_W-fEms4vEvXDyUSdBA4o5-YWU8cx16tq57jNPY,64168
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
9
9
|
tdfs4ds/process_store.py,sha256=W97pwqOwabo062ow_LfAXZmlSkcq8xTuwhwAX1EStlQ,16939
|
|
10
10
|
tdfs4ds/utils.py,sha256=xF1VP0NCgosXcKymOo_ofMMnvLEF228IxaxIl-f65uA,23312
|
|
11
11
|
tdfs4ds/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
12
|
+
tdfs4ds/data/logo/tdfs4ds_logo.png,sha256=OCKQnH0gQbRyupwZeiIgo-9c6mdRtjE2E2Zunr_4Ae0,363980
|
|
13
|
+
tdfs4ds/data/logo/teradata_sym_rgb_pos.png,sha256=Zq-QzLb04PIQ4iN8C6ssaLuNVVI1Q_TqBkFx_f7aNOI,8052
|
|
14
|
+
tdfs4ds/data/logo/teradata_sym_rgb_wht_rev.png,sha256=ETznIUnS38vlHek_CzjmcjnpthfCATCp2Ww0Dx8Th3Q,7803
|
|
12
15
|
tdfs4ds/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
16
|
tdfs4ds/dataset/dataset.py,sha256=caiQwT-RtdPe5MDtsynWMm1n12OxftgMp7_BR9SCHKw,5360
|
|
14
17
|
tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22Tsd5k,16638
|
|
@@ -16,7 +19,7 @@ tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaU
|
|
|
16
19
|
tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
|
|
17
20
|
tdfs4ds/feature_store/feature_data_processing.py,sha256=rvpnFrV6Tmg8C6xcSQLT_lrFYqZsdSzFXmS-4suK9qg,42847
|
|
18
21
|
tdfs4ds/feature_store/feature_query_retrieval.py,sha256=zuHRZhL6-qyLpPS7mWgRy1WingSN5iibkbi53Q7jfAs,33834
|
|
19
|
-
tdfs4ds/feature_store/feature_store_management.py,sha256=
|
|
22
|
+
tdfs4ds/feature_store/feature_store_management.py,sha256=bVJbGZx1zj8Ph3q_qb939bxXSqsDl6e6SZw_GGzJ0qk,56238
|
|
20
23
|
tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
|
|
21
24
|
tdfs4ds/process_store/process_followup.py,sha256=PvLcU7meg3ljBlPfuez3qwTVqpHHhVJxYxGqjgiHE8E,7265
|
|
22
25
|
tdfs4ds/process_store/process_query_administration.py,sha256=DsIt97cBoJ7NcpQzbQt55eUFNgXGdOMm5Hh2aX5v0PY,7762
|
|
@@ -29,7 +32,7 @@ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,3783
|
|
|
29
32
|
tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
|
|
30
33
|
tdfs4ds/utils/time_management.py,sha256=1eqGs7rT3SGag0F30R3PzwiC7Aa7DKia2Ud0aSNKcPg,10593
|
|
31
34
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
32
|
-
tdfs4ds-0.2.4.
|
|
33
|
-
tdfs4ds-0.2.4.
|
|
34
|
-
tdfs4ds-0.2.4.
|
|
35
|
-
tdfs4ds-0.2.4.
|
|
35
|
+
tdfs4ds-0.2.4.24.dist-info/METADATA,sha256=et6II_3-JvvDjxw4JJWNEMGtpMtGPpF70MKHFy3K6RM,14326
|
|
36
|
+
tdfs4ds-0.2.4.24.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
37
|
+
tdfs4ds-0.2.4.24.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
38
|
+
tdfs4ds-0.2.4.24.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|