tdfs4ds 0.2.4.12__py3-none-any.whl → 0.2.4.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +1 -1
- tdfs4ds/feature_store/feature_data_processing.py +8 -0
- tdfs4ds/utils/info.py +2 -10
- {tdfs4ds-0.2.4.12.dist-info → tdfs4ds-0.2.4.13.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.4.12.dist-info → tdfs4ds-0.2.4.13.dist-info}/RECORD +7 -7
- {tdfs4ds-0.2.4.12.dist-info → tdfs4ds-0.2.4.13.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.4.12.dist-info → tdfs4ds-0.2.4.13.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
(hunk not shown)
tdfs4ds/feature_store/feature_data_processing.py
CHANGED
|
@@ -329,6 +329,14 @@ def prepare_feature_ingestion(df, entity_id, feature_names, feature_versions=Non
|
|
|
329
329
|
nb_duplicates = tdml.execute_sql(query_test_unicity).fetchall()[0][0]
|
|
330
330
|
if nb_duplicates is not None and nb_duplicates > 0:
|
|
331
331
|
tdfs4ds.logger.error(f"The process generates {nb_duplicates} duplicates")
|
|
332
|
+
query_test_unicity = f"""
|
|
333
|
+
SELECT TOP 3
|
|
334
|
+
{output_columns_unicity}
|
|
335
|
+
, count(*) as n
|
|
336
|
+
FROM {_get_database_username()}.{volatile_table_name}
|
|
337
|
+
GROUP BY {output_columns_unicity}
|
|
338
|
+
HAVING n > 1
|
|
339
|
+
"""
|
|
332
340
|
raise ValueError("Invalid process: the process generates duplicates.")
|
|
333
341
|
#tdfs4ds.logger.info(f"No duplicate found.")
|
|
334
342
|
except Exception as e:
|
tdfs4ds/utils/info.py
CHANGED
|
@@ -137,19 +137,11 @@ def extract_partition_content(partitioning):
|
|
|
137
137
|
Returns:
|
|
138
138
|
str: The content within the parentheses after 'PARTITION BY', or None if no match is found.
|
|
139
139
|
"""
|
|
140
|
-
|
|
141
|
-
pattern = r'PARTITION\s+BY\s*\(\s*(.*?)\s*\)'
|
|
140
|
+
pattern = r'PARTITION\s+BY\s*\((.*)\)' # Matches content within outer parentheses after PARTITION BY
|
|
142
141
|
match = re.search(pattern, partitioning, re.DOTALL)
|
|
143
142
|
|
|
144
143
|
if match:
|
|
145
|
-
|
|
146
|
-
# Second extraction: Get the content within the inner parentheses
|
|
147
|
-
inner_pattern = r'\((.*)\)'
|
|
148
|
-
inner_match = re.search(inner_pattern, result, re.DOTALL)
|
|
149
|
-
if inner_match:
|
|
150
|
-
return inner_match.group(1)
|
|
151
|
-
else:
|
|
152
|
-
return result
|
|
144
|
+
return match.group(1).strip()
|
|
153
145
|
else:
|
|
154
146
|
return None
|
|
155
147
|
|
|
{tdfs4ds-0.2.4.12.dist-info → tdfs4ds-0.2.4.13.dist-info}/RECORD
CHANGED
|
@@ -2,7 +2,7 @@ tdfs/__init__.py,sha256=7AcO7uB1opRCt7t2JOHworKimfAaDeO3boRW7u9Geo8,23
|
|
|
2
2
|
tdfs/datasets.py,sha256=-b2MPEKGki2V1M8iUcoDR9uc2krIK7u1CK-EhChvihs,985
|
|
3
3
|
tdfs/feature_store.py,sha256=Honu7eOAXxP4Ivz0mRlhuNkfTDzgZl5HB1WlQUwzcZ0,31354
|
|
4
4
|
tdfs/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
5
|
-
tdfs4ds/__init__.py,sha256=
|
|
5
|
+
tdfs4ds/__init__.py,sha256=lAuC88U7w8sXMOnyq-XvncIkBeQu4Mb0Qs6xt-leY0s,65845
|
|
6
6
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
7
7
|
tdfs4ds/feature_engineering.py,sha256=oVnZ2V_XNGE12LKC_fNfkrWSQZLgtYRmaf8Dispi6S4,7081
|
|
8
8
|
tdfs4ds/feature_store.py,sha256=y-oItPZw6nBkBcGAceaATZbkLPTsvpk0OnpzTxYofDs,68576
|
|
@@ -14,7 +14,7 @@ tdfs4ds/dataset/dataset.py,sha256=caiQwT-RtdPe5MDtsynWMm1n12OxftgMp7_BR9SCHKw,53
|
|
|
14
14
|
tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22Tsd5k,16638
|
|
15
15
|
tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaUGCnI,209
|
|
16
16
|
tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
|
|
17
|
-
tdfs4ds/feature_store/feature_data_processing.py,sha256=
|
|
17
|
+
tdfs4ds/feature_store/feature_data_processing.py,sha256=ZLr1MqSfqcHO4KuybKiiKnt9cPvQRhdrLRMpZyPsYXg,42643
|
|
18
18
|
tdfs4ds/feature_store/feature_query_retrieval.py,sha256=zuHRZhL6-qyLpPS7mWgRy1WingSN5iibkbi53Q7jfAs,33834
|
|
19
19
|
tdfs4ds/feature_store/feature_store_management.py,sha256=e_hBsGhtqA6vHBu2Mhy4URkYe4SFaHijXXdqqWr-3tg,56154
|
|
20
20
|
tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
|
|
@@ -24,12 +24,12 @@ tdfs4ds/process_store/process_registration_management.py,sha256=F8VlBoL-de98KnkM
|
|
|
24
24
|
tdfs4ds/process_store/process_store_catalog_management.py,sha256=H135RRTYn-pyWIqPVbHpuIyyvsaNrek6b1iPk8avJMI,16088
|
|
25
25
|
tdfs4ds/utils/__init__.py,sha256=-yTMfDLZbQnIRQ64s_bczzT21tDW2A8FZeq9PX5SgFU,168
|
|
26
26
|
tdfs4ds/utils/filter_management.py,sha256=7D47N_hnTSUVOkaV2XuKrlUFMxzWjDsCBvRYsH4lXdU,11073
|
|
27
|
-
tdfs4ds/utils/info.py,sha256=
|
|
27
|
+
tdfs4ds/utils/info.py,sha256=sShnUxXMlvCtQ6xtShDhqdpTr6sMG0dZQhNBFgUENDY,12058
|
|
28
28
|
tdfs4ds/utils/lineage.py,sha256=LI-5pG7D8lO3-YFa9qA6CrEackiYugV23_Vz9IpF5xw,28670
|
|
29
29
|
tdfs4ds/utils/query_management.py,sha256=nAcE8QY1GWAKgOtb-ubSfDVcnYbU7Ge8CruVRLoPtmY,6356
|
|
30
30
|
tdfs4ds/utils/time_management.py,sha256=_jbwdyZH4Yr3VzbUrq6X93FpXDCDEdH0iv56vX7j8mA,8446
|
|
31
31
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
32
|
-
tdfs4ds-0.2.4.
|
|
33
|
-
tdfs4ds-0.2.4.
|
|
34
|
-
tdfs4ds-0.2.4.
|
|
35
|
-
tdfs4ds-0.2.4.
|
|
32
|
+
tdfs4ds-0.2.4.13.dist-info/METADATA,sha256=b-SwW84vY9Hs3MS10r01QPUpn2p_onb7Hf-F-Xuw5Eg,11945
|
|
33
|
+
tdfs4ds-0.2.4.13.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
34
|
+
tdfs4ds-0.2.4.13.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
35
|
+
tdfs4ds-0.2.4.13.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|