tdfs4ds 0.2.5.3__py3-none-any.whl → 0.2.5.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tdfs4ds/__init__.py +1 -1
- tdfs4ds/feature_store/feature_store_management.py +14 -13
- tdfs4ds/genai/__init__.py +4 -2
- tdfs4ds/genai/documentation.py +251 -72
- {tdfs4ds-0.2.5.3.dist-info → tdfs4ds-0.2.5.4.dist-info}/METADATA +1 -1
- {tdfs4ds-0.2.5.3.dist-info → tdfs4ds-0.2.5.4.dist-info}/RECORD +8 -8
- {tdfs4ds-0.2.5.3.dist-info → tdfs4ds-0.2.5.4.dist-info}/WHEEL +0 -0
- {tdfs4ds-0.2.5.3.dist-info → tdfs4ds-0.2.5.4.dist-info}/top_level.txt +0 -0
tdfs4ds/__init__.py
CHANGED
|
@@ -1005,20 +1005,21 @@ def delete_feature(feature_name, entity_id, data_domain=None):
|
|
|
1005
1005
|
if tdfs4ds.DEBUG_MODE:
|
|
1006
1006
|
print('table name : ', table_name)
|
|
1007
1007
|
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1008
|
+
if False:
|
|
1009
|
+
query = f"""
|
|
1010
|
+
NONSEQUENCED VALIDTIME DELETE {table_name}
|
|
1011
|
+
WHERE FEATURE_ID = (
|
|
1012
|
+
SEL FEATURE_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
|
|
1013
|
+
WHERE FEATURE_NAME = '{feature_name}'
|
|
1014
|
+
AND DATA_DOMAIN = '{data_domain}'
|
|
1015
|
+
)"""
|
|
1016
|
+
if tdfs4ds.DEBUG_MODE:
|
|
1017
|
+
print(query)
|
|
1017
1018
|
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1019
|
+
try:
|
|
1020
|
+
tdml.execute_sql(query)
|
|
1021
|
+
except Exception as e:
|
|
1022
|
+
print(str(e).split('\n')[0])
|
|
1022
1023
|
|
|
1023
1024
|
return
|
|
1024
1025
|
|
tdfs4ds/genai/__init__.py
CHANGED
|
@@ -9,7 +9,8 @@ from .documentation import (
|
|
|
9
9
|
run_sql_documentation,
|
|
10
10
|
build_llm,
|
|
11
11
|
get_the_explain,
|
|
12
|
-
display_process_info
|
|
12
|
+
display_process_info,
|
|
13
|
+
feed_process_info_with_prompt_result
|
|
13
14
|
)
|
|
14
15
|
|
|
15
16
|
__all__ = [
|
|
@@ -23,5 +24,6 @@ __all__ = [
|
|
|
23
24
|
"run_sql_documentation",
|
|
24
25
|
"build_llm",
|
|
25
26
|
"get_the_explain",
|
|
26
|
-
"display_process_info"
|
|
27
|
+
"display_process_info",
|
|
28
|
+
"feed_process_info_with_prompt_result"
|
|
27
29
|
]
|
tdfs4ds/genai/documentation.py
CHANGED
|
@@ -342,8 +342,20 @@ def _print_documentation(
|
|
|
342
342
|
|
|
343
343
|
if _is_notebook():
|
|
344
344
|
title_html = f"<h2>{title}</h2>" if title else ""
|
|
345
|
-
entity_items =
|
|
346
|
-
|
|
345
|
+
entity_items = (
|
|
346
|
+
'\n'.join(f'<li><strong>{col}:</strong> {_md_to_html(desc)}</li>'
|
|
347
|
+
for col, desc in documented_entity_columns.items())
|
|
348
|
+
if documented_entity_columns is not None
|
|
349
|
+
else "<li><em>No entity columns documented.</em></li>"
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
feature_items = (
|
|
353
|
+
'\n'.join(f'<li><strong>{col}:</strong> {_md_to_html(desc)}</li>'
|
|
354
|
+
for col, desc in documented_feature_columns.items())
|
|
355
|
+
if documented_feature_columns is not None
|
|
356
|
+
else "<li><em>No feature columns documented.</em></li>"
|
|
357
|
+
)
|
|
358
|
+
|
|
347
359
|
|
|
348
360
|
# Build optional sections
|
|
349
361
|
sql_section = ""
|
|
@@ -614,13 +626,13 @@ def build_documentation_json_schema(columns: List[str], provider: str = "generic
|
|
|
614
626
|
# Fallback: generic JSON schema
|
|
615
627
|
return base_schema
|
|
616
628
|
|
|
617
|
-
|
|
618
629
|
def build_sql_documentation_chain(
|
|
619
630
|
llm: ChatOpenAI,
|
|
620
631
|
entity_columns: Sequence[str],
|
|
621
632
|
feature_columns: Sequence[str],
|
|
622
633
|
provider: str = "vllm",
|
|
623
634
|
json_constraint: bool = True,
|
|
635
|
+
prompt_only: bool = False
|
|
624
636
|
) -> Runnable:
|
|
625
637
|
"""
|
|
626
638
|
Build a LangChain Runnable that generates business-focused documentation
|
|
@@ -654,25 +666,32 @@ def build_sql_documentation_chain(
|
|
|
654
666
|
If False:
|
|
655
667
|
- the chain does not enforce JSON structure at the LLM level
|
|
656
668
|
- the model is only guided by the prompt (weaker guarantees)
|
|
669
|
+
prompt_only : bool, optional (default=False)
|
|
670
|
+
If True:
|
|
671
|
+
- returns only the prompt template, without attaching the LLM or parser
|
|
672
|
+
- useful for debugging, testing, or customizing the prompt before execution
|
|
673
|
+
If False:
|
|
674
|
+
- returns the full chain: prompt → LLM (optionally schema-guided) → JSON parser
|
|
657
675
|
|
|
658
676
|
Returns
|
|
659
677
|
-------
|
|
660
678
|
Runnable
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
679
|
+
If prompt_only=False:
|
|
680
|
+
A LangChain Runnable that executes:
|
|
681
|
+
prompt → LLM (optionally schema-guided) → JSON parser
|
|
682
|
+
When invoked with:
|
|
683
|
+
{
|
|
684
|
+
"sql_query": "...",
|
|
685
|
+
"columns_str": "Entity columns:\n- column1\n\nFeature columns:\n- column2\n..."
|
|
686
|
+
}
|
|
687
|
+
It returns:
|
|
688
|
+
dict[str, str]
|
|
689
|
+
A mapping of each requested column name to a short,
|
|
690
|
+
business-oriented description (≤ 5 sentences), plus a 'query_business_logic' key
|
|
691
|
+
containing a high-level description of the query's business logic (5-10 sentences), and an 'entity_description' key
|
|
692
|
+
with a holistic description of the entity (3-5 sentences).
|
|
693
|
+
If prompt_only=True:
|
|
694
|
+
The prompt template itself, for inspection or further customization.
|
|
676
695
|
|
|
677
696
|
Notes
|
|
678
697
|
-----
|
|
@@ -749,7 +768,10 @@ Columns to document (only document these):
|
|
|
749
768
|
raw = ai_msg.content
|
|
750
769
|
return parser.parse(raw)
|
|
751
770
|
|
|
752
|
-
|
|
771
|
+
if prompt_only:
|
|
772
|
+
return prompt
|
|
773
|
+
else:
|
|
774
|
+
return prompt | constrained_llm | RunnableLambda(_parse)
|
|
753
775
|
|
|
754
776
|
def run_sql_documentation(
|
|
755
777
|
chain: Runnable,
|
|
@@ -825,7 +847,10 @@ def run_sql_documentation(
|
|
|
825
847
|
"columns_str": columns_str,
|
|
826
848
|
"language" : language
|
|
827
849
|
})
|
|
828
|
-
|
|
850
|
+
if isinstance(result, dict):
|
|
851
|
+
logger_safe('info', f'run_sql_documentation: Successfully generated documentation for columns: {list(result.keys())}')
|
|
852
|
+
else:
|
|
853
|
+
logger_safe('info', f'run_sql_documentation: Successfully generated documentation prompt')
|
|
829
854
|
return result
|
|
830
855
|
except Exception as e:
|
|
831
856
|
logger_safe('error', f'run_sql_documentation: Failed to generate documentation: {e}')
|
|
@@ -839,6 +864,7 @@ def document_sql_query_columns(
|
|
|
839
864
|
language: str = "English",
|
|
840
865
|
provider: Optional[str] = None,
|
|
841
866
|
json_constraint: bool = True,
|
|
867
|
+
prompt_only: bool = False
|
|
842
868
|
) -> Dict[str, Any]:
|
|
843
869
|
"""
|
|
844
870
|
Convenience function to generate business-focused documentation for SQL query output columns
|
|
@@ -886,20 +912,30 @@ def document_sql_query_columns(
|
|
|
886
912
|
- the chain does not enforce JSON structure at the LLM level
|
|
887
913
|
- the model is only guided by the prompt (weaker guarantees)
|
|
888
914
|
|
|
915
|
+
prompt_only : bool, optional (default=False)
|
|
916
|
+
If True:
|
|
917
|
+
- returns only the prompt template, without executing the chain
|
|
918
|
+
- useful for debugging, testing, or customizing the prompt before execution
|
|
919
|
+
If False:
|
|
920
|
+
- executes the full chain and returns structured documentation
|
|
921
|
+
|
|
889
922
|
Returns
|
|
890
923
|
-------
|
|
891
924
|
dict
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
925
|
+
If prompt_only=False:
|
|
926
|
+
A dictionary with four keys:
|
|
927
|
+
- "query_business_logic": str containing the high-level business logic description of the query
|
|
928
|
+
- "entity_description": str containing the holistic description of the entity
|
|
929
|
+
- "entity_columns": dict[str, str] mapping each entity column name to its description
|
|
930
|
+
- "feature_columns": dict[str, str] mapping each feature column name to its description
|
|
931
|
+
If prompt_only=True:
|
|
932
|
+
The prompt template itself, for inspection or further customization.
|
|
897
933
|
|
|
898
934
|
Raises
|
|
899
935
|
------
|
|
900
936
|
ValueError
|
|
901
937
|
If any of the required tdfs4ds configuration variables (INSTRUCT_MODEL_URL,
|
|
902
|
-
INSTRUCT_MODEL_API_KEY, INSTRUCT_MODEL_MODEL) are not set.
|
|
938
|
+
INSTRUCT_MODEL_API_KEY, INSTRUCT_MODEL_MODEL, INSTRUCT_MODEL_PROVIDER) are not set.
|
|
903
939
|
|
|
904
940
|
Notes
|
|
905
941
|
-----
|
|
@@ -931,30 +967,35 @@ def document_sql_query_columns(
|
|
|
931
967
|
)
|
|
932
968
|
|
|
933
969
|
# Build the documentation chain
|
|
934
|
-
sql_doc_chain = build_sql_documentation_chain(llm, entity_columns, feature_columns, provider=provider, json_constraint=json_constraint)
|
|
970
|
+
sql_doc_chain = build_sql_documentation_chain(llm, entity_columns, feature_columns, provider=provider, json_constraint=json_constraint, prompt_only=prompt_only)
|
|
935
971
|
|
|
936
972
|
# Run the documentation
|
|
937
973
|
result = run_sql_documentation(sql_doc_chain, sql_query, entity_columns, feature_columns, language=language)
|
|
938
974
|
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
975
|
+
if prompt_only:
|
|
976
|
+
logger_safe('info', f'document_sql_query_columns: Successfully generated the prompt to be used with a LLM to generate the documentation')
|
|
977
|
+
return result
|
|
978
|
+
else:
|
|
979
|
+
# Separate entity columns, feature columns, entity description, and query logic
|
|
980
|
+
entity_docs = {k: v for k, v in result.items() if k in entity_columns}
|
|
981
|
+
feature_docs = {k: v for k, v in result.items() if k in feature_columns}
|
|
982
|
+
entity_desc = result.get("entity_description", "")
|
|
983
|
+
query_logic = result.get("query_business_logic", "")
|
|
944
984
|
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
985
|
+
logger_safe('info', f'document_sql_query_columns: Successfully completed documentation for {len(entity_docs)} entity columns, {len(feature_docs)} feature columns, entity description and query logic')
|
|
986
|
+
return {
|
|
987
|
+
"query_business_logic": query_logic,
|
|
988
|
+
"entity_description": entity_desc,
|
|
989
|
+
"entity_columns": entity_docs,
|
|
990
|
+
"feature_columns": feature_docs
|
|
991
|
+
}
|
|
952
992
|
|
|
953
993
|
|
|
954
994
|
def build_explain_documentation_chain(
|
|
955
995
|
llm: ChatOpenAI,
|
|
956
996
|
provider: str = "vllm",
|
|
957
997
|
json_constraint: bool = True,
|
|
998
|
+
prompt_only: bool = False
|
|
958
999
|
) -> Runnable:
|
|
959
1000
|
"""
|
|
960
1001
|
Build a LangChain Runnable that analyzes SQL EXPLAIN plans and generates
|
|
@@ -1112,7 +1153,10 @@ Return ONLY valid JSON with the four keys above.
|
|
|
1112
1153
|
raw = ai_msg.content
|
|
1113
1154
|
return parser.parse(raw)
|
|
1114
1155
|
|
|
1115
|
-
|
|
1156
|
+
if prompt_only:
|
|
1157
|
+
return prompt
|
|
1158
|
+
else:
|
|
1159
|
+
return prompt | constrained_llm | RunnableLambda(_parse)
|
|
1116
1160
|
|
|
1117
1161
|
|
|
1118
1162
|
def run_explain_documentation(
|
|
@@ -1144,7 +1188,10 @@ def run_explain_documentation(
|
|
|
1144
1188
|
"sql_query": sql_query,
|
|
1145
1189
|
"explain_plan": explain_plan
|
|
1146
1190
|
})
|
|
1147
|
-
|
|
1191
|
+
if isinstance(result, dict):
|
|
1192
|
+
logger_safe('info', f'run_explain_documentation: Successfully analyzed EXPLAIN plan. Score: {result.get("optimization_score", "N/A")}/5')
|
|
1193
|
+
else:
|
|
1194
|
+
logger_safe('info', 'run_explain_documentation: Successfully generated the prompt to be used with a LLM to generate the documentation')
|
|
1148
1195
|
return result
|
|
1149
1196
|
except Exception as e:
|
|
1150
1197
|
logger_safe('error', f'run_explain_documentation: Failed to analyze EXPLAIN plan: {e}')
|
|
@@ -1155,6 +1202,7 @@ def document_sql_query_explain(
|
|
|
1155
1202
|
sql_query: str,
|
|
1156
1203
|
provider: Optional[str] = None,
|
|
1157
1204
|
json_constraint: bool = True,
|
|
1205
|
+
prompt_only: bool = False
|
|
1158
1206
|
) -> Dict[str, Any]:
|
|
1159
1207
|
"""
|
|
1160
1208
|
Analyze a SQL query's EXPLAIN plan and return optimization recommendations.
|
|
@@ -1213,10 +1261,13 @@ def document_sql_query_explain(
|
|
|
1213
1261
|
# get the explain plan:
|
|
1214
1262
|
explain_plan = get_the_explain(sql_query)
|
|
1215
1263
|
# Build and run the EXPLAIN analysis chain
|
|
1216
|
-
explain_chain = build_explain_documentation_chain(llm, provider=provider, json_constraint=json_constraint)
|
|
1264
|
+
explain_chain = build_explain_documentation_chain(llm, provider=provider, json_constraint=json_constraint, prompt_only = prompt_only)
|
|
1217
1265
|
result = run_explain_documentation(explain_chain, sql_query, explain_plan)
|
|
1218
1266
|
|
|
1219
|
-
|
|
1267
|
+
if prompt_only:
|
|
1268
|
+
logger_safe('info', f'document_sql_query_explain: Successfully completed EXPLAIN prompt generation')
|
|
1269
|
+
else:
|
|
1270
|
+
logger_safe('info', f'document_sql_query_explain: Successfully completed EXPLAIN analysis. Score: {result.get("optimization_score", "N/A")}/5')
|
|
1220
1271
|
return result
|
|
1221
1272
|
|
|
1222
1273
|
def documentation_tables_creation():
|
|
@@ -1313,7 +1364,7 @@ def documentation_tables_creation():
|
|
|
1313
1364
|
logger_safe('info', 'documentation_tables_creation: Documentation tables creation process completed.')
|
|
1314
1365
|
return
|
|
1315
1366
|
|
|
1316
|
-
def document_process(process_id: str, language: str = "English", json_constraint: bool = True, show_sql_query: bool = False, show_explain_plan: bool = False, display: bool = True, upload: bool = True) -> Optional[Dict[str, Any]]:
|
|
1367
|
+
def document_process(process_id: str, language: str = "English", json_constraint: bool = True, show_sql_query: bool = False, show_explain_plan: bool = False, display: bool = True, upload: bool = True, prompt_only = False) -> Optional[Dict[str, Any]]:
|
|
1317
1368
|
"""
|
|
1318
1369
|
Generate and store documentation for a data process identified by process_id.
|
|
1319
1370
|
This function retrieves the SQL query and output columns for the process,
|
|
@@ -1387,32 +1438,51 @@ def document_process(process_id: str, language: str = "English", json_constraint
|
|
|
1387
1438
|
documentation = document_sql_query_columns(
|
|
1388
1439
|
sql_query = process_info['PROCESS_SQL'],
|
|
1389
1440
|
entity_columns = process_info['ENTITY_COLUMNS'],
|
|
1390
|
-
feature_columns = process_info['FEATURE_COLUMNS']
|
|
1441
|
+
feature_columns = process_info['FEATURE_COLUMNS'],
|
|
1442
|
+
prompt_only = prompt_only
|
|
1391
1443
|
)
|
|
1392
1444
|
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
process_info['
|
|
1404
|
-
process_info['
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1445
|
+
if prompt_only:
|
|
1446
|
+
process_info['PROMPT_BUSINESS_LOGIC_DESCRIPTION'] = documentation.messages[0].content
|
|
1447
|
+
logger_safe('info', 'Prompt available in the PROMPT_BUSINESS_LOGIC_DESCRIPTION field.')
|
|
1448
|
+
process_info['DOCUMENTED_SQL'] = None
|
|
1449
|
+
process_info['ENTITY_DESCRIPTION'] = None
|
|
1450
|
+
process_info['DOCUMENTED_ENTITY_COLUMNS'] = None
|
|
1451
|
+
process_info['DOCUMENTED_FEATURE_COLUMNS'] = None
|
|
1452
|
+
else:
|
|
1453
|
+
process_info['DOCUMENTED_SQL'] = documentation['query_business_logic']
|
|
1454
|
+
process_info['ENTITY_DESCRIPTION'] = documentation['entity_description']
|
|
1455
|
+
process_info['DOCUMENTED_ENTITY_COLUMNS'] = documentation['entity_columns']
|
|
1456
|
+
process_info['DOCUMENTED_FEATURE_COLUMNS'] = documentation['feature_columns']
|
|
1457
|
+
|
|
1458
|
+
explain_documentation = document_sql_query_explain(
|
|
1459
|
+
sql_query = process_info['PROCESS_SQL'],
|
|
1460
|
+
prompt_only=prompt_only
|
|
1461
|
+
)
|
|
1462
|
+
|
|
1463
|
+
if prompt_only:
|
|
1464
|
+
process_info['PROMPT_EXPLAIN_THE_EXPLAIN'] = explain_documentation.messages[0].content
|
|
1465
|
+
logger_safe('info', 'Prompt available in the PROMPT_EXPLAIN_THE_EXPLAIN field.')
|
|
1466
|
+
process_info['EXPLAIN_ANALYSIS'] = None
|
|
1467
|
+
process_info['OPTIMIZATION_SCORE'] = None
|
|
1468
|
+
process_info['EXPLAIN_WARNINGS'] = None
|
|
1469
|
+
process_info['EXPLAIN_RECOMMENDATIONS'] = None
|
|
1470
|
+
else:
|
|
1471
|
+
process_info['EXPLAIN_ANALYSIS'] = explain_documentation['explanation']
|
|
1472
|
+
process_info['OPTIMIZATION_SCORE'] = explain_documentation['optimization_score']
|
|
1473
|
+
process_info['EXPLAIN_WARNINGS'] = explain_documentation['warnings']
|
|
1474
|
+
process_info['EXPLAIN_RECOMMENDATIONS'] = explain_documentation['recommendations']
|
|
1475
|
+
|
|
1476
|
+
# Store the raw EXPLAIN plan if needed for display
|
|
1477
|
+
if show_explain_plan:
|
|
1478
|
+
process_info['RAW_EXPLAIN_PLAN'] = get_the_explain(process_info['PROCESS_SQL'])
|
|
1411
1479
|
|
|
1412
1480
|
# Upload the generated documentation to the documentation tables:
|
|
1413
|
-
if upload:
|
|
1481
|
+
if upload and prompt_only == False:
|
|
1414
1482
|
upload_documentation(process_info)
|
|
1415
1483
|
logger_safe('info', f'document_process: Uploaded documentation for process_id {process_id} to documentation tables.')
|
|
1484
|
+
|
|
1485
|
+
if upload and prompt_only == False:
|
|
1416
1486
|
upload_documentation_explain(process_info)
|
|
1417
1487
|
logger_safe('info', f'document_process: Uploaded EXPLAIN analysis for process_id {process_id} to documentation tables.')
|
|
1418
1488
|
|
|
@@ -1545,7 +1615,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
|
|
|
1545
1615
|
logger_safe('info', f'upload_documentation: Uploading documentation for process_id {process_id} into staging tables.')
|
|
1546
1616
|
tdml.copy_to_sql(
|
|
1547
1617
|
df_business_logic,
|
|
1548
|
-
table_name = "
|
|
1618
|
+
table_name = "DOC_PROCESS_BUSINESS_LOGIC_STAGING",
|
|
1549
1619
|
if_exists = 'replace',
|
|
1550
1620
|
temporary = True
|
|
1551
1621
|
)
|
|
@@ -1555,7 +1625,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
|
|
|
1555
1625
|
logger_safe('info', f'upload_documentation: Uploading feature documentation for process_id {process_id} into staging tables.')
|
|
1556
1626
|
tdml.copy_to_sql(
|
|
1557
1627
|
df_features,
|
|
1558
|
-
table_name = "
|
|
1628
|
+
table_name = "DOC_PROCESS_FEATURES_STAGING",
|
|
1559
1629
|
if_exists = 'replace',
|
|
1560
1630
|
temporary = True
|
|
1561
1631
|
)
|
|
@@ -1571,7 +1641,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
|
|
|
1571
1641
|
BUSINESS_LOGIC_DESCRIPTION,
|
|
1572
1642
|
ENTITY_DESCRIPTION,
|
|
1573
1643
|
ENTITY_COLUMNS_JSON
|
|
1574
|
-
FROM {_get_database_username()}.
|
|
1644
|
+
FROM {_get_database_username()}.DOC_PROCESS_BUSINESS_LOGIC_STAGING
|
|
1575
1645
|
) UPDATED
|
|
1576
1646
|
ON EXISTING.PROCESS_ID = UPDATED.PROCESS_ID
|
|
1577
1647
|
WHEN MATCHED THEN
|
|
@@ -1599,7 +1669,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
|
|
|
1599
1669
|
FC.FEATURE_ID,
|
|
1600
1670
|
A.FEATURE_NAME,
|
|
1601
1671
|
A.FEATURE_DESCRIPTION
|
|
1602
|
-
FROM {_get_database_username()}.
|
|
1672
|
+
FROM {_get_database_username()}.DOC_PROCESS_FEATURES_STAGING A
|
|
1603
1673
|
INNER JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} FC
|
|
1604
1674
|
ON UPPER(FC.FEATURE_NAME) = UPPER(A.FEATURE_NAME)
|
|
1605
1675
|
AND UPPER(FC.DATA_DOMAIN) = '{process_info['DATA_DOMAIN'].upper()}'
|
|
@@ -1627,7 +1697,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
|
|
|
1627
1697
|
WHERE PROCESS_ID = '{process_id}'
|
|
1628
1698
|
AND FEATURE_ID NOT IN (
|
|
1629
1699
|
SELECT FC.FEATURE_ID
|
|
1630
|
-
FROM {_get_database_username()}.
|
|
1700
|
+
FROM {_get_database_username()}.DOC_PROCESS_FEATURES_STAGING A
|
|
1631
1701
|
INNER JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} FC
|
|
1632
1702
|
ON UPPER(FC.FEATURE_NAME) = UPPER(A.FEATURE_NAME)
|
|
1633
1703
|
AND UPPER(FC.DATA_DOMAIN) = '{process_info['DATA_DOMAIN'].upper()}'
|
|
@@ -1658,8 +1728,8 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
|
|
|
1658
1728
|
raise
|
|
1659
1729
|
|
|
1660
1730
|
# remove staging tables
|
|
1661
|
-
tdml.execute_sql(f"DROP TABLE {_get_database_username()}.
|
|
1662
|
-
tdml.execute_sql(f"DROP TABLE {_get_database_username()}.
|
|
1731
|
+
tdml.execute_sql(f"DROP TABLE {_get_database_username()}.DOC_PROCESS_BUSINESS_LOGIC_STAGING")
|
|
1732
|
+
tdml.execute_sql(f"DROP TABLE {_get_database_username()}.DOC_PROCESS_FEATURES_STAGING")
|
|
1663
1733
|
logger_safe('info', f'upload_documentation: Successfully uploaded documentation for process_id {process_id}.')
|
|
1664
1734
|
|
|
1665
1735
|
return
|
|
@@ -1875,4 +1945,113 @@ def display_process_info(process_info: Dict[str, Any] = None, process_id : str =
|
|
|
1875
1945
|
explain_recommendations = process_info.get('EXPLAIN_RECOMMENDATIONS', None),
|
|
1876
1946
|
sql_query = process_info.get('PROCESS_SQL', None),
|
|
1877
1947
|
)
|
|
1878
|
-
return
|
|
1948
|
+
return
|
|
1949
|
+
|
|
1950
|
+
|
|
1951
|
+
def feed_process_info_with_prompt_result(
    process_info: Dict[str, Any],
    sql_documentation_response: Optional[Dict[str, Any]] = None,
    sql_explain_response: Optional[Dict[str, Any]] = None,
    display_info: bool = True,
    upload_info: bool = True,
) -> Dict[str, Any]:
    """
    Enrich a process_info dictionary with SQL documentation and EXPLAIN analysis results.

    The function copies the content of the two optional response dictionaries into
    `process_info` (which is modified in place), optionally uploads each part with
    `upload_documentation` / `upload_documentation_explain`, and optionally renders
    the result with `display_process_info`.

    Args:
        process_info (dict): Metadata about the SQL process. It must contain:
            - 'ENTITY_COLUMNS': columns representing the entity in the SQL query.
            - 'FEATURE_COLUMNS': columns representing features in the SQL query.
        sql_documentation_response (dict, optional): SQL documentation results. Expected keys:
            - 'query_business_logic': description of the query's purpose and logic.
            - 'entity_description': description of the entity represented by the query.
            - column names as keys, with their descriptions as values. Only keys found
              in 'ENTITY_COLUMNS' or 'FEATURE_COLUMNS' are kept.
            When None, the SQL documentation step is skipped entirely.
        sql_explain_response (dict, optional): EXPLAIN plan analysis results. Expected keys:
            - 'explanation': analysis of the EXPLAIN plan.
            - 'optimization_score': score for the query optimization level.
            - 'warnings': potential issues identified in the EXPLAIN plan.
            - 'recommendations': recommendations for query optimization.
            When None, the EXPLAIN step is skipped entirely.
        display_info (bool, optional): If True, calls `display_process_info` on the
            enriched dictionary. Defaults to True.
        upload_info (bool, optional): If True, calls `upload_documentation` and/or
            `upload_documentation_explain` after the corresponding update. Defaults to True.

    Returns:
        dict: The same `process_info` object, with these keys set when the matching
        response is provided and complete:
            - 'DOCUMENTED_SQL'
            - 'ENTITY_DESCRIPTION'
            - 'DOCUMENTED_ENTITY_COLUMNS'
            - 'DOCUMENTED_FEATURE_COLUMNS'
            - 'EXPLAIN_ANALYSIS'
            - 'OPTIMIZATION_SCORE'
            - 'EXPLAIN_WARNINGS'
            - 'EXPLAIN_RECOMMENDATIONS'

    Raises:
        KeyError: If `process_info` lacks 'ENTITY_COLUMNS' or 'FEATURE_COLUMNS'.

    Notes:
        Any exception raised while applying a response or uploading it is caught and
        logged through `logger_safe` (first line of the message only); it is not
        re-raised. Because keys are assigned one at a time, a response that is missing
        an expected key can leave `process_info` partially updated.

    Example:
        >>> process_info = {
        ...     'ENTITY_COLUMNS': ['customer_id', 'order_id'],
        ...     'FEATURE_COLUMNS': ['order_amount', 'order_date']
        ... }
        >>> sql_documentation_response = {
        ...     'query_business_logic': 'This query joins customer and order data...',
        ...     'entity_description': 'The customer entity represents...',
        ...     'customer_id': 'Unique identifier for customers.',
        ...     'order_amount': 'Total amount of the order.'
        ... }
        >>> sql_explain_response = {
        ...     'explanation': 'The EXPLAIN plan shows a nested loop join...',
        ...     'optimization_score': 3,
        ...     'warnings': ['Full table scan on orders table'],
        ...     'recommendations': ['Add index on orders.customer_id']
        ... }
        >>> enriched_info = feed_process_info_with_prompt_result(
        ...     process_info,
        ...     sql_documentation_response,
        ...     sql_explain_response,
        ...     display_info=False,
        ...     upload_info=False
        ... )
        >>> print(list(enriched_info))
        ['ENTITY_COLUMNS', 'FEATURE_COLUMNS', 'DOCUMENTED_SQL', 'ENTITY_DESCRIPTION',
        'DOCUMENTED_ENTITY_COLUMNS', 'DOCUMENTED_FEATURE_COLUMNS', 'EXPLAIN_ANALYSIS',
        'OPTIMIZATION_SCORE', 'EXPLAIN_WARNINGS', 'EXPLAIN_RECOMMENDATIONS']
    """

    # Read both column lists up front so a malformed process_info fails immediately.
    entity_columns = process_info['ENTITY_COLUMNS']
    feature_columns = process_info['FEATURE_COLUMNS']

    if sql_documentation_response is not None:
        try:
            process_info['DOCUMENTED_SQL'] = sql_documentation_response['query_business_logic']
            process_info['ENTITY_DESCRIPTION'] = sql_documentation_response['entity_description']
            process_info['DOCUMENTED_ENTITY_COLUMNS'] = {
                k: v for k, v in sql_documentation_response.items() if k in entity_columns
            }
            process_info['DOCUMENTED_FEATURE_COLUMNS'] = {
                k: v for k, v in sql_documentation_response.items() if k in feature_columns
            }
            logger_safe('info', 'update of the SQL documentation in process_info')
            if upload_info:
                upload_documentation(process_info)
        except Exception as e:
            # The first line is extracted outside the f-string: a backslash inside an
            # f-string replacement field is a SyntaxError before Python 3.12.
            first_line = str(e).split('\n')[0]
            logger_safe('error', f"error in updating the SQL documentation : {first_line}")

    if sql_explain_response is not None:
        try:
            process_info['EXPLAIN_ANALYSIS'] = sql_explain_response['explanation']
            process_info['OPTIMIZATION_SCORE'] = sql_explain_response['optimization_score']
            process_info['EXPLAIN_WARNINGS'] = sql_explain_response['warnings']
            process_info['EXPLAIN_RECOMMENDATIONS'] = sql_explain_response['recommendations']
            logger_safe('info', 'update of the EXPLAIN documentation in process_info')
            if upload_info:
                upload_documentation_explain(process_info)
        except Exception as e:
            # Same pre-3.12 f-string restriction as above.
            first_line = str(e).split('\n')[0]
            logger_safe('error', f"error in updating the EXPLAIN documentation : {first_line}")

    if display_info:
        display_process_info(process_info)

    return process_info
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
tdfs4ds/__init__.py,sha256=
|
|
1
|
+
tdfs4ds/__init__.py,sha256=gh7Uv7WmkSjxqbxfDKnemioQrywtwLEGU4XEUElI4VQ,70555
|
|
2
2
|
tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
|
|
3
3
|
tdfs4ds/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
|
|
4
4
|
tdfs4ds/data/logo/tdfs4ds_logo.png,sha256=OCKQnH0gQbRyupwZeiIgo-9c6mdRtjE2E2Zunr_4Ae0,363980
|
|
@@ -11,9 +11,9 @@ tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaU
|
|
|
11
11
|
tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
|
|
12
12
|
tdfs4ds/feature_store/feature_data_processing.py,sha256=mC58pmxIeJ7Sdw-IUvx-ToSDa6D6OBRq8MPvbmp33G0,46214
|
|
13
13
|
tdfs4ds/feature_store/feature_query_retrieval.py,sha256=51c6ZNlLFiBIxNPinS8ot8bjWEIb1QV2eVg69yzVF80,35381
|
|
14
|
-
tdfs4ds/feature_store/feature_store_management.py,sha256=
|
|
15
|
-
tdfs4ds/genai/__init__.py,sha256=
|
|
16
|
-
tdfs4ds/genai/documentation.py,sha256=
|
|
14
|
+
tdfs4ds/feature_store/feature_store_management.py,sha256=qsazxRC4jxBwfwNYpRhrDLDBtnq2BfePTQ31vmDFH_o,52190
|
|
15
|
+
tdfs4ds/genai/__init__.py,sha256=Os1NpNPNr1h5-25xt_jckIqImI3jDMxjxUvM7TqEXzE,811
|
|
16
|
+
tdfs4ds/genai/documentation.py,sha256=rcGPupWpVSG8vhGjk_AWcHarvaImM9XEBkxJYiy5SK0,92244
|
|
17
17
|
tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
|
|
18
18
|
tdfs4ds/process_store/process_followup.py,sha256=E4jgQahjhVRBbfAW3JXNLId7H5qV8ozRt-6PyAQuPzg,12583
|
|
19
19
|
tdfs4ds/process_store/process_query_administration.py,sha256=AOufkJ6DFUpBiGm-6Q6Dq0Aovw31UGTscZ3Ya0ewS-0,7851
|
|
@@ -26,7 +26,7 @@ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,3783
|
|
|
26
26
|
tdfs4ds/utils/query_management.py,sha256=kWDeTdsYcbpV5Tyhh-8uLRWvXh16nIdXNIJ97w76aNU,4848
|
|
27
27
|
tdfs4ds/utils/time_management.py,sha256=g3EJO7I8ERoZ4X7yq5SyDqSE4O9p0BRcv__QPuAxbGA,32243
|
|
28
28
|
tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
|
|
29
|
-
tdfs4ds-0.2.5.
|
|
30
|
-
tdfs4ds-0.2.5.
|
|
31
|
-
tdfs4ds-0.2.5.
|
|
32
|
-
tdfs4ds-0.2.5.
|
|
29
|
+
tdfs4ds-0.2.5.4.dist-info/METADATA,sha256=2pdrLXw7n-nNTfy3Qw9bt8COOfp7LkzjgqI7IupivR0,14358
|
|
30
|
+
tdfs4ds-0.2.5.4.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
31
|
+
tdfs4ds-0.2.5.4.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
|
|
32
|
+
tdfs4ds-0.2.5.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|