tdfs4ds-0.2.5.2-py3-none-any.whl → tdfs4ds-0.2.5.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tdfs4ds/__init__.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = '0.2.5.2'
+__version__ = '0.2.5.4'
 import difflib
 import logging
 import json
tdfs4ds/feature_store/feature_data_processing.py CHANGED
@@ -285,13 +285,37 @@ def prepare_feature_ingestion(df, entity_id, feature_names, feature_versions=Non
     logger_safe("debug", "nested_query=%s", nested_query)
 
     # Execute: create volatile table and test unicity
+    query_create_volatile = f"""
+    CREATE VOLATILE TABLE {volatile_table_name} AS
+    (
+    {nested_query}
+    ) WITH DATA
+    PRIMARY INDEX ({primary_index})
+    ON COMMIT PRESERVE ROWS
+    """
     try:
+        tdml.execute_sql(f"DROP TABLE {_get_database_username()}.{volatile_table_name}")
+        logger_safe('info', 'drop volatile table')
+    except Exception as e:
+        logger_safe('info', 'volatile table does not exists yet')
+
+    try:
+        tdml.execute_sql(query_create_volatile)
+        logger_safe('info', 'results calculated and materialized in a volatile table')
+    except Exception as e:
+        logger_safe('error', f"query execution failed : {str(e).split('\n')[0]}")
+        raise
+
+
+
+    if False:
         tdml.DataFrame.from_query(nested_query).to_sql(
             table_name = volatile_table_name,
             temporary = True,
             primary_index = primary_index.split(','),
             if_exists = 'replace'
         )
+    try:
         nb_duplicates = tdml.execute_sql(query_test_unicity).fetchall()[0][0]
         if nb_duplicates is not None and nb_duplicates > 0:
             logger_safe("error", "The process generates %s duplicates", nb_duplicates)
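
Note: the hunk above swaps teradataml's DataFrame.to_sql round trip for an explicit drop-then-create of the volatile table. A minimal sketch of the same idiom, assuming an open teradataml session; the table, query and index names are illustrative placeholders:

    import teradataml as tdml

    volatile_table_name = "FS_VOLATILE_FEATURES"   # hypothetical
    nested_query = "SELECT customer_id, 1 AS my_feature FROM my_db.my_table"  # hypothetical
    primary_index = "customer_id"                  # hypothetical

    try:
        # Drop a leftover volatile table from an earlier call in this session.
        tdml.execute_sql(f"DROP TABLE {volatile_table_name}")
    except Exception:
        pass  # first call in this session: nothing to drop yet

    # Materialize the query once; ON COMMIT PRESERVE ROWS keeps the rows
    # available for the rest of the session.
    tdml.execute_sql(f"""
    CREATE VOLATILE TABLE {volatile_table_name} AS
    (
    {nested_query}
    ) WITH DATA
    PRIMARY INDEX ({primary_index})
    ON COMMIT PRESERVE ROWS
    """)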
@@ -994,11 +1018,17 @@ def prepare_feature_ingestion_tdstone2(df, entity_id):
     {volatile_expression}
     """
     # Execute the SQL query to create the volatile table.
+    try:
+        tdml.execute_sql(f"DROP TABLE {_get_database_username()}.{volatile_table_name}")
+    except Exception as e:
+        logger_safe('info','the VOLATILE table does not exist and will be created')
+        pass
+
     try:
         tdml.execute_sql(query)
     except Exception as e:
         if tdfs4ds.DISPLAY_LOGS:
-            print(str(e).split('\n')[0])
+            logger_safe('debug',str(e).split('\n')[0])
         tdml.execute_sql(f'DELETE {volatile_table_name}')
 
     # Optionally print the query if the display flag is set.
tdfs4ds/feature_store/feature_store_management.py CHANGED
@@ -1005,20 +1005,21 @@ def delete_feature(feature_name, entity_id, data_domain=None):
     if tdfs4ds.DEBUG_MODE:
         print('table name : ', table_name)
 
-    query = f"""
-    NONSEQUENCED VALIDTIME DELETE {table_name}
-    WHERE FEATURE_ID = (
-        SEL FEATURE_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
-        WHERE FEATURE_NAME = '{feature_name}'
-        AND DATA_DOMAIN = '{data_domain}'
-    )"""
-    if tdfs4ds.DEBUG_MODE:
-        print(query)
+    if False:
+        query = f"""
+        NONSEQUENCED VALIDTIME DELETE {table_name}
+        WHERE FEATURE_ID = (
+            SEL FEATURE_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
+            WHERE FEATURE_NAME = '{feature_name}'
+            AND DATA_DOMAIN = '{data_domain}'
+        )"""
+        if tdfs4ds.DEBUG_MODE:
+            print(query)
 
-    try:
-        tdml.execute_sql(query)
-    except Exception as e:
-        print(str(e).split('\n')[0])
+        try:
+            tdml.execute_sql(query)
+        except Exception as e:
+            print(str(e).split('\n')[0])
 
     return
 
tdfs4ds/genai/__init__.py CHANGED
@@ -9,7 +9,8 @@ from .documentation import (
     run_sql_documentation,
     build_llm,
     get_the_explain,
-    display_process_info
+    display_process_info,
+    feed_process_info_with_prompt_result
 )
 
 __all__ = [
@@ -23,5 +24,6 @@ __all__ = [
     "run_sql_documentation",
     "build_llm",
     "get_the_explain",
-    "display_process_info"
+    "display_process_info",
+    "feed_process_info_with_prompt_result"
 ]
tdfs4ds/genai/documentation.py CHANGED
@@ -342,8 +342,20 @@ def _print_documentation(
 
     if _is_notebook():
         title_html = f"<h2>{title}</h2>" if title else ""
-        entity_items = '\n'.join(f'<li><strong>{col}:</strong> {_md_to_html(desc)}</li>' for col, desc in documented_entity_columns.items())
-        feature_items = '\n'.join(f'<li><strong>{col}:</strong> {_md_to_html(desc)}</li>' for col, desc in documented_feature_columns.items())
+        entity_items = (
+            '\n'.join(f'<li><strong>{col}:</strong> {_md_to_html(desc)}</li>'
+                      for col, desc in documented_entity_columns.items())
+            if documented_entity_columns is not None
+            else "<li><em>No entity columns documented.</em></li>"
+        )
+
+        feature_items = (
+            '\n'.join(f'<li><strong>{col}:</strong> {_md_to_html(desc)}</li>'
+                      for col, desc in documented_feature_columns.items())
+            if documented_feature_columns is not None
+            else "<li><em>No feature columns documented.</em></li>"
+        )
+
 
         # Build optional sections
         sql_section = ""
@@ -614,13 +626,13 @@ def build_documentation_json_schema(columns: List[str], provider: str = "generic
     # Fallback: generic JSON schema
     return base_schema
 
-
 def build_sql_documentation_chain(
     llm: ChatOpenAI,
     entity_columns: Sequence[str],
     feature_columns: Sequence[str],
     provider: str = "vllm",
     json_constraint: bool = True,
+    prompt_only: bool = False
 ) -> Runnable:
     """
     Build a LangChain Runnable that generates business-focused documentation
@@ -654,25 +666,32 @@ def build_sql_documentation_chain(
         If False:
         - the chain does not enforce JSON structure at the LLM level
         - the model is only guided by the prompt (weaker guarantees)
+    prompt_only : bool, optional (default=False)
+        If True:
+        - returns only the prompt template, without attaching the LLM or parser
+        - useful for debugging, testing, or customizing the prompt before execution
+        If False:
+        - returns the full chain: prompt → LLM (optionally schema-guided) → JSON parser
 
     Returns
     -------
     Runnable
-        A LangChain Runnable that executes:
-            prompt → LLM (optionally schema-guided) → JSON parser
-
-        When invoked with:
-            {
-                "sql_query": "...",
-                "columns_str": "Entity columns:\n- column1\n\nFeature columns:\n- column2\n..."
-            }
-
-        It returns:
-            dict[str, str]
-                A mapping of each requested column name to a short,
-                business-oriented description (5 sentences), plus a 'query_business_logic' key
-                containing a high-level description of the query's business logic (5-10 sentences), and an 'entity_description' key
-                with a holistic description of the entity (3-5 sentences).
+        If prompt_only=False:
+            A LangChain Runnable that executes:
+                prompt → LLM (optionally schema-guided) → JSON parser
+            When invoked with:
+                {
+                    "sql_query": "...",
+                    "columns_str": "Entity columns:\n- column1\n\nFeature columns:\n- column2\n..."
+                }
+            It returns:
+                dict[str, str]
+                    A mapping of each requested column name to a short,
+                    business-oriented description (≤ 5 sentences), plus a 'query_business_logic' key
+                    containing a high-level description of the query's business logic (5-10 sentences), and an 'entity_description' key
+                    with a holistic description of the entity (3-5 sentences).
+        If prompt_only=True:
+            The prompt template itself, for inspection or further customization.
 
     Notes
     -----
@@ -749,7 +768,10 @@ Columns to document (only document these):
         raw = ai_msg.content
         return parser.parse(raw)
 
-    return prompt | constrained_llm | RunnableLambda(_parse)
+    if prompt_only:
+        return prompt
+    else:
+        return prompt | constrained_llm | RunnableLambda(_parse)
 
 def run_sql_documentation(
     chain: Runnable,
@@ -825,7 +847,10 @@ def run_sql_documentation(
             "columns_str": columns_str,
             "language" : language
         })
-        logger_safe('info', f'run_sql_documentation: Successfully generated documentation for columns: {list(result.keys())}')
+        if isinstance(result, dict):
+            logger_safe('info', f'run_sql_documentation: Successfully generated documentation for columns: {list(result.keys())}')
+        else:
+            logger_safe('info', f'run_sql_documentation: Successfully generated documentation prompt')
         return result
     except Exception as e:
         logger_safe('error', f'run_sql_documentation: Failed to generate documentation: {e}')
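
Note: with prompt_only=True the builder returns the prompt template instead of the prompt → LLM → parser pipeline, and run_sql_documentation passes the rendered prompt straight through (hence the isinstance check above). A hedged sketch; llm, the columns and the SQL are illustrative placeholders, and a configured ChatOpenAI client is assumed:

    from tdfs4ds.genai.documentation import (
        build_sql_documentation_chain,
        run_sql_documentation,
    )

    chain = build_sql_documentation_chain(
        llm,                                  # a configured ChatOpenAI instance
        entity_columns  = ["customer_id"],
        feature_columns = ["order_amount"],
        prompt_only     = True,               # new in 0.2.5.4
    )
    # 'chain' is now the prompt template; invoking it renders the messages
    # without calling any model.
    rendered = run_sql_documentation(
        chain,
        "SELECT customer_id, order_amount FROM my_db.orders",
        ["customer_id"],
        ["order_amount"],
    )
    # With prompt_only=False the same call returns the parsed documentation dict.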
@@ -839,6 +864,7 @@ def document_sql_query_columns(
     language: str = "English",
     provider: Optional[str] = None,
     json_constraint: bool = True,
+    prompt_only: bool = False
 ) -> Dict[str, Any]:
     """
     Convenience function to generate business-focused documentation for SQL query output columns
@@ -886,20 +912,30 @@ def document_sql_query_columns(
         - the chain does not enforce JSON structure at the LLM level
         - the model is only guided by the prompt (weaker guarantees)
 
+    prompt_only : bool, optional (default=False)
+        If True:
+        - returns only the prompt template, without executing the chain
+        - useful for debugging, testing, or customizing the prompt before execution
+        If False:
+        - executes the full chain and returns structured documentation
+
     Returns
     -------
     dict
-        A dictionary with four keys:
-        - "query_business_logic": str containing the high-level business logic description of the query
-        - "entity_description": str containing the holistic description of the entity
-        - "entity_columns": dict[str, str] mapping each entity column name to its description
-        - "feature_columns": dict[str, str] mapping each feature column name to its description
+        If prompt_only=False:
+            A dictionary with four keys:
+            - "query_business_logic": str containing the high-level business logic description of the query
+            - "entity_description": str containing the holistic description of the entity
+            - "entity_columns": dict[str, str] mapping each entity column name to its description
+            - "feature_columns": dict[str, str] mapping each feature column name to its description
+        If prompt_only=True:
+            The prompt template itself, for inspection or further customization.
 
     Raises
     ------
     ValueError
         If any of the required tdfs4ds configuration variables (INSTRUCT_MODEL_URL,
-        INSTRUCT_MODEL_API_KEY, INSTRUCT_MODEL_MODEL) are not set.
+        INSTRUCT_MODEL_API_KEY, INSTRUCT_MODEL_MODEL, INSTRUCT_MODEL_PROVIDER) are not set.
 
     Notes
     -----
@@ -931,30 +967,35 @@ def document_sql_query_columns(
     )
 
     # Build the documentation chain
-    sql_doc_chain = build_sql_documentation_chain(llm, entity_columns, feature_columns, provider=provider, json_constraint=json_constraint)
+    sql_doc_chain = build_sql_documentation_chain(llm, entity_columns, feature_columns, provider=provider, json_constraint=json_constraint, prompt_only=prompt_only)
 
     # Run the documentation
     result = run_sql_documentation(sql_doc_chain, sql_query, entity_columns, feature_columns, language=language)
 
-    # Separate entity columns, feature columns, entity description, and query logic
-    entity_docs = {k: v for k, v in result.items() if k in entity_columns}
-    feature_docs = {k: v for k, v in result.items() if k in feature_columns}
-    entity_desc = result.get("entity_description", "")
-    query_logic = result.get("query_business_logic", "")
+    if prompt_only:
+        logger_safe('info', f'document_sql_query_columns: Successfully generated the prompt to be used with a LLM to generate the documentation')
+        return result
+    else:
+        # Separate entity columns, feature columns, entity description, and query logic
+        entity_docs = {k: v for k, v in result.items() if k in entity_columns}
+        feature_docs = {k: v for k, v in result.items() if k in feature_columns}
+        entity_desc = result.get("entity_description", "")
+        query_logic = result.get("query_business_logic", "")
 
-    logger_safe('info', f'document_sql_query_columns: Successfully completed documentation for {len(entity_docs)} entity columns, {len(feature_docs)} feature columns, entity description and query logic')
-    return {
-        "query_business_logic": query_logic,
-        "entity_description": entity_desc,
-        "entity_columns": entity_docs,
-        "feature_columns": feature_docs
-    }
+        logger_safe('info', f'document_sql_query_columns: Successfully completed documentation for {len(entity_docs)} entity columns, {len(feature_docs)} feature columns, entity description and query logic')
+        return {
+            "query_business_logic": query_logic,
+            "entity_description": entity_desc,
+            "entity_columns": entity_docs,
+            "feature_columns": feature_docs
+        }
 
 
 def build_explain_documentation_chain(
     llm: ChatOpenAI,
     provider: str = "vllm",
     json_constraint: bool = True,
+    prompt_only: bool = False
 ) -> Runnable:
     """
     Build a LangChain Runnable that analyzes SQL EXPLAIN plans and generates
@@ -1112,7 +1153,10 @@ Return ONLY valid JSON with the four keys above.
         raw = ai_msg.content
         return parser.parse(raw)
 
-    return prompt | constrained_llm | RunnableLambda(_parse)
+    if prompt_only:
+        return prompt
+    else:
+        return prompt | constrained_llm | RunnableLambda(_parse)
 
 
 def run_explain_documentation(
@@ -1144,7 +1188,10 @@ def run_explain_documentation(
             "sql_query": sql_query,
             "explain_plan": explain_plan
         })
-        logger_safe('info', f'run_explain_documentation: Successfully analyzed EXPLAIN plan. Score: {result.get("optimization_score", "N/A")}/5')
+        if isinstance(result, dict):
+            logger_safe('info', f'run_explain_documentation: Successfully analyzed EXPLAIN plan. Score: {result.get("optimization_score", "N/A")}/5')
+        else:
+            logger_safe('info', 'run_explain_documentation: Successfully generated the prompt to be used with a LLM to generate the documentation')
         return result
     except Exception as e:
         logger_safe('error', f'run_explain_documentation: Failed to analyze EXPLAIN plan: {e}')
@@ -1155,6 +1202,7 @@ def document_sql_query_explain(
     sql_query: str,
     provider: Optional[str] = None,
     json_constraint: bool = True,
+    prompt_only: bool = False
 ) -> Dict[str, Any]:
     """
     Analyze a SQL query's EXPLAIN plan and return optimization recommendations.
@@ -1213,10 +1261,13 @@ def document_sql_query_explain(
     # get the explain plan:
     explain_plan = get_the_explain(sql_query)
     # Build and run the EXPLAIN analysis chain
-    explain_chain = build_explain_documentation_chain(llm, provider=provider, json_constraint=json_constraint)
+    explain_chain = build_explain_documentation_chain(llm, provider=provider, json_constraint=json_constraint, prompt_only = prompt_only)
     result = run_explain_documentation(explain_chain, sql_query, explain_plan)
 
-    logger_safe('info', f'document_sql_query_explain: Successfully completed EXPLAIN analysis. Score: {result.get("optimization_score", "N/A")}/5')
+    if prompt_only:
+        logger_safe('info', f'document_sql_query_explain: Successfully completed EXPLAIN prompt generation')
+    else:
+        logger_safe('info', f'document_sql_query_explain: Successfully completed EXPLAIN analysis. Score: {result.get("optimization_score", "N/A")}/5')
     return result
 
 def documentation_tables_creation():
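
Note: the same switch is exposed end to end for EXPLAIN analysis. A sketch with an illustrative query; the full run assumes the INSTRUCT_MODEL_* configuration variables are set, and the prompt-only run still needs a database connection to fetch the EXPLAIN plan:

    from tdfs4ds.genai.documentation import document_sql_query_explain

    # Full analysis: EXPLAIN plan -> LLM -> parsed JSON with four keys.
    analysis = document_sql_query_explain(sql_query="SELECT * FROM my_db.orders")
    print(analysis["optimization_score"], analysis["warnings"])

    # Prompt only: the rendered prompt is returned and no model is called.
    prompt = document_sql_query_explain(
        sql_query   = "SELECT * FROM my_db.orders",
        prompt_only = True,
    )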
@@ -1313,7 +1364,7 @@ def documentation_tables_creation():
     logger_safe('info', 'documentation_tables_creation: Documentation tables creation process completed.')
     return
 
-def document_process(process_id: str, language: str = "English", json_constraint: bool = True, show_sql_query: bool = False, show_explain_plan: bool = False, display: bool = True, upload: bool = True) -> Optional[Dict[str, Any]]:
+def document_process(process_id: str, language: str = "English", json_constraint: bool = True, show_sql_query: bool = False, show_explain_plan: bool = False, display: bool = True, upload: bool = True, prompt_only = False) -> Optional[Dict[str, Any]]:
     """
     Generate and store documentation for a data process identified by process_id.
     This function retrieves the SQL query and output columns for the process,
@@ -1387,32 +1438,51 @@ def document_process(process_id: str, language: str = "English", json_constraint
     documentation = document_sql_query_columns(
         sql_query = process_info['PROCESS_SQL'],
         entity_columns = process_info['ENTITY_COLUMNS'],
-        feature_columns = process_info['FEATURE_COLUMNS']
+        feature_columns = process_info['FEATURE_COLUMNS'],
+        prompt_only = prompt_only
     )
 
-    process_info['DOCUMENTED_SQL'] = documentation['query_business_logic']
-    process_info['ENTITY_DESCRIPTION'] = documentation['entity_description']
-    process_info['DOCUMENTED_ENTITY_COLUMNS'] = documentation['entity_columns']
-    process_info['DOCUMENTED_FEATURE_COLUMNS'] = documentation['feature_columns']
-
-    if True:
-        explain_documentation = document_sql_query_explain(
-            sql_query = process_info['PROCESS_SQL']
-        )
-
-        process_info['EXPLAIN_ANALYSIS'] = explain_documentation['explanation']
-        process_info['OPTIMIZATION_SCORE'] = explain_documentation['optimization_score']
-        process_info['EXPLAIN_WARNINGS'] = explain_documentation['warnings']
-        process_info['EXPLAIN_RECOMMENDATIONS'] = explain_documentation['recommendations']
-
-        # Store the raw EXPLAIN plan if needed for display
-        if show_explain_plan:
-            process_info['RAW_EXPLAIN_PLAN'] = get_the_explain(process_info['PROCESS_SQL'])
+    if prompt_only:
+        process_info['PROMPT_BUSINESS_LOGIC_DESCRIPTION'] = documentation.messages[0].content
+        logger_safe('info', 'Prompt available in the PROMPT_BUSINESS_LOGIC_DESCRIPTION field.')
+        process_info['DOCUMENTED_SQL'] = None
+        process_info['ENTITY_DESCRIPTION'] = None
+        process_info['DOCUMENTED_ENTITY_COLUMNS'] = None
+        process_info['DOCUMENTED_FEATURE_COLUMNS'] = None
+    else:
+        process_info['DOCUMENTED_SQL'] = documentation['query_business_logic']
+        process_info['ENTITY_DESCRIPTION'] = documentation['entity_description']
+        process_info['DOCUMENTED_ENTITY_COLUMNS'] = documentation['entity_columns']
+        process_info['DOCUMENTED_FEATURE_COLUMNS'] = documentation['feature_columns']
+
+    explain_documentation = document_sql_query_explain(
+        sql_query = process_info['PROCESS_SQL'],
+        prompt_only=prompt_only
+    )
+
+    if prompt_only:
+        process_info['PROMPT_EXPLAIN_THE_EXPLAIN'] = explain_documentation.messages[0].content
+        logger_safe('info', 'Prompt available in the PROMPT_EXPLAIN_THE_EXPLAIN field.')
+        process_info['EXPLAIN_ANALYSIS'] = None
+        process_info['OPTIMIZATION_SCORE'] = None
+        process_info['EXPLAIN_WARNINGS'] = None
+        process_info['EXPLAIN_RECOMMENDATIONS'] = None
+    else:
+        process_info['EXPLAIN_ANALYSIS'] = explain_documentation['explanation']
+        process_info['OPTIMIZATION_SCORE'] = explain_documentation['optimization_score']
+        process_info['EXPLAIN_WARNINGS'] = explain_documentation['warnings']
+        process_info['EXPLAIN_RECOMMENDATIONS'] = explain_documentation['recommendations']
+
+    # Store the raw EXPLAIN plan if needed for display
+    if show_explain_plan:
+        process_info['RAW_EXPLAIN_PLAN'] = get_the_explain(process_info['PROCESS_SQL'])
 
     # Upload the generated documentation to the documentation tables:
-    if upload:
+    if upload and prompt_only == False:
         upload_documentation(process_info)
         logger_safe('info', f'document_process: Uploaded documentation for process_id {process_id} to documentation tables.')
+
+    if upload and prompt_only == False:
         upload_documentation_explain(process_info)
         logger_safe('info', f'document_process: Uploaded EXPLAIN analysis for process_id {process_id} to documentation tables.')
 
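
Note: in prompt-only mode document_process stores the two rendered prompts on process_info, leaves the documentation fields as None, and skips both uploads. A sketch, assuming a registered process id and that the call returns the enriched process_info (its annotation is Optional[Dict[str, Any]]):

    # 'my-process-id' is an illustrative placeholder.
    info = document_process('my-process-id', prompt_only=True, display=False)

    # Prompts ready to be run against any LLM of your choice:
    print(info['PROMPT_BUSINESS_LOGIC_DESCRIPTION'])
    print(info['PROMPT_EXPLAIN_THE_EXPLAIN'])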
@@ -1545,7 +1615,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
     logger_safe('info', f'upload_documentation: Uploading documentation for process_id {process_id} into staging tables.')
     tdml.copy_to_sql(
         df_business_logic,
-        table_name = "DOCUMENTATION_PROCESS_BUSINESS_LOGIC_STAGING",
+        table_name = "DOC_PROCESS_BUSINESS_LOGIC_STAGING",
         if_exists = 'replace',
         temporary = True
     )
@@ -1555,7 +1625,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
     logger_safe('info', f'upload_documentation: Uploading feature documentation for process_id {process_id} into staging tables.')
     tdml.copy_to_sql(
         df_features,
-        table_name = "DOCUMENTATION_PROCESS_FEATURES_STAGING",
+        table_name = "DOC_PROCESS_FEATURES_STAGING",
         if_exists = 'replace',
         temporary = True
     )
@@ -1571,7 +1641,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
         BUSINESS_LOGIC_DESCRIPTION,
         ENTITY_DESCRIPTION,
         ENTITY_COLUMNS_JSON
-    FROM {_get_database_username()}.DOCUMENTATION_PROCESS_BUSINESS_LOGIC_STAGING
+    FROM {_get_database_username()}.DOC_PROCESS_BUSINESS_LOGIC_STAGING
     ) UPDATED
     ON EXISTING.PROCESS_ID = UPDATED.PROCESS_ID
     WHEN MATCHED THEN
@@ -1599,7 +1669,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
         FC.FEATURE_ID,
         A.FEATURE_NAME,
         A.FEATURE_DESCRIPTION
-    FROM {_get_database_username()}.DOCUMENTATION_PROCESS_FEATURES_STAGING A
+    FROM {_get_database_username()}.DOC_PROCESS_FEATURES_STAGING A
     INNER JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} FC
     ON UPPER(FC.FEATURE_NAME) = UPPER(A.FEATURE_NAME)
     AND UPPER(FC.DATA_DOMAIN) = '{process_info['DATA_DOMAIN'].upper()}'
@@ -1627,7 +1697,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
     WHERE PROCESS_ID = '{process_id}'
     AND FEATURE_ID NOT IN (
         SELECT FC.FEATURE_ID
-        FROM {_get_database_username()}.DOCUMENTATION_PROCESS_FEATURES_STAGING A
+        FROM {_get_database_username()}.DOC_PROCESS_FEATURES_STAGING A
         INNER JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} FC
         ON UPPER(FC.FEATURE_NAME) = UPPER(A.FEATURE_NAME)
         AND UPPER(FC.DATA_DOMAIN) = '{process_info['DATA_DOMAIN'].upper()}'
@@ -1658,8 +1728,8 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
         raise
 
     # remove staging tables
-    tdml.execute_sql(f"DROP TABLE {_get_database_username()}.DOCUMENTATION_PROCESS_BUSINESS_LOGIC_STAGING")
-    tdml.execute_sql(f"DROP TABLE {_get_database_username()}.DOCUMENTATION_PROCESS_FEATURES_STAGING")
+    tdml.execute_sql(f"DROP TABLE {_get_database_username()}.DOC_PROCESS_BUSINESS_LOGIC_STAGING")
+    tdml.execute_sql(f"DROP TABLE {_get_database_username()}.DOC_PROCESS_FEATURES_STAGING")
     logger_safe('info', f'upload_documentation: Successfully uploaded documentation for process_id {process_id}.')
 
     return
@@ -1875,4 +1945,113 @@ def display_process_info(process_info: Dict[str, Any] = None, process_id : str =
         explain_recommendations = process_info.get('EXPLAIN_RECOMMENDATIONS', None),
         sql_query = process_info.get('PROCESS_SQL', None),
     )
-    return
+    return
+
+
+def feed_process_info_with_prompt_result(process_info, sql_documentation_response=None, sql_explain_response=None, display_info=True, upload_info=True):
+    """
+    Enriches a process_info dictionary with SQL documentation and EXPLAIN plan analysis results,
+    with options to display the results and upload the enriched information.
+
+    This function integrates the results of SQL documentation and EXPLAIN plan analysis into the provided
+    `process_info` dictionary. It extracts and organizes documentation for entity and feature columns,
+    as well as optimization insights, to provide a comprehensive view of the SQL query's business logic,
+    performance, and potential improvements. It also supports optional display of the enriched information
+    and automatic upload of the documentation and EXPLAIN analysis to a backend system.
+
+    Args:
+        process_info (dict): A dictionary containing metadata about the SQL process, including:
+            - 'ENTITY_COLUMNS': List of columns representing the entity in the SQL query.
+            - 'FEATURE_COLUMNS': List of columns representing features in the SQL query.
+        sql_documentation_response (dict, optional): A dictionary containing SQL documentation results,
+            including descriptions for entity/feature columns and query business logic. Expected keys:
+            - 'query_business_logic': Description of the query's purpose and logic.
+            - 'entity_description': Description of the entity represented by the query.
+            - Column names as keys, with their descriptions as values.
+        sql_explain_response (dict, optional): A dictionary containing SQL EXPLAIN plan analysis results,
+            including:
+            - 'explanation': Detailed analysis of the EXPLAIN plan.
+            - 'optimization_score': Integer score (1-5) indicating query optimization level.
+            - 'warnings': List of potential issues identified in the EXPLAIN plan.
+            - 'recommendations': List of actionable recommendations for query optimization.
+        display_info (bool, optional): If True, displays the enriched process_info using `display_process_info`.
+            Defaults to True.
+        upload_info (bool, optional): If True, uploads the enriched documentation and EXPLAIN analysis to a backend system.
+            Defaults to True.
+
+    Returns:
+        dict: The enriched `process_info` dictionary with the following additional keys (if input responses are provided):
+            - 'DOCUMENTED_SQL': Business logic description of the SQL query.
+            - 'ENTITY_DESCRIPTION': Description of the entity represented by the query.
+            - 'DOCUMENTED_ENTITY_COLUMNS': Dictionary of documented entity columns and their descriptions.
+            - 'DOCUMENTED_FEATURE_COLUMNS': Dictionary of documented feature columns and their descriptions.
+            - 'EXPLAIN_ANALYSIS': Analysis of the EXPLAIN plan.
+            - 'OPTIMIZATION_SCORE': Optimization score (1-5) for the query.
+            - 'EXPLAIN_WARNINGS': List of warnings from the EXPLAIN plan analysis.
+            - 'EXPLAIN_RECOMMENDATIONS': List of optimization recommendations.
+
+    Raises:
+        Logs errors for any exceptions encountered during the update or upload process, but does not raise them.
+        Errors are logged using `logger_safe` with a descriptive message.
+
+    Example:
+        >>> process_info = {
+        ...     'ENTITY_COLUMNS': ['customer_id', 'order_id'],
+        ...     'FEATURE_COLUMNS': ['order_amount', 'order_date']
+        ... }
+        >>> sql_documentation_response = {
+        ...     'query_business_logic': 'This query joins customer and order data...',
+        ...     'entity_description': 'The customer entity represents...',
+        ...     'customer_id': 'Unique identifier for customers.',
+        ...     'order_amount': 'Total amount of the order.'
+        ... }
+        >>> sql_explain_response = {
+        ...     'explanation': 'The EXPLAIN plan shows a nested loop join...',
+        ...     'optimization_score': 3,
+        ...     'warnings': ['Full table scan on orders table'],
+        ...     'recommendations': ['Add index on orders.customer_id']
+        ... }
+        >>> enriched_info = feed_process_info_with_prompt_result(
+        ...     process_info,
+        ...     sql_documentation_response,
+        ...     sql_explain_response,
+        ...     display_info=True,
+        ...     upload_info=True
+        ... )
+        >>> print(enriched_info.keys())
+        ['ENTITY_COLUMNS', 'FEATURE_COLUMNS', 'DOCUMENTED_SQL', 'ENTITY_DESCRIPTION',
+         'DOCUMENTED_ENTITY_COLUMNS', 'DOCUMENTED_FEATURE_COLUMNS', 'EXPLAIN_ANALYSIS',
+         'OPTIMIZATION_SCORE', 'EXPLAIN_WARNINGS', 'EXPLAIN_RECOMMENDATIONS']
+    """
+
+    entity_columns = process_info['ENTITY_COLUMNS']
+    feature_columns = process_info['FEATURE_COLUMNS']
+
+    if sql_documentation_response is not None:
+        try:
+            process_info['DOCUMENTED_SQL'] = sql_documentation_response['query_business_logic']
+            process_info['ENTITY_DESCRIPTION'] = sql_documentation_response['entity_description']
+            process_info['DOCUMENTED_ENTITY_COLUMNS'] = {k: v for k, v in sql_documentation_response.items() if k in entity_columns}
+            process_info['DOCUMENTED_FEATURE_COLUMNS'] = {k: v for k, v in sql_documentation_response.items() if k in feature_columns}
+            logger_safe('info', 'update of the SQL documentation in process_info')
+            if upload_info:
+                upload_documentation(process_info)
+        except Exception as e:
+            logger_safe('error',f"error in updating the SQL documentation : {str(e).split('\n')[0]}")
+
+    if sql_explain_response is not None:
+        try:
+            process_info['EXPLAIN_ANALYSIS'] = sql_explain_response['explanation']
+            process_info['OPTIMIZATION_SCORE'] = sql_explain_response['optimization_score']
+            process_info['EXPLAIN_WARNINGS'] = sql_explain_response['warnings']
+            process_info['EXPLAIN_RECOMMENDATIONS'] = sql_explain_response['recommendations']
+            logger_safe('info', 'update of the EXPLAIN documentation in process_info')
+            if upload_info:
+                upload_documentation_explain(process_info)
+        except Exception as e:
+            logger_safe('error',f"error in updating the EXPLAIN documentation : {str(e).split('\n')[0]}")
+
+    if display_info:
+        display_process_info(process_info)
+
+    return process_info
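
Note: combined with prompt_only=True, this helper closes the loop when the model call happens outside tdfs4ds: render the prompts, run them with any LLM, then feed the parsed answers back. A sketch under those assumptions; call_my_llm is a hypothetical helper returning the model's raw JSON string:

    import json

    # 1. Render the prompts without any model call ('my-process-id' is illustrative).
    info = document_process('my-process-id', prompt_only=True, display=False)

    # 2. Run the prompts with the LLM of your choice (hypothetical helper).
    doc_json     = call_my_llm(info['PROMPT_BUSINESS_LOGIC_DESCRIPTION'])
    explain_json = call_my_llm(info['PROMPT_EXPLAIN_THE_EXPLAIN'])

    # 3. Feed the parsed answers back; with upload_info=True the documentation
    #    and the EXPLAIN analysis are pushed to the documentation tables.
    info = feed_process_info_with_prompt_result(
        info,
        sql_documentation_response = json.loads(doc_json),
        sql_explain_response       = json.loads(explain_json),
        display_info = True,
        upload_info  = True,
    )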
tdfs4ds-0.2.5.2.dist-info/METADATA → tdfs4ds-0.2.5.4.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tdfs4ds
-Version: 0.2.5.2
+Version: 0.2.5.4
 Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
 Author: Denis Molin
 Requires-Python: >=3.6
@@ -12,6 +12,7 @@ Requires-Dist: plotly
 Requires-Dist: tqdm
 Requires-Dist: networkx
 Requires-Dist: sqlparse
+Requires-Dist: langchain-openai
 
 ![tdfs4ds logo](https://github.com/denismolin/tdfs4ds/raw/main/tdfs4ds_logo.png)
 
tdfs4ds-0.2.5.2.dist-info/RECORD → tdfs4ds-0.2.5.4.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-tdfs4ds/__init__.py,sha256=NeD8Lf1HwfqupVOzESCydySlk_TdvlQbJflg7MZTKm8,70555
+tdfs4ds/__init__.py,sha256=gh7Uv7WmkSjxqbxfDKnemioQrywtwLEGU4XEUElI4VQ,70555
 tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
 tdfs4ds/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
 tdfs4ds/data/logo/tdfs4ds_logo.png,sha256=OCKQnH0gQbRyupwZeiIgo-9c6mdRtjE2E2Zunr_4Ae0,363980
@@ -9,11 +9,11 @@ tdfs4ds/dataset/dataset.py,sha256=J_fgfsVdR9zSOXrUOqyotqsUD-GlQMGyuld6ueov45w,76
 tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22Tsd5k,16638
 tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaUGCnI,209
 tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
-tdfs4ds/feature_store/feature_data_processing.py,sha256=gXBsr1H05zxM4tWE7y29ucxeoTu1jQITOwTXqi1Y2pk,45214
+tdfs4ds/feature_store/feature_data_processing.py,sha256=mC58pmxIeJ7Sdw-IUvx-ToSDa6D6OBRq8MPvbmp33G0,46214
 tdfs4ds/feature_store/feature_query_retrieval.py,sha256=51c6ZNlLFiBIxNPinS8ot8bjWEIb1QV2eVg69yzVF80,35381
-tdfs4ds/feature_store/feature_store_management.py,sha256=mtPQkdMDhcOrhj9IAaH-FEP_znK53cYtEv8zXAbsigg,52123
-tdfs4ds/genai/__init__.py,sha256=Hal13Kw75nDYKHtfvHZNdm98exqmY6qaqGZkJA2TQ6E,723
-tdfs4ds/genai/documentation.py,sha256=9BOqV7F4XVBDF8SYU6W8TRsRnDvIxR8CV4bauVimSe0,82056
+tdfs4ds/feature_store/feature_store_management.py,sha256=qsazxRC4jxBwfwNYpRhrDLDBtnq2BfePTQ31vmDFH_o,52190
+tdfs4ds/genai/__init__.py,sha256=Os1NpNPNr1h5-25xt_jckIqImI3jDMxjxUvM7TqEXzE,811
+tdfs4ds/genai/documentation.py,sha256=rcGPupWpVSG8vhGjk_AWcHarvaImM9XEBkxJYiy5SK0,92244
 tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
 tdfs4ds/process_store/process_followup.py,sha256=E4jgQahjhVRBbfAW3JXNLId7H5qV8ozRt-6PyAQuPzg,12583
 tdfs4ds/process_store/process_query_administration.py,sha256=AOufkJ6DFUpBiGm-6Q6Dq0Aovw31UGTscZ3Ya0ewS-0,7851
@@ -26,7 +26,7 @@ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,3783
 tdfs4ds/utils/query_management.py,sha256=kWDeTdsYcbpV5Tyhh-8uLRWvXh16nIdXNIJ97w76aNU,4848
 tdfs4ds/utils/time_management.py,sha256=g3EJO7I8ERoZ4X7yq5SyDqSE4O9p0BRcv__QPuAxbGA,32243
 tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
-tdfs4ds-0.2.5.2.dist-info/METADATA,sha256=qADkn9deR_9Yo-IRA5J7VHE1KNcgwL2qNCKFA3DIg_o,14325
-tdfs4ds-0.2.5.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-tdfs4ds-0.2.5.2.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
-tdfs4ds-0.2.5.2.dist-info/RECORD,,
+tdfs4ds-0.2.5.4.dist-info/METADATA,sha256=2pdrLXw7n-nNTfy3Qw9bt8COOfp7LkzjgqI7IupivR0,14358
+tdfs4ds-0.2.5.4.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+tdfs4ds-0.2.5.4.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
+tdfs4ds-0.2.5.4.dist-info/RECORD,,