tdfs4ds-0.2.5.2-py3-none-any.whl → tdfs4ds-0.2.5.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tdfs4ds/__init__.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = '0.2.5.2'
+__version__ = '0.2.5.4'
 import difflib
 import logging
 import json
tdfs4ds/feature_store/feature_data_processing.py CHANGED
@@ -285,13 +285,37 @@ def prepare_feature_ingestion(df, entity_id, feature_names, feature_versions=Non
     logger_safe("debug", "nested_query=%s", nested_query)
 
     # Execute: create volatile table and test unicity
+    query_create_volatile = f"""
+    CREATE VOLATILE TABLE {volatile_table_name} AS
+    (
+    {nested_query}
+    ) WITH DATA
+    PRIMARY INDEX ({primary_index})
+    ON COMMIT PRESERVE ROWS
+    """
     try:
+        tdml.execute_sql(f"DROP TABLE {_get_database_username()}.{volatile_table_name}")
+        logger_safe('info', 'drop volatile table')
+    except Exception as e:
+        logger_safe('info', 'volatile table does not exists yet')
+
+    try:
+        tdml.execute_sql(query_create_volatile)
+        logger_safe('info', 'results calculated and materialized in a volatile table')
+    except Exception as e:
+        logger_safe('error', f"query execution failed : {str(e).split('\n')[0]}")
+        raise
+
+
+
+    if False:
         tdml.DataFrame.from_query(nested_query).to_sql(
             table_name = volatile_table_name,
             temporary = True,
             primary_index = primary_index.split(','),
             if_exists = 'replace'
         )
+    try:
         nb_duplicates = tdml.execute_sql(query_test_unicity).fetchall()[0][0]
         if nb_duplicates is not None and nb_duplicates > 0:
             logger_safe("error", "The process generates %s duplicates", nb_duplicates)
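
Note: the hunk above swaps teradataml's DataFrame.to_sql round trip for an explicit drop-then-create of the volatile table. A minimal sketch of the same idiom, assuming an open teradataml session; the table, query and index names are illustrative placeholders:

    import teradataml as tdml

    volatile_table_name = "FS_VOLATILE_FEATURES"   # hypothetical
    nested_query = "SELECT customer_id, 1 AS my_feature FROM my_db.my_table"  # hypothetical
    primary_index = "customer_id"                  # hypothetical

    try:
        # Drop a leftover volatile table from an earlier call in this session.
        tdml.execute_sql(f"DROP TABLE {volatile_table_name}")
    except Exception:
        pass  # first call in this session: nothing to drop yet

    # Materialize the query once; ON COMMIT PRESERVE ROWS keeps the rows
    # available for the rest of the session.
    tdml.execute_sql(f"""
    CREATE VOLATILE TABLE {volatile_table_name} AS
    (
    {nested_query}
    ) WITH DATA
    PRIMARY INDEX ({primary_index})
    ON COMMIT PRESERVE ROWS
    """)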
@@ -994,11 +1018,17 @@ def prepare_feature_ingestion_tdstone2(df, entity_id):
     {volatile_expression}
     """
     # Execute the SQL query to create the volatile table.
+    try:
+        tdml.execute_sql(f"DROP TABLE {_get_database_username()}.{volatile_table_name}")
+    except Exception as e:
+        logger_safe('info','the VOLATILE table does not exist and will be created')
+        pass
+
     try:
         tdml.execute_sql(query)
     except Exception as e:
         if tdfs4ds.DISPLAY_LOGS:
-            print(str(e).split('\n')[0])
+            logger_safe('debug',str(e).split('\n')[0])
         tdml.execute_sql(f'DELETE {volatile_table_name}')
 
     # Optionally print the query if the display flag is set.
tdfs4ds/feature_store/feature_store_management.py CHANGED
@@ -1005,20 +1005,21 @@ def delete_feature(feature_name, entity_id, data_domain=None):
     if tdfs4ds.DEBUG_MODE:
         print('table name : ', table_name)
 
-    query = f"""
-    NONSEQUENCED VALIDTIME DELETE {table_name}
-    WHERE FEATURE_ID = (
-        SEL FEATURE_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
-        WHERE FEATURE_NAME = '{feature_name}'
-        AND DATA_DOMAIN = '{data_domain}'
-    )"""
-    if tdfs4ds.DEBUG_MODE:
-        print(query)
+    if False:
+        query = f"""
+        NONSEQUENCED VALIDTIME DELETE {table_name}
+        WHERE FEATURE_ID = (
+            SEL FEATURE_ID FROM {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME_VIEW}
+            WHERE FEATURE_NAME = '{feature_name}'
+            AND DATA_DOMAIN = '{data_domain}'
+        )"""
+        if tdfs4ds.DEBUG_MODE:
+            print(query)
 
-    try:
-        tdml.execute_sql(query)
-    except Exception as e:
-        print(str(e).split('\n')[0])
+        try:
+            tdml.execute_sql(query)
+        except Exception as e:
+            print(str(e).split('\n')[0])
 
     return
 
tdfs4ds/genai/__init__.py CHANGED
@@ -9,7 +9,8 @@ from .documentation import (
     run_sql_documentation,
     build_llm,
     get_the_explain,
-    display_process_info
+    display_process_info,
+    feed_process_info_with_prompt_result
 )
 
 __all__ = [
@@ -23,5 +24,6 @@ __all__ = [
     "run_sql_documentation",
     "build_llm",
     "get_the_explain",
-    "display_process_info"
+    "display_process_info",
+    "feed_process_info_with_prompt_result"
 ]
tdfs4ds/genai/documentation.py CHANGED
@@ -342,8 +342,20 @@ def _print_documentation(
 
     if _is_notebook():
         title_html = f"<h2>{title}</h2>" if title else ""
-        entity_items = '\n'.join(f'<li><strong>{col}:</strong> {_md_to_html(desc)}</li>' for col, desc in documented_entity_columns.items())
-        feature_items = '\n'.join(f'<li><strong>{col}:</strong> {_md_to_html(desc)}</li>' for col, desc in documented_feature_columns.items())
+        entity_items = (
+            '\n'.join(f'<li><strong>{col}:</strong> {_md_to_html(desc)}</li>'
+                      for col, desc in documented_entity_columns.items())
+            if documented_entity_columns is not None
+            else "<li><em>No entity columns documented.</em></li>"
+        )
+
+        feature_items = (
+            '\n'.join(f'<li><strong>{col}:</strong> {_md_to_html(desc)}</li>'
+                      for col, desc in documented_feature_columns.items())
+            if documented_feature_columns is not None
+            else "<li><em>No feature columns documented.</em></li>"
+        )
+
 
         # Build optional sections
         sql_section = ""
@@ -614,13 +626,13 @@ def build_documentation_json_schema(columns: List[str], provider: str = "generic
     # Fallback: generic JSON schema
     return base_schema
 
-
 def build_sql_documentation_chain(
     llm: ChatOpenAI,
     entity_columns: Sequence[str],
     feature_columns: Sequence[str],
     provider: str = "vllm",
     json_constraint: bool = True,
+    prompt_only: bool = False
 ) -> Runnable:
     """
     Build a LangChain Runnable that generates business-focused documentation
@@ -654,25 +666,32 @@ def build_sql_documentation_chain(
         If False:
         - the chain does not enforce JSON structure at the LLM level
         - the model is only guided by the prompt (weaker guarantees)
+    prompt_only : bool, optional (default=False)
+        If True:
+        - returns only the prompt template, without attaching the LLM or parser
+        - useful for debugging, testing, or customizing the prompt before execution
+        If False:
+        - returns the full chain: prompt → LLM (optionally schema-guided) → JSON parser
 
     Returns
     -------
     Runnable
-        A LangChain Runnable that executes:
-            prompt → LLM (optionally schema-guided) → JSON parser
-
-        When invoked with:
-            {
-                "sql_query": "...",
-                "columns_str": "Entity columns:\n- column1\n\nFeature columns:\n- column2\n..."
-            }
-
-        It returns:
-            dict[str, str]
-                A mapping of each requested column name to a short,
-                business-oriented description (5 sentences), plus a 'query_business_logic' key
-                containing a high-level description of the query's business logic (5-10 sentences), and an 'entity_description' key
-                with a holistic description of the entity (3-5 sentences).
+        If prompt_only=False:
+            A LangChain Runnable that executes:
+                prompt → LLM (optionally schema-guided) → JSON parser
+            When invoked with:
+                {
+                    "sql_query": "...",
+                    "columns_str": "Entity columns:\n- column1\n\nFeature columns:\n- column2\n..."
+                }
+            It returns:
+                dict[str, str]
+                    A mapping of each requested column name to a short,
+                    business-oriented description (≤ 5 sentences), plus a 'query_business_logic' key
+                    containing a high-level description of the query's business logic (5-10 sentences), and an 'entity_description' key
+                    with a holistic description of the entity (3-5 sentences).
+        If prompt_only=True:
+            The prompt template itself, for inspection or further customization.
 
     Notes
     -----
@@ -749,7 +768,10 @@ Columns to document (only document these):
         raw = ai_msg.content
         return parser.parse(raw)
 
-    return prompt | constrained_llm | RunnableLambda(_parse)
+    if prompt_only:
+        return prompt
+    else:
+        return prompt | constrained_llm | RunnableLambda(_parse)
 
 def run_sql_documentation(
     chain: Runnable,
@@ -825,7 +847,10 @@ def run_sql_documentation(
             "columns_str": columns_str,
             "language" : language
         })
-        logger_safe('info', f'run_sql_documentation: Successfully generated documentation for columns: {list(result.keys())}')
+        if isinstance(result, dict):
+            logger_safe('info', f'run_sql_documentation: Successfully generated documentation for columns: {list(result.keys())}')
+        else:
+            logger_safe('info', f'run_sql_documentation: Successfully generated documentation prompt')
         return result
     except Exception as e:
         logger_safe('error', f'run_sql_documentation: Failed to generate documentation: {e}')
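
Note: with prompt_only=True the builder returns the prompt template instead of the prompt → LLM → parser pipeline, and run_sql_documentation passes the rendered prompt straight through (hence the isinstance check above). A hedged sketch; llm, the columns and the SQL are illustrative placeholders, and a configured ChatOpenAI client is assumed:

    from tdfs4ds.genai.documentation import (
        build_sql_documentation_chain,
        run_sql_documentation,
    )

    chain = build_sql_documentation_chain(
        llm,                                  # a configured ChatOpenAI instance
        entity_columns  = ["customer_id"],
        feature_columns = ["order_amount"],
        prompt_only     = True,               # new in 0.2.5.4
    )
    # 'chain' is now the prompt template; invoking it renders the messages
    # without calling any model.
    rendered = run_sql_documentation(
        chain,
        "SELECT customer_id, order_amount FROM my_db.orders",
        ["customer_id"],
        ["order_amount"],
    )
    # With prompt_only=False the same call returns the parsed documentation dict.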
@@ -839,6 +864,7 @@ def document_sql_query_columns(
     language: str = "English",
     provider: Optional[str] = None,
     json_constraint: bool = True,
+    prompt_only: bool = False
 ) -> Dict[str, Any]:
     """
     Convenience function to generate business-focused documentation for SQL query output columns
@@ -886,20 +912,30 @@ def document_sql_query_columns(
         - the chain does not enforce JSON structure at the LLM level
         - the model is only guided by the prompt (weaker guarantees)
 
+    prompt_only : bool, optional (default=False)
+        If True:
+        - returns only the prompt template, without executing the chain
+        - useful for debugging, testing, or customizing the prompt before execution
+        If False:
+        - executes the full chain and returns structured documentation
+
     Returns
     -------
     dict
-        A dictionary with four keys:
-        - "query_business_logic": str containing the high-level business logic description of the query
-        - "entity_description": str containing the holistic description of the entity
-        - "entity_columns": dict[str, str] mapping each entity column name to its description
-        - "feature_columns": dict[str, str] mapping each feature column name to its description
+        If prompt_only=False:
+            A dictionary with four keys:
+            - "query_business_logic": str containing the high-level business logic description of the query
+            - "entity_description": str containing the holistic description of the entity
+            - "entity_columns": dict[str, str] mapping each entity column name to its description
+            - "feature_columns": dict[str, str] mapping each feature column name to its description
+        If prompt_only=True:
+            The prompt template itself, for inspection or further customization.
 
     Raises
     ------
     ValueError
         If any of the required tdfs4ds configuration variables (INSTRUCT_MODEL_URL,
-        INSTRUCT_MODEL_API_KEY, INSTRUCT_MODEL_MODEL) are not set.
+        INSTRUCT_MODEL_API_KEY, INSTRUCT_MODEL_MODEL, INSTRUCT_MODEL_PROVIDER) are not set.
 
     Notes
     -----
@@ -931,30 +967,35 @@ def document_sql_query_columns(
     )
 
     # Build the documentation chain
-    sql_doc_chain = build_sql_documentation_chain(llm, entity_columns, feature_columns, provider=provider, json_constraint=json_constraint)
+    sql_doc_chain = build_sql_documentation_chain(llm, entity_columns, feature_columns, provider=provider, json_constraint=json_constraint, prompt_only=prompt_only)
 
     # Run the documentation
     result = run_sql_documentation(sql_doc_chain, sql_query, entity_columns, feature_columns, language=language)
 
-    # Separate entity columns, feature columns, entity description, and query logic
-    entity_docs = {k: v for k, v in result.items() if k in entity_columns}
-    feature_docs = {k: v for k, v in result.items() if k in feature_columns}
-    entity_desc = result.get("entity_description", "")
-    query_logic = result.get("query_business_logic", "")
+    if prompt_only:
+        logger_safe('info', f'document_sql_query_columns: Successfully generated the prompt to be used with a LLM to generate the documentation')
+        return result
+    else:
+        # Separate entity columns, feature columns, entity description, and query logic
+        entity_docs = {k: v for k, v in result.items() if k in entity_columns}
+        feature_docs = {k: v for k, v in result.items() if k in feature_columns}
+        entity_desc = result.get("entity_description", "")
+        query_logic = result.get("query_business_logic", "")
 
-    logger_safe('info', f'document_sql_query_columns: Successfully completed documentation for {len(entity_docs)} entity columns, {len(feature_docs)} feature columns, entity description and query logic')
-    return {
-        "query_business_logic": query_logic,
-        "entity_description": entity_desc,
-        "entity_columns": entity_docs,
-        "feature_columns": feature_docs
-    }
+        logger_safe('info', f'document_sql_query_columns: Successfully completed documentation for {len(entity_docs)} entity columns, {len(feature_docs)} feature columns, entity description and query logic')
+        return {
+            "query_business_logic": query_logic,
+            "entity_description": entity_desc,
+            "entity_columns": entity_docs,
+            "feature_columns": feature_docs
+        }
 
 
 def build_explain_documentation_chain(
     llm: ChatOpenAI,
     provider: str = "vllm",
     json_constraint: bool = True,
+    prompt_only: bool = False
 ) -> Runnable:
     """
     Build a LangChain Runnable that analyzes SQL EXPLAIN plans and generates
@@ -1112,7 +1153,10 @@ Return ONLY valid JSON with the four keys above.
         raw = ai_msg.content
         return parser.parse(raw)
 
-    return prompt | constrained_llm | RunnableLambda(_parse)
+    if prompt_only:
+        return prompt
+    else:
+        return prompt | constrained_llm | RunnableLambda(_parse)
 
 
 def run_explain_documentation(
@@ -1144,7 +1188,10 @@ def run_explain_documentation(
             "sql_query": sql_query,
             "explain_plan": explain_plan
         })
-        logger_safe('info', f'run_explain_documentation: Successfully analyzed EXPLAIN plan. Score: {result.get("optimization_score", "N/A")}/5')
+        if isinstance(result, dict):
+            logger_safe('info', f'run_explain_documentation: Successfully analyzed EXPLAIN plan. Score: {result.get("optimization_score", "N/A")}/5')
+        else:
+            logger_safe('info', 'run_explain_documentation: Successfully generated the prompt to be used with a LLM to generate the documentation')
         return result
     except Exception as e:
         logger_safe('error', f'run_explain_documentation: Failed to analyze EXPLAIN plan: {e}')
@@ -1155,6 +1202,7 @@ def document_sql_query_explain(
     sql_query: str,
     provider: Optional[str] = None,
     json_constraint: bool = True,
+    prompt_only: bool = False
 ) -> Dict[str, Any]:
     """
     Analyze a SQL query's EXPLAIN plan and return optimization recommendations.
@@ -1213,10 +1261,13 @@ def document_sql_query_explain(
     # get the explain plan:
     explain_plan = get_the_explain(sql_query)
     # Build and run the EXPLAIN analysis chain
-    explain_chain = build_explain_documentation_chain(llm, provider=provider, json_constraint=json_constraint)
+    explain_chain = build_explain_documentation_chain(llm, provider=provider, json_constraint=json_constraint, prompt_only = prompt_only)
     result = run_explain_documentation(explain_chain, sql_query, explain_plan)
 
-    logger_safe('info', f'document_sql_query_explain: Successfully completed EXPLAIN analysis. Score: {result.get("optimization_score", "N/A")}/5')
+    if prompt_only:
+        logger_safe('info', f'document_sql_query_explain: Successfully completed EXPLAIN prompt generation')
+    else:
+        logger_safe('info', f'document_sql_query_explain: Successfully completed EXPLAIN analysis. Score: {result.get("optimization_score", "N/A")}/5')
     return result
 
 def documentation_tables_creation():
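
Note: the same switch is exposed end to end for EXPLAIN analysis. A sketch with an illustrative query; the full run assumes the INSTRUCT_MODEL_* configuration variables are set, and the prompt-only run still needs a database connection to fetch the EXPLAIN plan:

    from tdfs4ds.genai.documentation import document_sql_query_explain

    # Full analysis: EXPLAIN plan -> LLM -> parsed JSON with four keys.
    analysis = document_sql_query_explain(sql_query="SELECT * FROM my_db.orders")
    print(analysis["optimization_score"], analysis["warnings"])

    # Prompt only: the rendered prompt is returned and no model is called.
    prompt = document_sql_query_explain(
        sql_query   = "SELECT * FROM my_db.orders",
        prompt_only = True,
    )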
@@ -1313,7 +1364,7 @@ def documentation_tables_creation():
     logger_safe('info', 'documentation_tables_creation: Documentation tables creation process completed.')
     return
 
-def document_process(process_id: str, language: str = "English", json_constraint: bool = True, show_sql_query: bool = False, show_explain_plan: bool = False, display: bool = True, upload: bool = True) -> Optional[Dict[str, Any]]:
+def document_process(process_id: str, language: str = "English", json_constraint: bool = True, show_sql_query: bool = False, show_explain_plan: bool = False, display: bool = True, upload: bool = True, prompt_only = False) -> Optional[Dict[str, Any]]:
     """
     Generate and store documentation for a data process identified by process_id.
     This function retrieves the SQL query and output columns for the process,
@@ -1387,32 +1438,51 @@ def document_process(process_id: str, language: str = "English", json_constraint
     documentation = document_sql_query_columns(
         sql_query = process_info['PROCESS_SQL'],
         entity_columns = process_info['ENTITY_COLUMNS'],
-        feature_columns = process_info['FEATURE_COLUMNS']
+        feature_columns = process_info['FEATURE_COLUMNS'],
+        prompt_only = prompt_only
     )
 
-    process_info['DOCUMENTED_SQL'] = documentation['query_business_logic']
-    process_info['ENTITY_DESCRIPTION'] = documentation['entity_description']
-    process_info['DOCUMENTED_ENTITY_COLUMNS'] = documentation['entity_columns']
-    process_info['DOCUMENTED_FEATURE_COLUMNS'] = documentation['feature_columns']
-
-    if True:
-        explain_documentation = document_sql_query_explain(
-            sql_query = process_info['PROCESS_SQL']
-        )
-
-        process_info['EXPLAIN_ANALYSIS'] = explain_documentation['explanation']
-        process_info['OPTIMIZATION_SCORE'] = explain_documentation['optimization_score']
-        process_info['EXPLAIN_WARNINGS'] = explain_documentation['warnings']
-        process_info['EXPLAIN_RECOMMENDATIONS'] = explain_documentation['recommendations']
-
-        # Store the raw EXPLAIN plan if needed for display
-        if show_explain_plan:
-            process_info['RAW_EXPLAIN_PLAN'] = get_the_explain(process_info['PROCESS_SQL'])
+    if prompt_only:
+        process_info['PROMPT_BUSINESS_LOGIC_DESCRIPTION'] = documentation.messages[0].content
+        logger_safe('info', 'Prompt available in the PROMPT_BUSINESS_LOGIC_DESCRIPTION field.')
+        process_info['DOCUMENTED_SQL'] = None
+        process_info['ENTITY_DESCRIPTION'] = None
+        process_info['DOCUMENTED_ENTITY_COLUMNS'] = None
+        process_info['DOCUMENTED_FEATURE_COLUMNS'] = None
+    else:
+        process_info['DOCUMENTED_SQL'] = documentation['query_business_logic']
+        process_info['ENTITY_DESCRIPTION'] = documentation['entity_description']
+        process_info['DOCUMENTED_ENTITY_COLUMNS'] = documentation['entity_columns']
+        process_info['DOCUMENTED_FEATURE_COLUMNS'] = documentation['feature_columns']
+
+    explain_documentation = document_sql_query_explain(
+        sql_query = process_info['PROCESS_SQL'],
+        prompt_only=prompt_only
+    )
+
+    if prompt_only:
+        process_info['PROMPT_EXPLAIN_THE_EXPLAIN'] = explain_documentation.messages[0].content
+        logger_safe('info', 'Prompt available in the PROMPT_EXPLAIN_THE_EXPLAIN field.')
+        process_info['EXPLAIN_ANALYSIS'] = None
+        process_info['OPTIMIZATION_SCORE'] = None
+        process_info['EXPLAIN_WARNINGS'] = None
+        process_info['EXPLAIN_RECOMMENDATIONS'] = None
+    else:
+        process_info['EXPLAIN_ANALYSIS'] = explain_documentation['explanation']
+        process_info['OPTIMIZATION_SCORE'] = explain_documentation['optimization_score']
+        process_info['EXPLAIN_WARNINGS'] = explain_documentation['warnings']
+        process_info['EXPLAIN_RECOMMENDATIONS'] = explain_documentation['recommendations']
+
+    # Store the raw EXPLAIN plan if needed for display
+    if show_explain_plan:
+        process_info['RAW_EXPLAIN_PLAN'] = get_the_explain(process_info['PROCESS_SQL'])
 
     # Upload the generated documentation to the documentation tables:
-    if upload:
+    if upload and prompt_only == False:
         upload_documentation(process_info)
         logger_safe('info', f'document_process: Uploaded documentation for process_id {process_id} to documentation tables.')
+
+    if upload and prompt_only == False:
         upload_documentation_explain(process_info)
         logger_safe('info', f'document_process: Uploaded EXPLAIN analysis for process_id {process_id} to documentation tables.')
 
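
Note: in prompt-only mode document_process stores the two rendered prompts on process_info, leaves the documentation fields as None, and skips both uploads. A sketch, assuming a registered process id and that the call returns the enriched process_info (its annotation is Optional[Dict[str, Any]]):

    # 'my-process-id' is an illustrative placeholder.
    info = document_process('my-process-id', prompt_only=True, display=False)

    # Prompts ready to be run against any LLM of your choice:
    print(info['PROMPT_BUSINESS_LOGIC_DESCRIPTION'])
    print(info['PROMPT_EXPLAIN_THE_EXPLAIN'])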
@@ -1545,7 +1615,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
     logger_safe('info', f'upload_documentation: Uploading documentation for process_id {process_id} into staging tables.')
     tdml.copy_to_sql(
         df_business_logic,
-        table_name = "DOCUMENTATION_PROCESS_BUSINESS_LOGIC_STAGING",
+        table_name = "DOC_PROCESS_BUSINESS_LOGIC_STAGING",
         if_exists = 'replace',
         temporary = True
     )
@@ -1555,7 +1625,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
     logger_safe('info', f'upload_documentation: Uploading feature documentation for process_id {process_id} into staging tables.')
     tdml.copy_to_sql(
         df_features,
-        table_name = "DOCUMENTATION_PROCESS_FEATURES_STAGING",
+        table_name = "DOC_PROCESS_FEATURES_STAGING",
         if_exists = 'replace',
         temporary = True
     )
@@ -1571,7 +1641,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
         BUSINESS_LOGIC_DESCRIPTION,
         ENTITY_DESCRIPTION,
         ENTITY_COLUMNS_JSON
-    FROM {_get_database_username()}.DOCUMENTATION_PROCESS_BUSINESS_LOGIC_STAGING
+    FROM {_get_database_username()}.DOC_PROCESS_BUSINESS_LOGIC_STAGING
     ) UPDATED
     ON EXISTING.PROCESS_ID = UPDATED.PROCESS_ID
     WHEN MATCHED THEN
@@ -1599,7 +1669,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
         FC.FEATURE_ID,
         A.FEATURE_NAME,
         A.FEATURE_DESCRIPTION
-    FROM {_get_database_username()}.DOCUMENTATION_PROCESS_FEATURES_STAGING A
+    FROM {_get_database_username()}.DOC_PROCESS_FEATURES_STAGING A
     INNER JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} FC
     ON UPPER(FC.FEATURE_NAME) = UPPER(A.FEATURE_NAME)
     AND UPPER(FC.DATA_DOMAIN) = '{process_info['DATA_DOMAIN'].upper()}'
@@ -1627,7 +1697,7 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
     WHERE PROCESS_ID = '{process_id}'
     AND FEATURE_ID NOT IN (
         SELECT FC.FEATURE_ID
-        FROM {_get_database_username()}.DOCUMENTATION_PROCESS_FEATURES_STAGING A
+        FROM {_get_database_username()}.DOC_PROCESS_FEATURES_STAGING A
         INNER JOIN {tdfs4ds.SCHEMA}.{tdfs4ds.FEATURE_CATALOG_NAME} FC
         ON UPPER(FC.FEATURE_NAME) = UPPER(A.FEATURE_NAME)
         AND UPPER(FC.DATA_DOMAIN) = '{process_info['DATA_DOMAIN'].upper()}'
@@ -1658,8 +1728,8 @@ def upload_documentation(process_info: Dict[str, Any]) -> None:
         raise
 
     # remove staging tables
-    tdml.execute_sql(f"DROP TABLE {_get_database_username()}.DOCUMENTATION_PROCESS_BUSINESS_LOGIC_STAGING")
-    tdml.execute_sql(f"DROP TABLE {_get_database_username()}.DOCUMENTATION_PROCESS_FEATURES_STAGING")
+    tdml.execute_sql(f"DROP TABLE {_get_database_username()}.DOC_PROCESS_BUSINESS_LOGIC_STAGING")
+    tdml.execute_sql(f"DROP TABLE {_get_database_username()}.DOC_PROCESS_FEATURES_STAGING")
     logger_safe('info', f'upload_documentation: Successfully uploaded documentation for process_id {process_id}.')
 
     return
@@ -1875,4 +1945,113 @@ def display_process_info(process_info: Dict[str, Any] = None, process_id : str =
         explain_recommendations = process_info.get('EXPLAIN_RECOMMENDATIONS', None),
         sql_query = process_info.get('PROCESS_SQL', None),
     )
-    return
+    return
+
+
+def feed_process_info_with_prompt_result(process_info, sql_documentation_response=None, sql_explain_response=None, display_info=True, upload_info=True):
+    """
+    Enriches a process_info dictionary with SQL documentation and EXPLAIN plan analysis results,
+    with options to display the results and upload the enriched information.
+
+    This function integrates the results of SQL documentation and EXPLAIN plan analysis into the provided
+    `process_info` dictionary. It extracts and organizes documentation for entity and feature columns,
+    as well as optimization insights, to provide a comprehensive view of the SQL query's business logic,
+    performance, and potential improvements. It also supports optional display of the enriched information
+    and automatic upload of the documentation and EXPLAIN analysis to a backend system.
+
+    Args:
+        process_info (dict): A dictionary containing metadata about the SQL process, including:
+            - 'ENTITY_COLUMNS': List of columns representing the entity in the SQL query.
+            - 'FEATURE_COLUMNS': List of columns representing features in the SQL query.
+        sql_documentation_response (dict, optional): A dictionary containing SQL documentation results,
+            including descriptions for entity/feature columns and query business logic. Expected keys:
+            - 'query_business_logic': Description of the query's purpose and logic.
+            - 'entity_description': Description of the entity represented by the query.
+            - Column names as keys, with their descriptions as values.
+        sql_explain_response (dict, optional): A dictionary containing SQL EXPLAIN plan analysis results,
+            including:
+            - 'explanation': Detailed analysis of the EXPLAIN plan.
+            - 'optimization_score': Integer score (1-5) indicating query optimization level.
+            - 'warnings': List of potential issues identified in the EXPLAIN plan.
+            - 'recommendations': List of actionable recommendations for query optimization.
+        display_info (bool, optional): If True, displays the enriched process_info using `display_process_info`.
+            Defaults to True.
+        upload_info (bool, optional): If True, uploads the enriched documentation and EXPLAIN analysis to a backend system.
+            Defaults to True.
+
+    Returns:
+        dict: The enriched `process_info` dictionary with the following additional keys (if input responses are provided):
+            - 'DOCUMENTED_SQL': Business logic description of the SQL query.
+            - 'ENTITY_DESCRIPTION': Description of the entity represented by the query.
+            - 'DOCUMENTED_ENTITY_COLUMNS': Dictionary of documented entity columns and their descriptions.
+            - 'DOCUMENTED_FEATURE_COLUMNS': Dictionary of documented feature columns and their descriptions.
+            - 'EXPLAIN_ANALYSIS': Analysis of the EXPLAIN plan.
+            - 'OPTIMIZATION_SCORE': Optimization score (1-5) for the query.
+            - 'EXPLAIN_WARNINGS': List of warnings from the EXPLAIN plan analysis.
+            - 'EXPLAIN_RECOMMENDATIONS': List of optimization recommendations.
+
+    Raises:
+        Logs errors for any exceptions encountered during the update or upload process, but does not raise them.
+        Errors are logged using `logger_safe` with a descriptive message.
+
+    Example:
+        >>> process_info = {
+        ...     'ENTITY_COLUMNS': ['customer_id', 'order_id'],
+        ...     'FEATURE_COLUMNS': ['order_amount', 'order_date']
+        ... }
+        >>> sql_documentation_response = {
+        ...     'query_business_logic': 'This query joins customer and order data...',
+        ...     'entity_description': 'The customer entity represents...',
+        ...     'customer_id': 'Unique identifier for customers.',
+        ...     'order_amount': 'Total amount of the order.'
+        ... }
+        >>> sql_explain_response = {
+        ...     'explanation': 'The EXPLAIN plan shows a nested loop join...',
+        ...     'optimization_score': 3,
+        ...     'warnings': ['Full table scan on orders table'],
+        ...     'recommendations': ['Add index on orders.customer_id']
+        ... }
+        >>> enriched_info = feed_process_info_with_prompt_result(
+        ...     process_info,
+        ...     sql_documentation_response,
+        ...     sql_explain_response,
+        ...     display_info=True,
+        ...     upload_info=True
+        ... )
+        >>> print(enriched_info.keys())
+        ['ENTITY_COLUMNS', 'FEATURE_COLUMNS', 'DOCUMENTED_SQL', 'ENTITY_DESCRIPTION',
+         'DOCUMENTED_ENTITY_COLUMNS', 'DOCUMENTED_FEATURE_COLUMNS', 'EXPLAIN_ANALYSIS',
+         'OPTIMIZATION_SCORE', 'EXPLAIN_WARNINGS', 'EXPLAIN_RECOMMENDATIONS']
+    """
+
+    entity_columns = process_info['ENTITY_COLUMNS']
+    feature_columns = process_info['FEATURE_COLUMNS']
+
+    if sql_documentation_response is not None:
+        try:
+            process_info['DOCUMENTED_SQL'] = sql_documentation_response['query_business_logic']
+            process_info['ENTITY_DESCRIPTION'] = sql_documentation_response['entity_description']
+            process_info['DOCUMENTED_ENTITY_COLUMNS'] = {k: v for k, v in sql_documentation_response.items() if k in entity_columns}
+            process_info['DOCUMENTED_FEATURE_COLUMNS'] = {k: v for k, v in sql_documentation_response.items() if k in feature_columns}
+            logger_safe('info', 'update of the SQL documentation in process_info')
+            if upload_info:
+                upload_documentation(process_info)
+        except Exception as e:
+            logger_safe('error',f"error in updating the SQL documentation : {str(e).split('\n')[0]}")
+
+    if sql_explain_response is not None:
+        try:
+            process_info['EXPLAIN_ANALYSIS'] = sql_explain_response['explanation']
+            process_info['OPTIMIZATION_SCORE'] = sql_explain_response['optimization_score']
+            process_info['EXPLAIN_WARNINGS'] = sql_explain_response['warnings']
+            process_info['EXPLAIN_RECOMMENDATIONS'] = sql_explain_response['recommendations']
+            logger_safe('info', 'update of the EXPLAIN documentation in process_info')
+            if upload_info:
+                upload_documentation_explain(process_info)
+        except Exception as e:
+            logger_safe('error',f"error in updating the EXPLAIN documentation : {str(e).split('\n')[0]}")
+
+    if display_info:
+        display_process_info(process_info)
+
+    return process_info
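
Note: combined with prompt_only=True, this helper closes the loop when the model call happens outside tdfs4ds: render the prompts, run them with any LLM, then feed the parsed answers back. A sketch under those assumptions; call_my_llm is a hypothetical helper returning the model's raw JSON string:

    import json

    # 1. Render the prompts without any model call ('my-process-id' is illustrative).
    info = document_process('my-process-id', prompt_only=True, display=False)

    # 2. Run the prompts with the LLM of your choice (hypothetical helper).
    doc_json     = call_my_llm(info['PROMPT_BUSINESS_LOGIC_DESCRIPTION'])
    explain_json = call_my_llm(info['PROMPT_EXPLAIN_THE_EXPLAIN'])

    # 3. Feed the parsed answers back; with upload_info=True the documentation
    #    and the EXPLAIN analysis are pushed to the documentation tables.
    info = feed_process_info_with_prompt_result(
        info,
        sql_documentation_response = json.loads(doc_json),
        sql_explain_response       = json.loads(explain_json),
        display_info = True,
        upload_info  = True,
    )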
tdfs4ds-0.2.5.2.dist-info/METADATA → tdfs4ds-0.2.5.4.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tdfs4ds
-Version: 0.2.5.2
+Version: 0.2.5.4
 Summary: A python package to simplify the usage of feature store using Teradata Vantage ...
 Author: Denis Molin
 Requires-Python: >=3.6
@@ -12,6 +12,7 @@ Requires-Dist: plotly
 Requires-Dist: tqdm
 Requires-Dist: networkx
 Requires-Dist: sqlparse
+Requires-Dist: langchain-openai
 
 ![tdfs4ds logo](https://github.com/denismolin/tdfs4ds/raw/main/tdfs4ds_logo.png)
 
tdfs4ds-0.2.5.2.dist-info/RECORD → tdfs4ds-0.2.5.4.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-tdfs4ds/__init__.py,sha256=NeD8Lf1HwfqupVOzESCydySlk_TdvlQbJflg7MZTKm8,70555
+tdfs4ds/__init__.py,sha256=gh7Uv7WmkSjxqbxfDKnemioQrywtwLEGU4XEUElI4VQ,70555
 tdfs4ds/datasets.py,sha256=LE4Gn0muwdyrIrCrbkE92cnafUML63z1lj5bFIIVzmc,3524
 tdfs4ds/data/curves.csv,sha256=q0Tm-0yu7VMK4lHvHpgi1LMeRq0lO5gJy2Q17brKbEM,112488
 tdfs4ds/data/logo/tdfs4ds_logo.png,sha256=OCKQnH0gQbRyupwZeiIgo-9c6mdRtjE2E2Zunr_4Ae0,363980
@@ -9,11 +9,11 @@ tdfs4ds/dataset/dataset.py,sha256=J_fgfsVdR9zSOXrUOqyotqsUD-GlQMGyuld6ueov45w,76
 tdfs4ds/dataset/dataset_catalog.py,sha256=qxS2thDW2MvsRouSFaX1M0sX2J7IzBAYD8Yf22Tsd5k,16638
 tdfs4ds/feature_store/__init__.py,sha256=a7NPCkpTx40UR5LRErwnskpABG2Vuib7F5wUjaUGCnI,209
 tdfs4ds/feature_store/entity_management.py,sha256=9ltytv3yCTG84NZXBpb1Tlkf9pOxvrNb0MVidU4pwvE,10157
-tdfs4ds/feature_store/feature_data_processing.py,sha256=gXBsr1H05zxM4tWE7y29ucxeoTu1jQITOwTXqi1Y2pk,45214
+tdfs4ds/feature_store/feature_data_processing.py,sha256=mC58pmxIeJ7Sdw-IUvx-ToSDa6D6OBRq8MPvbmp33G0,46214
 tdfs4ds/feature_store/feature_query_retrieval.py,sha256=51c6ZNlLFiBIxNPinS8ot8bjWEIb1QV2eVg69yzVF80,35381
-tdfs4ds/feature_store/feature_store_management.py,sha256=mtPQkdMDhcOrhj9IAaH-FEP_znK53cYtEv8zXAbsigg,52123
-tdfs4ds/genai/__init__.py,sha256=Hal13Kw75nDYKHtfvHZNdm98exqmY6qaqGZkJA2TQ6E,723
-tdfs4ds/genai/documentation.py,sha256=9BOqV7F4XVBDF8SYU6W8TRsRnDvIxR8CV4bauVimSe0,82056
+tdfs4ds/feature_store/feature_store_management.py,sha256=qsazxRC4jxBwfwNYpRhrDLDBtnq2BfePTQ31vmDFH_o,52190
+tdfs4ds/genai/__init__.py,sha256=Os1NpNPNr1h5-25xt_jckIqImI3jDMxjxUvM7TqEXzE,811
+tdfs4ds/genai/documentation.py,sha256=rcGPupWpVSG8vhGjk_AWcHarvaImM9XEBkxJYiy5SK0,92244
 tdfs4ds/process_store/__init__.py,sha256=npHR_xju5ecGmWfYHDyteLwiU3x-cL4HD3sFK_th7xY,229
 tdfs4ds/process_store/process_followup.py,sha256=E4jgQahjhVRBbfAW3JXNLId7H5qV8ozRt-6PyAQuPzg,12583
 tdfs4ds/process_store/process_query_administration.py,sha256=AOufkJ6DFUpBiGm-6Q6Dq0Aovw31UGTscZ3Ya0ewS-0,7851
@@ -26,7 +26,7 @@ tdfs4ds/utils/lineage.py,sha256=gy5M42qy5fvdWmlohAY3WPYoqAyp5VakeEmeT1YjrJQ,3783
 tdfs4ds/utils/query_management.py,sha256=kWDeTdsYcbpV5Tyhh-8uLRWvXh16nIdXNIJ97w76aNU,4848
 tdfs4ds/utils/time_management.py,sha256=g3EJO7I8ERoZ4X7yq5SyDqSE4O9p0BRcv__QPuAxbGA,32243
 tdfs4ds/utils/visualization.py,sha256=5S528KoKzzkrAdCxfy7ecyqKvAXBoibNvHwz_u5ISMs,23167
-tdfs4ds-0.2.5.2.dist-info/METADATA,sha256=qADkn9deR_9Yo-IRA5J7VHE1KNcgwL2qNCKFA3DIg_o,14325
-tdfs4ds-0.2.5.2.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
-tdfs4ds-0.2.5.2.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
-tdfs4ds-0.2.5.2.dist-info/RECORD,,
+tdfs4ds-0.2.5.4.dist-info/METADATA,sha256=2pdrLXw7n-nNTfy3Qw9bt8COOfp7LkzjgqI7IupivR0,14358
+tdfs4ds-0.2.5.4.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+tdfs4ds-0.2.5.4.dist-info/top_level.txt,sha256=wMyVkMvnBn8RRt1xBveGQxOpWFijPMPkMiE7G2mi8zo,8
+tdfs4ds-0.2.5.4.dist-info/RECORD,,