unique_toolkit 1.42.8__py3-none-any.whl → 1.43.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. unique_toolkit/_common/experimental/write_up_agent/README.md +848 -0
  2. unique_toolkit/_common/experimental/write_up_agent/__init__.py +22 -0
  3. unique_toolkit/_common/experimental/write_up_agent/agent.py +170 -0
  4. unique_toolkit/_common/experimental/write_up_agent/config.py +42 -0
  5. unique_toolkit/_common/experimental/write_up_agent/examples/data.csv +13 -0
  6. unique_toolkit/_common/experimental/write_up_agent/examples/example_usage.py +78 -0
  7. unique_toolkit/_common/experimental/write_up_agent/examples/report.md +154 -0
  8. unique_toolkit/_common/experimental/write_up_agent/schemas.py +36 -0
  9. unique_toolkit/_common/experimental/write_up_agent/services/__init__.py +13 -0
  10. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/__init__.py +19 -0
  11. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/exceptions.py +29 -0
  12. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/service.py +150 -0
  13. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/utils.py +130 -0
  14. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/__init__.py +27 -0
  15. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/config.py +56 -0
  16. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/exceptions.py +79 -0
  17. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/config.py +34 -0
  18. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/system_prompt.j2 +15 -0
  19. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/user_prompt.j2 +21 -0
  20. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/service.py +369 -0
  21. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/__init__.py +29 -0
  22. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/default_template.j2 +37 -0
  23. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/exceptions.py +39 -0
  24. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/service.py +191 -0
  25. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/utils.py +182 -0
  26. unique_toolkit/_common/experimental/write_up_agent/utils.py +24 -0
  27. unique_toolkit/agentic/feature_flags/__init__.py +6 -0
  28. unique_toolkit/agentic/feature_flags/feature_flags.py +32 -0
  29. unique_toolkit/agentic/message_log_manager/service.py +88 -12
  30. {unique_toolkit-1.42.8.dist-info → unique_toolkit-1.43.0.dist-info}/METADATA +7 -1
  31. {unique_toolkit-1.42.8.dist-info → unique_toolkit-1.43.0.dist-info}/RECORD +33 -5
  32. {unique_toolkit-1.42.8.dist-info → unique_toolkit-1.43.0.dist-info}/LICENSE +0 -0
  33. {unique_toolkit-1.42.8.dist-info → unique_toolkit-1.43.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,150 @@
1
+ """DataFrame handler service."""
2
+
3
+ import pandas as pd
4
+
5
+ from unique_toolkit._common.experimental.write_up_agent.schemas import GroupData
6
+ from unique_toolkit._common.experimental.write_up_agent.services.dataframe_handler.exceptions import (
7
+ DataFrameGroupingError,
8
+ DataFrameProcessingError,
9
+ DataFrameValidationError,
10
+ )
11
+ from unique_toolkit._common.experimental.write_up_agent.services.dataframe_handler.utils import (
12
+ dataframe_to_dict_records,
13
+ normalize_column_names,
14
+ to_snake_case,
15
+ )
16
+
17
+
18
class DataFrameHandler:
    """
    Handles all DataFrame operations.

    This handler automatically converts all column names to snake_case to ensure
    compatibility with Jinja template syntax. For example:
    - "My Column" becomes "my_column"
    - "UserName" becomes "user_name"
    - "column-name" becomes "column_name"

    This normalization happens automatically during validation and grouping operations.

    Responsibilities:
    - Normalize column names to snake_case
    - Validate DataFrame has required columns
    - Create groups from DataFrame
    """

    def validate_columns(
        self, df: pd.DataFrame, grouping_column: str, selected_columns: list[str]
    ) -> None:
        """
        Validate DataFrame has required columns.

        NOTE: Column names are automatically converted to snake_case before validation.
        Ensure your template uses snake_case column references (e.g., {{ row.my_column }}).

        Args:
            df: pandas DataFrame to validate
            grouping_column: Column to group by (should be in snake_case)
            selected_columns: Columns that should exist (should be in snake_case)

        Raises:
            DataFrameValidationError: If columns are missing after normalization

        Example:
            >>> df = pd.DataFrame({"My Section": [1], "My Question": [2]})
            >>> handler.validate_columns(df, "my_section", ["my_question"])
            # Validation passes because "My Section" -> "my_section"
        """
        # Normalize DataFrame columns to snake_case
        normalized_df = normalize_column_names(df)

        required_columns = {grouping_column} | set(selected_columns)
        missing_columns = required_columns - set(normalized_df.columns)

        if missing_columns:
            raise DataFrameValidationError(
                f"DataFrame missing required columns after snake_case normalization: {sorted(missing_columns)}. "
                f"Available columns: {sorted(normalized_df.columns)}",
                missing_columns=sorted(missing_columns),
            )

    def create_groups(
        self, df: pd.DataFrame, grouping_column: str, selected_columns: list[str]
    ) -> list[GroupData]:
        """
        Create groups from DataFrame.

        NOTE: Column names are automatically converted to snake_case, and the
        group key VALUES are normalized to snake_case as well (the code below
        applies to_snake_case to each group key), so templates can reference
        them consistently.

        The returned GroupData instances will have:
        - snake_case column names in their rows
        - snake_case group_key values

        IMPORTANT: Groups are returned in the order of their first appearance in the DataFrame,
        NOT sorted alphabetically. This preserves the logical flow of your data.

        Args:
            df: pandas DataFrame to group
            grouping_column: Column to group by (should be in snake_case)
            selected_columns: Columns to include in rows (should be in snake_case).
                Columns not present in the DataFrame are silently skipped; call
                validate_columns first to fail loudly instead.

        Returns:
            List of GroupData instances in order of first appearance, each containing
            group_key (in snake_case) and rows with snake_case columns

        Raises:
            DataFrameGroupingError: If grouping fails
            DataFrameProcessingError: If data processing fails

        Example:
            >>> df = pd.DataFrame({
            ...     "My Section": ["Intro", "Methods", "Results", "Intro"],
            ...     "My Question": ["Q1", "Q2", "Q3", "Q4"],
            ... })
            >>> groups = handler.create_groups(df, "my_section", ["my_question"])
            >>> [g.group_key for g in groups]
            ['intro', 'methods', 'results']  # Values normalized to snake_case, order preserved
        """
        # Normalize column names to snake_case
        normalized_df = normalize_column_names(df)

        if grouping_column not in normalized_df.columns:
            raise DataFrameGroupingError(
                f"Grouping column '{grouping_column}' not found in normalized DataFrame. "
                f"Available columns: {sorted(normalized_df.columns)}",
                grouping_column=grouping_column,
            )

        try:
            # Use sort=False to preserve the order of first appearance in the DataFrame
            grouped = normalized_df.groupby(grouping_column, sort=False)
        except Exception as e:
            raise DataFrameGroupingError(
                f"Failed to group DataFrame by '{grouping_column}': {e}",
                grouping_column=grouping_column,
            ) from e

        results = []

        try:
            for group_key, group_df in grouped:
                # Filter columns if specified
                if selected_columns:
                    cols_to_use = [c for c in selected_columns if c in group_df.columns]
                    limited_df = group_df.loc[:, cols_to_use]
                else:
                    limited_df = group_df

                # Convert to dict records
                rows = dataframe_to_dict_records(limited_df)

                # Normalize group_key value to snake_case for consistency with template syntax
                normalized_group_key = to_snake_case(str(group_key))

                # Create GroupData instance with proper typing
                results.append(GroupData(group_key=normalized_group_key, rows=rows))
        except Exception as e:
            raise DataFrameProcessingError(f"Error processing grouped data: {e}") from e

        return results
@@ -0,0 +1,130 @@
1
+ """Utility functions for DataFrame operations."""
2
+
3
+ import re
4
+
5
+ import pandas as pd
6
+
7
+
8
def to_snake_case(text: str) -> str:
    """
    Convert a string to snake_case.

    This ensures column names are compatible with Jinja template syntax.
    Runs of consecutive uppercase letters (acronyms) are kept together as a
    single word, so "User ID" becomes "user_id" rather than "user_i_d".

    Examples:
        >>> to_snake_case("MyColumn")
        'my_column'
        >>> to_snake_case("my_column")
        'my_column'
        >>> to_snake_case("My Column Name")
        'my_column_name'
        >>> to_snake_case("column-name")
        'column_name'
        >>> to_snake_case("Column_123")
        'column_123'
        >>> to_snake_case("User ID")
        'user_id'
        >>> to_snake_case("HTTPResponse")
        'http_response'

    Args:
        text: String to convert

    Returns:
        snake_case version of the string
    """
    # Replace spaces and hyphens with underscores
    text = text.replace(" ", "_").replace("-", "_")

    # Insert underscore at lowercase/digit -> uppercase boundaries (camelCase).
    # Note: a bare "insert before every non-initial uppercase" rule would split
    # acronyms apart ("ID" -> "i_d"), so word boundaries are detected instead.
    text = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", text)

    # Insert underscore between an acronym and a following capitalized word
    # (e.g. "HTTPResponse" -> "HTTP_Response") while keeping the acronym intact.
    text = re.sub(r"(?<=[A-Z])(?=[A-Z][a-z])", "_", text)

    # Convert to lowercase
    text = text.lower()

    # Collapse duplicate underscores
    text = re.sub(r"_+", "_", text)

    # Remove leading/trailing underscores
    return text.strip("_")
48
+
49
+
50
def from_snake_case_to_display_name(text: str) -> str:
    """
    Turn a snake_case identifier into a human-readable Title Case label.

    Args:
        text: snake_case text to convert

    Returns:
        Title Case version of the text

    Example:
        >>> from_snake_case_to_display_name("executive_summary")
        'Executive Summary'
        >>> from_snake_case_to_display_name("my_column_name")
        'My Column Name'
        >>> from_snake_case_to_display_name("api_design")
        'Api Design'
    """
    # Capitalize every underscore-separated part and rejoin with spaces.
    parts = text.split("_")
    return " ".join(part.capitalize() for part in parts)
71
+
72
+
73
def normalize_column_names(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a new DataFrame whose column names are all snake_case.

    Jinja attribute access (e.g. {{ row.my_column }}) only works with
    identifier-style names, hence the normalization.

    Examples:
        >>> df = pd.DataFrame({"My Column": [1], "AnotherColumn": [2]})
        >>> normalized = normalize_column_names(df)
        >>> list(normalized.columns)
        ['my_column', 'another_column']

    Args:
        df: Input DataFrame

    Returns:
        New DataFrame with normalized column names
    """
    ## TODO [UN-16142]: Normalization may lead to duplicate column names, we should handle this case
    return df.rename(columns={column: to_snake_case(column) for column in df.columns})
95
+
96
+
97
def limit_dataframe_rows(df: pd.DataFrame, max_rows: int) -> pd.DataFrame:
    """
    Return a copy of the DataFrame truncated to at most max_rows rows.

    Args:
        df: DataFrame to limit
        max_rows: Maximum number of rows

    Returns:
        DataFrame with at most max_rows rows (always a copy of the input)
    """
    # head() already returns the whole frame when it is short enough, so a
    # single call covers both the "under the limit" and "over" cases.
    return df.head(max_rows).copy()
111
+
112
+
113
+ def dataframe_to_dict_records(
114
+ df: pd.DataFrame, columns: list[str] | None = None
115
+ ) -> list[dict]:
116
+ """
117
+ Convert DataFrame to list of dict records.
118
+
119
+ Args:
120
+ df: DataFrame to convert
121
+ columns: Optional list of columns to include
122
+
123
+ Returns:
124
+ List of dict records
125
+ """
126
+ if columns:
127
+ df = df.loc[:, columns]
128
+
129
+ # Replace NaN with None for better serialization
130
+ return df.where(pd.notna(df), None).to_dict(orient="records")
@@ -0,0 +1,27 @@
1
+ """Generation handler module."""
2
+
3
+ from unique_toolkit._common.experimental.write_up_agent.services.generation_handler.config import (
4
+ GenerationHandlerConfig,
5
+ )
6
+ from unique_toolkit._common.experimental.write_up_agent.services.generation_handler.exceptions import (
7
+ AggregationError,
8
+ BatchCreationError,
9
+ GenerationHandlerError,
10
+ LLMCallError,
11
+ PromptBuildError,
12
+ TokenLimitError,
13
+ )
14
+ from unique_toolkit._common.experimental.write_up_agent.services.generation_handler.service import (
15
+ GenerationHandler,
16
+ )
17
+
18
+ __all__ = [
19
+ "GenerationHandler",
20
+ "GenerationHandlerConfig",
21
+ "GenerationHandlerError",
22
+ "BatchCreationError",
23
+ "PromptBuildError",
24
+ "LLMCallError",
25
+ "AggregationError",
26
+ "TokenLimitError",
27
+ ]
@@ -0,0 +1,56 @@
1
+ """Configuration for generation handler."""
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from unique_toolkit._common.experimental.write_up_agent.services.generation_handler.prompts.config import (
6
+ GenerationHandlerPromptsConfig,
7
+ )
8
+ from unique_toolkit._common.pydantic_helpers import get_configuration_dict
9
+ from unique_toolkit._common.validators import LMI, get_LMI_default_field
10
+ from unique_toolkit.language_model.default_language_model import DEFAULT_GPT_4o
11
+
12
+
13
class GenerationHandlerConfig(BaseModel):
    """Configuration for generation handler.

    This configuration controls how groups are batched, how prompts are built,
    and how the LLM is called for generating summaries.
    """

    model_config = get_configuration_dict()

    # Model used for every summary-generation call.
    language_model: LMI = get_LMI_default_field(
        DEFAULT_GPT_4o,
        description="The language model to use for generating summaries.",
    )

    # Instruction prepended for every group, regardless of group key.
    common_instruction: str = Field(
        default="You are a technical writer. Summarize the provided content concisely and clearly.",
        description="Common instruction applied to all groups",
    )

    # TODO [UN-16142]: Add default instructions for each group
    # Per-group overrides; keyed as 'column:value' per the description below.
    group_specific_instructions: dict[str, str] = Field(
        default_factory=dict,
        description=(
            "Custom instructions per group. "
            "Keys should be formatted as 'column:value' (e.g., 'section:Introduction')"
        ),
    )

    # Primary batching limit (ge=100 enforced by pydantic).
    max_tokens_per_batch: int = Field(
        default=4000,
        ge=100,
        description="Maximum tokens per batch for LLM input (affects batching strategy)",
    )

    # Secondary batching limit applied on top of the token limit.
    max_rows_per_batch: int = Field(
        default=20,
        ge=1,
        description="Maximum rows per batch (secondary limit to tokens)",
    )

    # System/user prompt templates; defaults come from the bundled .j2 files.
    prompts_config: GenerationHandlerPromptsConfig = Field(
        default_factory=GenerationHandlerPromptsConfig,
        description="Configuration for the prompts.",
    )
@@ -0,0 +1,79 @@
1
+ """Exceptions for generation handler operations."""
2
+
3
+
4
class GenerationHandlerError(Exception):
    """Root of the generation-handler exception hierarchy.

    Catch this to handle any error raised by the generation handler.
    """
8
+
9
+
10
class BatchCreationError(GenerationHandlerError):
    """Raised when a batch of rows cannot be assembled for generation."""

    def __init__(
        self,
        message: str,
        group_key: str | None = None,
        row_count: int | None = None,
    ):
        # Record which group failed and how many rows it held before
        # delegating the message to the base Exception.
        self.group_key = group_key
        self.row_count = row_count
        super().__init__(message)
22
+
23
+
24
class PromptBuildError(GenerationHandlerError):
    """Raised when building/rendering a prompt fails."""

    def __init__(
        self,
        message: str,
        prompt_type: str | None = None,
        context: dict | None = None,
    ):
        self.prompt_type = prompt_type
        # Store a dict in every case so callers can iterate without a None check.
        self.context = context if context else {}
        super().__init__(message)
36
+
37
+
38
class LLMCallError(GenerationHandlerError):
    """Raised when a call to the language model fails."""

    def __init__(
        self,
        message: str,
        group_key: str | None = None,
        batch_index: int | None = None,
        error_details: str | None = None,
    ):
        # Keep group, batch position and raw error details so the failing
        # call can be pinpointed in logs.
        self.group_key = group_key
        self.batch_index = batch_index
        self.error_details = error_details
        super().__init__(message)
52
+
53
+
54
class AggregationError(GenerationHandlerError):
    """Raised when aggregating batch results fails."""

    def __init__(
        self,
        message: str,
        group_key: str | None = None,
        batch_count: int | None = None,
    ):
        # How many batches were being merged, and for which group.
        self.group_key = group_key
        self.batch_count = batch_count
        super().__init__(message)
66
+
67
+
68
class TokenLimitError(GenerationHandlerError):
    """Raised when token counting or limit validation fails."""

    def __init__(
        self,
        message: str,
        estimated_tokens: int | None = None,
        max_tokens: int | None = None,
    ):
        # Estimate vs. configured ceiling, for error reporting.
        self.estimated_tokens = estimated_tokens
        self.max_tokens = max_tokens
        super().__init__(message)
@@ -0,0 +1,34 @@
1
+ from pathlib import Path
2
+ from typing import Annotated
3
+
4
+ from pydantic import BaseModel, Field
5
+
6
+ from unique_toolkit._common.experimental.write_up_agent.utils import template_loader
7
+ from unique_toolkit._common.pydantic.rjsf_tags import RJSFMetaTag
8
+ from unique_toolkit._common.pydantic_helpers import get_configuration_dict
9
+
10
PARENT_DIR = Path(__file__).parent

# Default prompt texts, loaded once at module import time.
# NOTE(review): assumes template_loader returns the .j2 file contents as a str
# (the line count is used to size textarea widgets) — confirm against
# write_up_agent.utils.template_loader.
_SYSTEM_PROMPT_TEMPLATE = template_loader(PARENT_DIR, "system_prompt.j2")
_USER_PROMPT_TEMPLATE = template_loader(PARENT_DIR, "user_prompt.j2")
14
+
15
+
16
class GenerationHandlerPromptsConfig(BaseModel):
    """Prompt templates used by the generation handler.

    Both templates default to the bundled .j2 files; the textarea widgets are
    sized to the default template's line count.
    """

    model_config = get_configuration_dict()

    system_prompt_template: Annotated[
        str,
        RJSFMetaTag.StringWidget.textarea(
            rows=len(_SYSTEM_PROMPT_TEMPLATE.split("\n"))
        ),
    ] = Field(
        default=_SYSTEM_PROMPT_TEMPLATE,
        # Fixed copy-pasted description: this module configures summary
        # generation, not source selection.
        description="The system prompt for the summary generation.",
    )
    user_prompt_template: Annotated[
        str,
        RJSFMetaTag.StringWidget.textarea(rows=len(_USER_PROMPT_TEMPLATE.split("\n"))),
    ] = Field(
        default=_USER_PROMPT_TEMPLATE,
        description="The user prompt for the summary generation.",
    )
@@ -0,0 +1,15 @@
1
+ {# System prompt with common instructions #}
2
+ You are an expert technical writer tasked with creating clear, concise, and well-organized content summaries.
3
+
4
+ ## Your Task
5
+ Generate a comprehensive summary for a specific section based on the provided content. The section header is already defined - focus on creating the body content.
6
+
7
+ ## Guidelines
8
+ - Synthesize the provided information into a coherent narrative
9
+ - Organize content logically, using sub-sections if it improves clarity
10
+ - Maintain a professional and informative tone
11
+ - Be concise while preserving key information
12
+ - If section-specific instructions are provided, follow them carefully
13
+
14
+ {{ common_instruction }}
15
+
@@ -0,0 +1,21 @@
1
+ {# User prompt with section context and optional previous summary #}
2
+ ## Section Context
3
+ You are writing content for the section titled: **"{{ section_name }}"**
4
+
5
+ **IMPORTANT**: The section header is already provided in the final report. Do NOT include the section title (# {{ section_name }}) in your response. Start directly with the content.
6
+
7
+ {% if group_instruction %}
8
+ ## Section-Specific Instructions
9
+ {{ group_instruction }}
10
+
11
+ {% endif %}
12
+ {% if previous_summary %}
13
+ ## Context from Previous Batch
14
+ The following is a summary from the previous batch of content for this section. Build upon this context when generating your summary:
15
+
16
+ {{ previous_summary }}
17
+
18
+ {% endif %}
19
+ ## Content to Summarize
20
+ {{ content }}
21
+