unique_toolkit 1.42.9__py3-none-any.whl → 1.43.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. unique_toolkit/_common/experimental/write_up_agent/README.md +848 -0
  2. unique_toolkit/_common/experimental/write_up_agent/__init__.py +22 -0
  3. unique_toolkit/_common/experimental/write_up_agent/agent.py +170 -0
  4. unique_toolkit/_common/experimental/write_up_agent/config.py +42 -0
  5. unique_toolkit/_common/experimental/write_up_agent/examples/data.csv +13 -0
  6. unique_toolkit/_common/experimental/write_up_agent/examples/example_usage.py +78 -0
  7. unique_toolkit/_common/experimental/write_up_agent/schemas.py +36 -0
  8. unique_toolkit/_common/experimental/write_up_agent/services/__init__.py +13 -0
  9. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/__init__.py +19 -0
  10. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/exceptions.py +29 -0
  11. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/service.py +150 -0
  12. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/utils.py +130 -0
  13. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/__init__.py +27 -0
  14. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/config.py +56 -0
  15. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/exceptions.py +79 -0
  16. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/config.py +34 -0
  17. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/system_prompt.j2 +15 -0
  18. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/user_prompt.j2 +21 -0
  19. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/service.py +369 -0
  20. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/__init__.py +29 -0
  21. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/default_template.j2 +37 -0
  22. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/exceptions.py +39 -0
  23. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/service.py +191 -0
  24. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/utils.py +182 -0
  25. unique_toolkit/_common/experimental/write_up_agent/utils.py +24 -0
  26. {unique_toolkit-1.42.9.dist-info → unique_toolkit-1.43.1.dist-info}/METADATA +7 -1
  27. {unique_toolkit-1.42.9.dist-info → unique_toolkit-1.43.1.dist-info}/RECORD +29 -4
  28. {unique_toolkit-1.42.9.dist-info → unique_toolkit-1.43.1.dist-info}/LICENSE +0 -0
  29. {unique_toolkit-1.42.9.dist-info → unique_toolkit-1.43.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,130 @@
1
+ """Utility functions for DataFrame operations."""
2
+
3
+ import re
4
+
5
+ import pandas as pd
6
+
7
+
8
def to_snake_case(text: str) -> str:
    """
    Convert a string to snake_case.

    This ensures column names are compatible with Jinja template syntax:
    every run of non-alphanumeric characters (spaces, hyphens, dots,
    brackets, ...) is collapsed into a single underscore, and
    camelCase/PascalCase word boundaries are split. Acronym runs are kept
    together ("HTTPServer" -> "http_server", not "h_t_t_p_server").

    Examples:
        >>> to_snake_case("MyColumn")
        'my_column'
        >>> to_snake_case("my_column")
        'my_column'
        >>> to_snake_case("My Column Name")
        'my_column_name'
        >>> to_snake_case("column-name")
        'column_name'
        >>> to_snake_case("Column_123")
        'column_123'
        >>> to_snake_case("Price (USD)")
        'price_usd'

    Args:
        text: String to convert

    Returns:
        snake_case version of the string
    """
    # Collapse every non-alphanumeric run into one underscore. The previous
    # version only replaced " " and "-", letting characters such as "." or
    # "(" leak through and break `{{ row.col }}` attribute lookups in Jinja.
    text = re.sub(r"[^0-9a-zA-Z]+", "_", text)

    # Split an uppercase run from a following Word ("HTTPServer" -> "HTTP_Server").
    text = re.sub(r"(?<=[A-Z])(?=[A-Z][a-z])", "_", text)

    # Split a lowercase letter or digit from a following uppercase letter
    # (camelCase/PascalCase boundaries).
    text = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", text)

    # Convert to lowercase
    text = text.lower()

    # Remove duplicate underscores
    text = re.sub(r"_+", "_", text)

    # Remove leading/trailing underscores
    return text.strip("_")
48
+
49
+
50
def from_snake_case_to_display_name(text: str) -> str:
    """
    Convert snake_case text back to Title Case for display.

    Args:
        text: snake_case text to convert

    Returns:
        Title Case version of the text

    Example:
        >>> from_snake_case_to_display_name("executive_summary")
        'Executive Summary'
        >>> from_snake_case_to_display_name("my_column_name")
        'My Column Name'
        >>> from_snake_case_to_display_name("api_design")
        'Api Design'
    """
    # NOTE: the doctest examples previously referenced a nonexistent
    # `from_snake_case` helper; they now call this function.
    # Split on underscores and capitalize each word.
    words = text.split("_")
    return " ".join(word.capitalize() for word in words)
71
+
72
+
73
def normalize_column_names(df: pd.DataFrame) -> pd.DataFrame:
    """
    Convert all DataFrame column names to snake_case.

    This normalization ensures column names are compatible with Jinja template
    syntax (e.g., {{ row.my_column }} works, but {{ row.My Column }} doesn't).

    When two distinct columns collapse to the same snake_case name (e.g.
    "My Column" and "my_column"), later duplicates receive a numeric suffix
    ("my_column", "my_column_2", ...) so no column is silently shadowed.

    Examples:
        >>> df = pd.DataFrame({"My Column": [1], "AnotherColumn": [2]})
        >>> normalized = normalize_column_names(df)
        >>> list(normalized.columns)
        ['my_column', 'another_column']

    Args:
        df: Input DataFrame

    Returns:
        New DataFrame with normalized column names
    """
    # Resolves TODO [UN-16142]: normalization could previously produce
    # duplicate column names; duplicates are now disambiguated with suffixes.
    normalized_columns: dict[str, str] = {}
    seen_counts: dict[str, int] = {}
    for col in df.columns:
        base = to_snake_case(col)
        count = seen_counts.get(base, 0)
        seen_counts[base] = count + 1
        normalized_columns[col] = base if count == 0 else f"{base}_{count + 1}"
    return df.rename(columns=normalized_columns)
95
+
96
+
97
def limit_dataframe_rows(df: pd.DataFrame, max_rows: int) -> pd.DataFrame:
    """
    Limit DataFrame to first N rows.

    Args:
        df: DataFrame to limit
        max_rows: Maximum number of rows

    Returns:
        A copy of the DataFrame with at most max_rows rows
    """
    # head() already returns the whole frame when len(df) <= max_rows, so the
    # previous explicit length check was redundant; copy() detaches the result
    # from the original frame in both cases.
    return df.head(max_rows).copy()
111
+
112
+
113
+ def dataframe_to_dict_records(
114
+ df: pd.DataFrame, columns: list[str] | None = None
115
+ ) -> list[dict]:
116
+ """
117
+ Convert DataFrame to list of dict records.
118
+
119
+ Args:
120
+ df: DataFrame to convert
121
+ columns: Optional list of columns to include
122
+
123
+ Returns:
124
+ List of dict records
125
+ """
126
+ if columns:
127
+ df = df.loc[:, columns]
128
+
129
+ # Replace NaN with None for better serialization
130
+ return df.where(pd.notna(df), None).to_dict(orient="records")
@@ -0,0 +1,27 @@
1
+ """Generation handler module."""
2
+
3
+ from unique_toolkit._common.experimental.write_up_agent.services.generation_handler.config import (
4
+ GenerationHandlerConfig,
5
+ )
6
+ from unique_toolkit._common.experimental.write_up_agent.services.generation_handler.exceptions import (
7
+ AggregationError,
8
+ BatchCreationError,
9
+ GenerationHandlerError,
10
+ LLMCallError,
11
+ PromptBuildError,
12
+ TokenLimitError,
13
+ )
14
+ from unique_toolkit._common.experimental.write_up_agent.services.generation_handler.service import (
15
+ GenerationHandler,
16
+ )
17
+
18
+ __all__ = [
19
+ "GenerationHandler",
20
+ "GenerationHandlerConfig",
21
+ "GenerationHandlerError",
22
+ "BatchCreationError",
23
+ "PromptBuildError",
24
+ "LLMCallError",
25
+ "AggregationError",
26
+ "TokenLimitError",
27
+ ]
@@ -0,0 +1,56 @@
1
+ """Configuration for generation handler."""
2
+
3
+ from pydantic import BaseModel, Field
4
+
5
+ from unique_toolkit._common.experimental.write_up_agent.services.generation_handler.prompts.config import (
6
+ GenerationHandlerPromptsConfig,
7
+ )
8
+ from unique_toolkit._common.pydantic_helpers import get_configuration_dict
9
+ from unique_toolkit._common.validators import LMI, get_LMI_default_field
10
+ from unique_toolkit.language_model.default_language_model import DEFAULT_GPT_4o
11
+
12
+
13
class GenerationHandlerConfig(BaseModel):
    """Configuration for generation handler.

    This configuration controls how groups are batched, how prompts are built,
    and how the LLM is called for generating summaries.
    """

    model_config = get_configuration_dict()

    # Language model used for every summary-generation call; defaults to GPT-4o.
    language_model: LMI = get_LMI_default_field(
        DEFAULT_GPT_4o,
        description="The language model to use for generating summaries.",
    )

    # Instruction applied to every group, regardless of its key.
    common_instruction: str = Field(
        default="You are a technical writer. Summarize the provided content concisely and clearly.",
        description="Common instruction applied to all groups",
    )

    # TODO [UN-16142]: Add default instructions for each group
    # Per-group overrides; keys use the 'column:value' format (see description).
    group_specific_instructions: dict[str, str] = Field(
        default_factory=dict,
        description=(
            "Custom instructions per group. "
            "Keys should be formatted as 'column:value' (e.g., 'section:Introduction')"
        ),
    )

    # Primary batching limit: estimated LLM input tokens per batch (>= 100).
    max_tokens_per_batch: int = Field(
        default=4000,
        ge=100,
        description="Maximum tokens per batch for LLM input (affects batching strategy)",
    )

    # Secondary batching limit applied in addition to the token limit (>= 1).
    max_rows_per_batch: int = Field(
        default=20,
        ge=1,
        description="Maximum rows per batch (secondary limit to tokens)",
    )

    # System/user prompt templates used when building the LLM requests.
    prompts_config: GenerationHandlerPromptsConfig = Field(
        default_factory=GenerationHandlerPromptsConfig,
        description="Configuration for the prompts.",
    )
@@ -0,0 +1,79 @@
1
+ """Exceptions for generation handler operations."""
2
+
3
+
4
class GenerationHandlerError(Exception):
    """Root of the generation-handler exception hierarchy.

    Catch this to handle any error raised by the generation handler.
    """
8
+
9
+
10
class BatchCreationError(GenerationHandlerError):
    """Signals that assembling row batches failed.

    Attributes:
        group_key: Group whose batching failed, when known.
        row_count: Number of rows involved, when known.
    """

    def __init__(
        self,
        message: str,
        group_key: str | None = None,
        row_count: int | None = None,
    ):
        # Record the diagnostic context, then hand the message to the base class.
        self.group_key = group_key
        self.row_count = row_count
        super().__init__(message)
22
+
23
+
24
class PromptBuildError(GenerationHandlerError):
    """Signals that building a prompt failed.

    Attributes:
        prompt_type: Kind of prompt being built, when known.
        context: Context in use when the failure occurred (never None).
    """

    def __init__(
        self,
        message: str,
        prompt_type: str | None = None,
        context: dict | None = None,
    ):
        # A falsy context (None or {}) normalizes to an empty dict.
        self.prompt_type = prompt_type
        self.context = context or {}
        super().__init__(message)
36
+
37
+
38
class LLMCallError(GenerationHandlerError):
    """Signals that a call to the language model failed.

    Attributes:
        group_key: Group being processed when the call failed, when known.
        batch_index: Index of the failing batch, when known.
        error_details: Additional detail about the underlying error, when known.
    """

    def __init__(
        self,
        message: str,
        group_key: str | None = None,
        batch_index: int | None = None,
        error_details: str | None = None,
    ):
        # Store the failure context, then delegate the message to Exception.
        self.group_key = group_key
        self.batch_index = batch_index
        self.error_details = error_details
        super().__init__(message)
52
+
53
+
54
class AggregationError(GenerationHandlerError):
    """Signals that aggregating batch results failed.

    Attributes:
        group_key: Group whose results were being aggregated, when known.
        batch_count: Number of batches involved, when known.
    """

    def __init__(
        self,
        message: str,
        group_key: str | None = None,
        batch_count: int | None = None,
    ):
        # Store the aggregation context, then delegate the message to Exception.
        self.group_key = group_key
        self.batch_count = batch_count
        super().__init__(message)
66
+
67
+
68
class TokenLimitError(GenerationHandlerError):
    """Signals a failure in token counting or token-limit validation.

    Attributes:
        estimated_tokens: Token estimate involved in the failure, when known.
        max_tokens: Token limit in effect, when known.
    """

    def __init__(
        self,
        message: str,
        estimated_tokens: int | None = None,
        max_tokens: int | None = None,
    ):
        # Store the limit context, then delegate the message to Exception.
        self.estimated_tokens = estimated_tokens
        self.max_tokens = max_tokens
        super().__init__(message)
@@ -0,0 +1,34 @@
1
+ from pathlib import Path
2
+ from typing import Annotated
3
+
4
+ from pydantic import BaseModel, Field
5
+
6
+ from unique_toolkit._common.experimental.write_up_agent.utils import template_loader
7
+ from unique_toolkit._common.pydantic.rjsf_tags import RJSFMetaTag
8
+ from unique_toolkit._common.pydantic_helpers import get_configuration_dict
9
+
10
# Directory containing this module; the prompt templates live alongside it.
PARENT_DIR = Path(__file__).parent

# Default prompt texts loaded once at import time via the shared
# template_loader helper (imported from write_up_agent.utils above).
_SYSTEM_PROMPT_TEMPLATE = template_loader(PARENT_DIR, "system_prompt.j2")
_USER_PROMPT_TEMPLATE = template_loader(PARENT_DIR, "user_prompt.j2")
14
+
15
+
16
class GenerationHandlerPromptsConfig(BaseModel):
    """Prompt templates used by the generation handler.

    The textarea widgets are sized to the line count of the default templates
    so the full prompt is visible when editing the configuration.
    """

    model_config = get_configuration_dict()

    system_prompt_template: Annotated[
        str,
        RJSFMetaTag.StringWidget.textarea(
            rows=len(_SYSTEM_PROMPT_TEMPLATE.split("\n"))
        ),
    ] = Field(
        default=_SYSTEM_PROMPT_TEMPLATE,
        # Descriptions previously said "source selection" — a copy-paste from
        # another config; these prompts drive the summary generation.
        description="The system prompt for the summary generation.",
    )
    user_prompt_template: Annotated[
        str,
        RJSFMetaTag.StringWidget.textarea(rows=len(_USER_PROMPT_TEMPLATE.split("\n"))),
    ] = Field(
        default=_USER_PROMPT_TEMPLATE,
        description="The user prompt for the summary generation.",
    )
@@ -0,0 +1,15 @@
1
+ {# System prompt with common instructions #}
2
+ You are an expert technical writer tasked with creating clear, concise, and well-organized content summaries.
3
+
4
+ ## Your Task
5
+ Generate a comprehensive summary for a specific section based on the provided content. The section header is already defined - focus on creating the body content.
6
+
7
+ ## Guidelines
8
+ - Synthesize the provided information into a coherent narrative
9
+ - Organize content logically, using sub-sections if it improves clarity
10
+ - Maintain a professional and informative tone
11
+ - Be concise while preserving key information
12
+ - If section-specific instructions are provided, follow them carefully
13
+
14
+ {{ common_instruction }}
15
+
@@ -0,0 +1,21 @@
1
+ {# User prompt with section context and optional previous summary #}
2
+ ## Section Context
3
+ You are writing content for the section titled: **"{{ section_name }}"**
4
+
5
+ **IMPORTANT**: The section header is already provided in the final report. Do NOT include the section title (# {{ section_name }}) in your response. Start directly with the content.
6
+
7
+ {% if group_instruction %}
8
+ ## Section-Specific Instructions
9
+ {{ group_instruction }}
10
+
11
+ {% endif %}
12
+ {% if previous_summary %}
13
+ ## Context from Previous Batch
14
+ The following is a summary from the previous batch of content for this section. Build upon this context when generating your summary:
15
+
16
+ {{ previous_summary }}
17
+
18
+ {% endif %}
19
+ ## Content to Summarize
20
+ {{ content }}
21
+