unique_toolkit 1.42.9__py3-none-any.whl → 1.43.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. unique_toolkit/_common/experimental/write_up_agent/README.md +848 -0
  2. unique_toolkit/_common/experimental/write_up_agent/__init__.py +22 -0
  3. unique_toolkit/_common/experimental/write_up_agent/agent.py +170 -0
  4. unique_toolkit/_common/experimental/write_up_agent/config.py +42 -0
  5. unique_toolkit/_common/experimental/write_up_agent/examples/data.csv +13 -0
  6. unique_toolkit/_common/experimental/write_up_agent/examples/example_usage.py +78 -0
  7. unique_toolkit/_common/experimental/write_up_agent/schemas.py +36 -0
  8. unique_toolkit/_common/experimental/write_up_agent/services/__init__.py +13 -0
  9. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/__init__.py +19 -0
  10. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/exceptions.py +29 -0
  11. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/service.py +150 -0
  12. unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/utils.py +130 -0
  13. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/__init__.py +27 -0
  14. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/config.py +56 -0
  15. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/exceptions.py +79 -0
  16. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/config.py +34 -0
  17. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/system_prompt.j2 +15 -0
  18. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/prompts/user_prompt.j2 +21 -0
  19. unique_toolkit/_common/experimental/write_up_agent/services/generation_handler/service.py +369 -0
  20. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/__init__.py +29 -0
  21. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/default_template.j2 +37 -0
  22. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/exceptions.py +39 -0
  23. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/service.py +191 -0
  24. unique_toolkit/_common/experimental/write_up_agent/services/template_handler/utils.py +182 -0
  25. unique_toolkit/_common/experimental/write_up_agent/utils.py +24 -0
  26. {unique_toolkit-1.42.9.dist-info → unique_toolkit-1.43.1.dist-info}/METADATA +7 -1
  27. {unique_toolkit-1.42.9.dist-info → unique_toolkit-1.43.1.dist-info}/RECORD +29 -4
  28. {unique_toolkit-1.42.9.dist-info → unique_toolkit-1.43.1.dist-info}/LICENSE +0 -0
  29. {unique_toolkit-1.42.9.dist-info → unique_toolkit-1.43.1.dist-info}/WHEEL +0 -0
unique_toolkit/_common/experimental/write_up_agent/__init__.py
@@ -0,0 +1,22 @@
+ """
+ Write-Up Agent: Template-driven DataFrame summarization and report generation.
+ """
+
+ from unique_toolkit._common.experimental.write_up_agent.agent import WriteUpAgent
+ from unique_toolkit._common.experimental.write_up_agent.config import (
+     WriteUpAgentConfig,
+ )
+ from unique_toolkit._common.experimental.write_up_agent.schemas import (
+     GroupData,
+     ProcessedGroup,
+ )
+
+ __all__ = [
+     # Main agent
+     "WriteUpAgent",
+     # Configuration
+     "WriteUpAgentConfig",
+     # Data schemas
+     "GroupData",
+     "ProcessedGroup",
+ ]
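The package root re-exports the agent, its configuration, and the data schemas, so callers only need one import path. Below is a minimal sketch of driving the public API end to end, assuming an initialized SDK and an available LanguageModelService (mirroring examples/example_usage.py later in this diff); the DataFrame columns follow the default Q&A template.

import pandas as pd

from unique_toolkit._common.experimental.write_up_agent import (
    WriteUpAgent,
    WriteUpAgentConfig,
)

# The default configuration loads the bundled Q&A template, which expects
# section, question, and answer columns.
config = WriteUpAgentConfig()
agent = WriteUpAgent(config)

df = pd.DataFrame(
    {
        "section": ["Introduction", "Introduction"],
        "question": ["What is it?", "Who is it for?"],
        "answer": ["A summarization agent.", "Analysts and data scientists."],
    }
)

# llm_service is assumed to be a configured LanguageModelService instance:
# report = agent.process(df, llm_service=llm_service)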
unique_toolkit/_common/experimental/write_up_agent/agent.py
@@ -0,0 +1,170 @@
+ """
+ Write-Up Agent - Main pipeline orchestrator.
+ """
+
+ import logging
+
+ import pandas as pd
+
+ from unique_toolkit._common.experimental.write_up_agent.config import (
+     WriteUpAgentConfig,
+ )
+ from unique_toolkit._common.experimental.write_up_agent.schemas import GroupData
+ from unique_toolkit._common.experimental.write_up_agent.services.dataframe_handler import (
+     DataFrameHandler,
+ )
+ from unique_toolkit._common.experimental.write_up_agent.services.dataframe_handler.exceptions import (
+     DataFrameGroupingError,
+     DataFrameHandlerError,
+     DataFrameProcessingError,
+     DataFrameValidationError,
+ )
+ from unique_toolkit._common.experimental.write_up_agent.services.generation_handler import (
+     GenerationHandler,
+     GenerationHandlerError,
+ )
+ from unique_toolkit._common.experimental.write_up_agent.services.template_handler import (
+     TemplateHandler,
+ )
+ from unique_toolkit._common.experimental.write_up_agent.services.template_handler.exceptions import (
+     ColumnExtractionError,
+     TemplateHandlerError,
+     TemplateParsingError,
+     TemplateRenderingError,
+     TemplateStructureError,
+ )
+ from unique_toolkit.language_model.service import LanguageModelService
+
+ _LOGGER = logging.getLogger(__name__)
+
+
+ class WriteUpAgent:
+     """
+     Main pipeline orchestrator for DataFrame summarization.
+
+     Orchestrates the complete pipeline:
+     1. Extract template info (grouping + columns)
+     2. Validate DataFrame
+     3. Create groups
+     4. Render each group
+     5. Process with LLM
+     6. Return results
+     """
+
+     def __init__(self, config: WriteUpAgentConfig):
+         """
+         Initialize WriteUpAgent.
+
+         Args:
+             config: Configuration with template and settings
+         """
+         self._config = config
+         self._template_handler = TemplateHandler(config.template)
+         self._dataframe_handler = DataFrameHandler()
+
+         # Create generation handler with injected renderer
+         def renderer(group_data: GroupData) -> str:
+             return self._template_handler.render_group(group_data)
+
+         # TODO [UN-16142]: Find a better way to inject the renderer
+         self._generation_handler = GenerationHandler(
+             self._config.generation_handler_config, renderer
+         )
+
+     def process(self, df: pd.DataFrame, llm_service: LanguageModelService) -> str:
+         """
+         Execute the complete pipeline and generate the final report.
+
+         Args:
+             df: pandas DataFrame to process
+             llm_service: LanguageModelService to use for generating summaries
+
+         Returns:
+             Final markdown report as a single string with all groups processed
+
+         Raises:
+             TemplateParsingError, TemplateStructureError, ColumnExtractionError,
+                 TemplateRenderingError, TemplateHandlerError: If template handling fails
+             DataFrameValidationError, DataFrameGroupingError,
+                 DataFrameProcessingError, DataFrameHandlerError: If DataFrame handling fails
+             GenerationHandlerError: If LLM generation fails
+
+         Example:
+             >>> config = WriteUpAgentConfig(template="...")
+             >>> agent = WriteUpAgent(config)
+             >>> report = agent.process(df, llm_service)
+             >>> print(report)
+         """
+         # TODO [UN-16142]: Add error handling for each step separately
+         try:
+             # Step 1: Extract template structure
+             _LOGGER.info("Extracting template structure...")
+             grouping_column = self._template_handler.get_grouping_column()
+             selected_columns = self._template_handler.get_selected_columns()
+             _LOGGER.info(f"Detected grouping column: {grouping_column}")
+             _LOGGER.info(f"Detected data columns: {selected_columns}")
+
+             # Step 2: Validate DataFrame
+             _LOGGER.info("Validating DataFrame columns...")
+             self._dataframe_handler.validate_columns(
+                 df, grouping_column, selected_columns
+             )
+
+             # Step 3: Create groups
+             _LOGGER.info("Creating groups from DataFrame...")
+             groups = self._dataframe_handler.create_groups(
+                 df, grouping_column, selected_columns
+             )
+             _LOGGER.info(f"Created {len(groups)} groups")
+
+             # Step 4: Process groups with GenerationHandler
+             _LOGGER.info("Processing groups with GenerationHandler...")
+             processed_groups = self._generation_handler.process_groups(
+                 groups, grouping_column, llm_service
+             )
+             _LOGGER.info(f"Generation complete for {len(processed_groups)} groups")
+
+             # Step 5: Render final report with LLM responses
+             _LOGGER.info("Rendering final report...")
+
+             final_report = self._template_handler.render_all_groups(processed_groups)
+
+             _LOGGER.info(f"Report generated ({len(final_report)} characters)")
+
+             return final_report
+
+         except TemplateParsingError as e:
+             _LOGGER.error(f"Template parsing failed: {e}")
+             raise
+
+         except TemplateStructureError as e:
+             _LOGGER.error(f"Template structure invalid: {e}")
+             raise
+
+         except ColumnExtractionError as e:
+             _LOGGER.error(f"Column extraction failed: {e}")
+             raise
+
+         except DataFrameValidationError as e:
+             _LOGGER.error(f"DataFrame validation failed: {e}")
+             raise
+
+         except DataFrameGroupingError as e:
+             _LOGGER.error(f"DataFrame grouping failed: {e}")
+             raise
+
+         except DataFrameProcessingError as e:
+             _LOGGER.error(f"DataFrame processing failed: {e}")
+             raise
+
+         except GenerationHandlerError as e:
+             _LOGGER.error(f"Generation failed: {e}")
+             raise
+
+         except TemplateRenderingError as e:
+             _LOGGER.error(f"Final rendering failed: {e}")
+             raise
+
+         except (TemplateHandlerError, DataFrameHandlerError) as e:
+             _LOGGER.error(f"Handler error: {e}")
+             raise
+
+         except Exception as e:
+             _LOGGER.error(f"Unexpected error: {e}", exc_info=True)
+             raise
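Since process() logs and re-raises each exception type, callers can implement step-specific recovery. A hedged sketch, using only the exception classes imported above and assuming the template exception hierarchy mirrors the DataFrame one shown later in this diff:

from unique_toolkit._common.experimental.write_up_agent.services.dataframe_handler.exceptions import (
    DataFrameValidationError,
)
from unique_toolkit._common.experimental.write_up_agent.services.template_handler.exceptions import (
    TemplateHandlerError,
)


def generate_report_safely(agent, df, llm_service) -> str | None:
    # Distinguish bad input data from template problems; both are logged
    # and re-raised by WriteUpAgent.process().
    try:
        return agent.process(df, llm_service)
    except DataFrameValidationError as e:
        # e.missing_columns lists the columns absent after normalization
        print(f"Fix the DataFrame first: {e.missing_columns}")
        return None
    except TemplateHandlerError:
        # Template parsing, structure, extraction, and rendering errors
        # are all handled here (assumed to share this base class).
        raise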
unique_toolkit/_common/experimental/write_up_agent/config.py
@@ -0,0 +1,42 @@
+ from pydantic import BaseModel, Field, field_validator
+
+ from unique_toolkit._common.experimental.write_up_agent.services.generation_handler.config import (
+     GenerationHandlerConfig,
+ )
+ from unique_toolkit._common.experimental.write_up_agent.services.template_handler import (
+     default_jinja_template_loader,
+ )
+ from unique_toolkit._common.pydantic_helpers import get_configuration_dict
+
+
+ class WriteUpAgentConfig(BaseModel):
+     """Configuration for the Write-Up Agent that generates summaries from DataFrame data.
+
+     The agent uses a Jinja template as the single source of truth for data structure.
+     The template is parsed to automatically detect grouping columns and data references.
+     """
+
+     model_config = get_configuration_dict()
+
+     # Template Configuration (single source of truth)
+     template: str = Field(
+         default_factory=default_jinja_template_loader,
+         description=(
+             "Jinja template string that defines the structure of the summary. "
+             "The template is parsed to automatically detect grouping columns and data references. "
+             "If not provided, loads the default Q&A template. "
+             "Example: '{% for g in groups %}## {{ g.section }}{% endfor %}'"
+         ),
+     )
+
+     generation_handler_config: GenerationHandlerConfig = Field(
+         default_factory=GenerationHandlerConfig,
+         description="Configuration for the generation handler.",
+     )
+
+     @field_validator("template")
+     @classmethod
+     def validate_template_not_empty(cls, v: str) -> str:
+         if not v.strip():
+             raise ValueError("Template must not be empty")
+         return v
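Because the template is the single source of truth, changing the report structure is a configuration change rather than a code change. A sketch under the assumption that the parser accepts the loop-over-groups shape hinted at in the field description; the variable names (groups, section, llm_response) are taken from that example and the schemas, not from the default template, which this diff does not show:

from unique_toolkit._common.experimental.write_up_agent.config import WriteUpAgentConfig

# Hypothetical custom template following the shape from the field description
CUSTOM_TEMPLATE = (
    "{% for g in groups %}"
    "## {{ g.section }}\n"
    "{{ g.llm_response }}\n"
    "{% endfor %}"
)

config = WriteUpAgentConfig(template=CUSTOM_TEMPLATE)

# An empty or whitespace-only template is rejected by the field validator
# (pydantic wraps the ValueError in a ValidationError, which subclasses it):
try:
    WriteUpAgentConfig(template="   ")
except ValueError as e:
    print(e)  # includes "Template must not be empty"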
unique_toolkit/_common/experimental/write_up_agent/examples/data.csv
@@ -0,0 +1,13 @@
+ section,question,answer
+ Introduction,What is the Write-Up Agent?,The Write-Up Agent is a tool that automatically generates summaries from structured DataFrame data using LLM technology.
+ Introduction,Who should use this tool?,Data scientists and analysts who need to convert tabular data into readable reports.
+ Introduction,What are the key benefits?,Automated report generation with customizable templates and intelligent summarization.
+ Methods,How does the agent process data?,The agent groups data by sections and generates summaries for each group using LLM calls with adaptive batching.
+ Methods,What is adaptive batching?,A technique that splits large groups into smaller batches to fit within token limits while maintaining context.
+ Methods,Can I customize the output format?,Yes! You can provide custom Jinja templates to control the structure and style of the generated report.
+ Results,What level of accuracy can I expect?,The agent leverages state-of-the-art LLMs to produce accurate and contextually relevant summaries.
+ Results,How fast is the processing?,Processing speed depends on the LLM provider and the size of your dataset. Batching helps optimize performance.
+ Results,Can it handle large datasets?,Yes! The agent automatically batches large groups to handle datasets of any size efficiently.
+ Conclusion,Is this production-ready?,Yes! The agent includes robust error handling and type-safe operations using Pydantic schemas.
+ Conclusion,Where can I find more examples?,Check the examples directory for additional use cases and custom template examples.
+
unique_toolkit/_common/experimental/write_up_agent/examples/example_usage.py
@@ -0,0 +1,78 @@
+ """
+ Example: Using the Write-Up Agent to generate summaries from DataFrame data.
+ """
+
+ # TODO [UN-16142]: Add example usage in tutorial instead of here
+
+ import logging
+ from pathlib import Path
+
+ import pandas as pd
+
+ from unique_toolkit._common.experimental.write_up_agent import (
+     WriteUpAgent,
+     WriteUpAgentConfig,
+ )
+ from unique_toolkit._common.experimental.write_up_agent.services.generation_handler.config import (
+     GenerationHandlerConfig,
+ )
+ from unique_toolkit.app.unique_settings import UniqueSettings
+ from unique_toolkit.language_model.service import LanguageModelService
+
+ logging.basicConfig(level=logging.DEBUG)
+
+ # Set up paths
+ current_dir = Path(__file__).parent
+ env_path = current_dir / "unique.env"
+ data_path = current_dir / "data.csv"
+
+ # Initialize the SDK with your API keys
+ _SETTINGS = UniqueSettings.from_env(env_file=env_path)
+ _SETTINGS.init_sdk()
+
+ # Configure the Write-Up Agent.
+ # The default configuration expects the columns: section, question, answer.
+ write_up_agent_config = WriteUpAgentConfig(
+     generation_handler_config=GenerationHandlerConfig(
+         # Optional: Customize generation settings
+         # max_rows_per_batch=20,  # Max rows per batch (default: 20)
+         # max_tokens_per_batch=4000,  # Max tokens per batch (default: 4000)
+         # common_instruction="You are a technical writer...",  # Custom system prompt
+         # group_specific_instructions={
+         #     # IMPORTANT: Both column and value must be in snake_case
+         #     # DataFrame: Section="Introduction" → Key: "section:introduction"
+         #     "section:introduction": "Be welcoming and engaging",
+         #     "section:methods": "Be precise and technical",
+         # },
+     )
+ )
+
+ # Initialize the agent (the LLM service is passed to process(), not here)
+ write_up_agent = WriteUpAgent(
+     config=write_up_agent_config,
+ )
+
+ # Load your DataFrame.
+ # IMPORTANT: It must have the columns section, question, answer (otherwise adapt the template).
+ df = pd.read_csv(data_path)
+
+ print(f"Processing {len(df)} rows across {df['section'].nunique()} sections...")
+ print(f"Columns in DataFrame: {list(df.columns)}")
+ print()
+
+ llm_service = LanguageModelService.from_settings(_SETTINGS)
+
+ # Generate the report
+ report = write_up_agent.process(df, llm_service=llm_service)
+
+ # Display the result
+ print("=" * 80)
+ print("GENERATED REPORT")
+ print("=" * 80)
+ print(report)
+
+ # Optional: Save to file
+ output_path = current_dir / "report.md"
+ output_path.write_text(report)
+ print()
+ print(f"Report saved to: {output_path}")
unique_toolkit/_common/experimental/write_up_agent/schemas.py
@@ -0,0 +1,36 @@
+ """Data schemas for the Write-Up Agent."""
+
+ from typing import Any
+
+ from pydantic import BaseModel, Field
+
+
+ class GroupData(BaseModel):
+     """
+     Represents a group of rows from a DataFrame.
+
+     This is the core data structure passed between handlers in the pipeline.
+     """
+
+     group_key: str = Field(
+         ...,
+         description="The value of the grouping column for this group (e.g., 'Introduction', 'Methods')",
+     )
+
+     rows: list[dict[str, Any]] = Field(
+         ...,
+         description="List of row dictionaries containing the selected columns for this group",
+     )
+
+
+ class ProcessedGroup(GroupData):
+     """
+     Represents a group after LLM processing.
+
+     Extends GroupData with the LLM-generated response.
+     """
+
+     llm_response: str = Field(
+         ...,
+         description="The LLM-generated summary/output for this group",
+     )
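Both models are plain Pydantic classes, so they can be constructed, validated, and serialized directly. A small sketch of the shapes the pipeline passes around:

from unique_toolkit._common.experimental.write_up_agent.schemas import (
    GroupData,
    ProcessedGroup,
)

# One group as produced by DataFrameHandler.create_groups(): snake_case
# column names in rows, group_key normalized from the grouping column value.
group = GroupData(
    group_key="introduction",
    rows=[{"question": "What is it?", "answer": "A summarization agent."}],
)

# After LLM processing, the group gains its generated summary.
processed = ProcessedGroup(
    group_key=group.group_key,
    rows=group.rows,
    llm_response="This section introduces the Write-Up Agent...",
)
print(processed.model_dump())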
unique_toolkit/_common/experimental/write_up_agent/services/__init__.py
@@ -0,0 +1,13 @@
+ """Services for the write-up agent pipeline."""
+
+ from unique_toolkit._common.experimental.write_up_agent.services.dataframe_handler import (
+     DataFrameHandler,
+ )
+ from unique_toolkit._common.experimental.write_up_agent.services.template_handler import (
+     TemplateHandler,
+ )
+
+ __all__ = [
+     "DataFrameHandler",
+     "TemplateHandler",
+ ]
unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/__init__.py
@@ -0,0 +1,19 @@
+ """DataFrame handler module."""
+
+ from unique_toolkit._common.experimental.write_up_agent.services.dataframe_handler.exceptions import (
+     DataFrameGroupingError,
+     DataFrameHandlerError,
+     DataFrameProcessingError,
+     DataFrameValidationError,
+ )
+ from unique_toolkit._common.experimental.write_up_agent.services.dataframe_handler.service import (
+     DataFrameHandler,
+ )
+
+ __all__ = [
+     "DataFrameHandler",
+     "DataFrameHandlerError",
+     "DataFrameValidationError",
+     "DataFrameGroupingError",
+     "DataFrameProcessingError",
+ ]
unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/exceptions.py
@@ -0,0 +1,29 @@
+ """Exceptions for DataFrame handler operations."""
+
+
+ class DataFrameHandlerError(Exception):
+     """Base exception for all DataFrame handler errors."""
+
+     pass
+
+
+ class DataFrameValidationError(DataFrameHandlerError):
+     """Raised when DataFrame validation fails (e.g., missing columns)."""
+
+     def __init__(self, message: str, missing_columns: list[str] | None = None):
+         super().__init__(message)
+         self.missing_columns = missing_columns or []
+
+
+ class DataFrameGroupingError(DataFrameHandlerError):
+     """Raised when DataFrame grouping operation fails."""
+
+     def __init__(self, message: str, grouping_column: str | None = None):
+         super().__init__(message)
+         self.grouping_column = grouping_column
+
+
+ class DataFrameProcessingError(DataFrameHandlerError):
+     """Raised when general DataFrame processing fails."""
+
+     pass
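The subclasses carry structured context beyond the message (missing_columns, grouping_column), which makes targeted handling straightforward. A brief illustrative sketch:

from unique_toolkit._common.experimental.write_up_agent.services.dataframe_handler.exceptions import (
    DataFrameGroupingError,
    DataFrameHandlerError,
    DataFrameValidationError,
)

try:
    # Illustrative raise; in practice these come from DataFrameHandler.
    raise DataFrameValidationError(
        "DataFrame missing required columns", missing_columns=["answer"]
    )
except DataFrameValidationError as e:
    print(e.missing_columns)  # ['answer']
except DataFrameGroupingError as e:
    print(e.grouping_column)
except DataFrameHandlerError:
    # The base class catches any remaining handler error.
    pass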
unique_toolkit/_common/experimental/write_up_agent/services/dataframe_handler/service.py
@@ -0,0 +1,150 @@
+ """DataFrame handler service."""
+
+ import pandas as pd
+
+ from unique_toolkit._common.experimental.write_up_agent.schemas import GroupData
+ from unique_toolkit._common.experimental.write_up_agent.services.dataframe_handler.exceptions import (
+     DataFrameGroupingError,
+     DataFrameProcessingError,
+     DataFrameValidationError,
+ )
+ from unique_toolkit._common.experimental.write_up_agent.services.dataframe_handler.utils import (
+     dataframe_to_dict_records,
+     normalize_column_names,
+     to_snake_case,
+ )
+
+
+ class DataFrameHandler:
+     """
+     Handles all DataFrame operations.
+
+     This handler automatically converts all column names to snake_case to ensure
+     compatibility with Jinja template syntax. For example:
+     - "My Column" becomes "my_column"
+     - "UserName" becomes "user_name"
+     - "column-name" becomes "column_name"
+
+     This normalization happens automatically during validation and grouping operations.
+
+     Responsibilities:
+     - Normalize column names to snake_case
+     - Validate DataFrame has required columns
+     - Create groups from DataFrame
+     """
+
+     def validate_columns(
+         self, df: pd.DataFrame, grouping_column: str, selected_columns: list[str]
+     ) -> None:
+         """
+         Validate that the DataFrame has the required columns.
+
+         NOTE: Column names are automatically converted to snake_case before validation.
+         Ensure your template uses snake_case column references (e.g., {{ row.my_column }}).
+
+         Args:
+             df: pandas DataFrame to validate
+             grouping_column: Column to group by (should be in snake_case)
+             selected_columns: Columns that should exist (should be in snake_case)
+
+         Raises:
+             DataFrameValidationError: If columns are missing after normalization
+
+         Example:
+             >>> df = pd.DataFrame({"My Section": [1], "My Question": [2]})
+             >>> handler.validate_columns(df, "my_section", ["my_question"])
+             # Validation passes because "My Section" -> "my_section"
+         """
+         # Normalize DataFrame columns to snake_case
+         normalized_df = normalize_column_names(df)
+
+         required_columns = {grouping_column} | set(selected_columns)
+         missing_columns = required_columns - set(normalized_df.columns)
+
+         if missing_columns:
+             raise DataFrameValidationError(
+                 f"DataFrame missing required columns after snake_case normalization: {sorted(missing_columns)}. "
+                 f"Available columns: {sorted(normalized_df.columns)}",
+                 missing_columns=sorted(missing_columns),
+             )
+
+     def create_groups(
+         self, df: pd.DataFrame, grouping_column: str, selected_columns: list[str]
+     ) -> list[GroupData]:
+         """
+         Create groups from a DataFrame.
+
+         NOTE: Column names are automatically converted to snake_case, and group
+         values (group_key) are likewise normalized to snake_case for consistency
+         with template syntax.
+
+         The returned GroupData instances will have:
+         - snake_case column names in their rows
+         - snake_case group_key values derived from the original DataFrame values
+
+         IMPORTANT: Groups are returned in the order of their first appearance in the DataFrame,
+         NOT sorted alphabetically. This preserves the logical flow of your data.
+
+         Args:
+             df: pandas DataFrame to group
+             grouping_column: Column to group by (should be in snake_case)
+             selected_columns: Columns to include in rows (should be in snake_case)
+
+         Returns:
+             List of GroupData instances in order of first appearance, each containing
+             group_key (in snake_case) and rows with snake_case columns
+
+         Raises:
+             DataFrameGroupingError: If grouping fails
+             DataFrameProcessingError: If data processing fails
+
+         Example:
+             >>> df = pd.DataFrame({
+             ...     "My Section": ["Intro", "Methods", "Results", "Intro"],
+             ...     "My Question": ["Q1", "Q2", "Q3", "Q4"],
+             ... })
+             >>> groups = handler.create_groups(df, "my_section", ["my_question"])
+             >>> [g.group_key for g in groups]
+             ['intro', 'methods', 'results']  # Values normalized to snake_case, order preserved
+         """
+         # Normalize column names to snake_case
+         normalized_df = normalize_column_names(df)
+
+         if grouping_column not in normalized_df.columns:
+             raise DataFrameGroupingError(
+                 f"Grouping column '{grouping_column}' not found in normalized DataFrame. "
+                 f"Available columns: {sorted(normalized_df.columns)}",
+                 grouping_column=grouping_column,
+             )
+
+         try:
+             # Use sort=False to preserve the order of first appearance in the DataFrame
+             grouped = normalized_df.groupby(grouping_column, sort=False)
+         except Exception as e:
+             raise DataFrameGroupingError(
+                 f"Failed to group DataFrame by '{grouping_column}': {e}",
+                 grouping_column=grouping_column,
+             ) from e
+
+         results = []
+
+         try:
+             for group_key, group_df in grouped:
+                 # Filter columns if specified
+                 if selected_columns:
+                     cols_to_use = [c for c in selected_columns if c in group_df.columns]
+                     limited_df = group_df.loc[:, cols_to_use]
+                 else:
+                     limited_df = group_df
+
+                 # Convert to dict records
+                 rows = dataframe_to_dict_records(limited_df)
+
+                 # Normalize group_key value to snake_case for consistency with template syntax
+                 normalized_group_key = to_snake_case(str(group_key))
+
+                 # Create GroupData instance with proper typing
+                 results.append(GroupData(group_key=normalized_group_key, rows=rows))
+         except Exception as e:
+             raise DataFrameProcessingError(f"Error processing grouped data: {e}") from e
+
+         return results
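The normalization rules documented in the class docstring ("My Column" → "my_column", "UserName" → "user_name", "column-name" → "column_name") are implemented in dataframe_handler/utils.py, which this excerpt does not include. A hypothetical re-implementation that reproduces the documented behavior:

import re

import pandas as pd


def to_snake_case(name: str) -> str:
    """Illustrative only; the real helper lives in dataframe_handler/utils.py."""
    # Collapse whitespace and hyphens into underscores: "My Column" -> "My_Column"
    name = re.sub(r"[\s\-]+", "_", name.strip())
    # Split camelCase boundaries: "UserName" -> "User_Name"
    name = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", "_", name)
    return name.lower()


def normalize_column_names(df: pd.DataFrame) -> pd.DataFrame:
    """Illustrative only: return a copy with snake_case column names."""
    return df.rename(columns={c: to_snake_case(str(c)) for c in df.columns})


assert to_snake_case("My Column") == "my_column"
assert to_snake_case("UserName") == "user_name"
assert to_snake_case("column-name") == "column_name"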