ultrasav 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,294 @@
1
+ import polars as pl
2
+ import xlsxwriter
3
+ from typing import Any
4
+ from pathlib import Path
5
+ from .pastel_color_schemes import get_color_scheme
6
+ from .def_write_excel_engine import write_excel_engine
7
+ # version_2
8
+
9
+
10
def map_to_excel(
    df: pl.DataFrame,
    file_path: str | Path,
    merge_columns: list[str] | None = None,
    column_widths: dict[str, int] | None = None,
    header_format: dict[str, Any] | None = None,
    column_formats: dict[str, dict[str, Any]] | None = None,
    merge_format: dict[str, Any] | None = None,
    group_border_format: dict[str, Any] | None = None,
    alternating_row_colors: tuple[str, str] | None = None,
    alternating_group_formats: tuple[dict[str, Any], dict[str, Any]] | None = None,
    sheet_name: str = "Sheet1",
    freeze_panes: tuple | None = (1, 0),
) -> None:
    """
    Write a survey metadata DataFrame to Excel with standardized formatting.

    This is a convenience wrapper around write_excel_engine() that fills in
    default formatting aimed at survey data maps and metadata. Every default
    can be overridden through the corresponding parameter.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame to write to Excel.
    file_path : str or Path
        Path to the output Excel file.
    merge_columns : list of str, optional
        Column names to merge. Defaults to
        ['variable', 'variable_label', 'variable_type'] when None.
        An empty list [] is passed through unchanged, which disables merging.
    column_widths : dict of str to int, optional
        Mapping of column names to widths in pixels. User values are layered
        on top of the built-in widths for common survey metadata columns.
    header_format : dict, optional
        xlsxwriter format properties for the header row. Defaults to bold,
        12pt font with a gray bottom border. User values override defaults
        key by key.
    column_formats : dict of str to dict, optional
        Mapping of column names to xlsxwriter format dictionaries. Defaults:
        text wrapping for variable_label, centered integer format for
        value_code, and right-aligned "0.0%" for base_pct and total_pct.
        For a column that has a default, the user dict is merged into it
        key by key; other columns are used as given.
    merge_format : dict, optional
        xlsxwriter format properties for merged cells. Defaults to wrapped
        text, left-aligned, vertically centered. User values override
        defaults key by key.
    group_border_format : dict, optional
        Border format for the last row of each merge group. The default is
        an empty dict (no extra group border).
    alternating_row_colors : tuple of (str, str), optional
        Two background colors to alternate between merge groups.
        Note: if alternating_group_formats is provided, it takes precedence.
    alternating_group_formats : tuple of (dict, dict), optional
        Two complete format dictionaries to alternate between merge groups.
        When BOTH this and alternating_row_colors are None, the
        "classic_grey" scheme from get_color_scheme() is used. Because None
        means "not provided", passing None cannot switch alternating off;
        pass an empty tuple here instead (see Examples).
    sheet_name : str, default "Sheet1"
        Name of the worksheet.
    freeze_panes : tuple of (row, col), optional
        Position to freeze panes. Default (1, 0) freezes the header row.

    Raises
    ------
    TypeError
        If df is not a Polars DataFrame. The message suggests
        pl.from_pandas() when the object comes from a pandas module.

    Examples
    --------
    Simple usage with all defaults:

    >>> df = pl.DataFrame({
    ...     "variable": ["Q1", "Q1", "Q2"],
    ...     "variable_label": ["Age", "Age", "Gender"],
    ...     "variable_type": ["single", "single", "single"],
    ...     "value_code": [1, 2, 1],
    ...     "value_label": ["18-25", "26+", "Male"]
    ... })
    >>> map_to_excel(df, "survey_map.xlsx")

    Override with custom alternating formats:

    >>> map_to_excel(
    ...     df,
    ...     "survey_map.xlsx",
    ...     alternating_group_formats=(
    ...         {"bg_color": "#F0F8FF", "font_color": "#00008B", "border": 1},
    ...         {"bg_color": "#FFF0F5", "font_color": "#8B008B", "border": 1}
    ...     )
    ... )

    Custom alternating colors only:

    >>> map_to_excel(
    ...     df,
    ...     "survey_map.xlsx",
    ...     alternating_row_colors=("#FFE6E6", "#FFCCCC")  # Light red alternating
    ... )

    Disable all alternating (an empty tuple is "provided but falsy", so the
    default scheme is not substituted and the engine applies no group format):

    >>> map_to_excel(
    ...     df,
    ...     "survey_map.xlsx",
    ...     alternating_group_formats=()
    ... )

    Notes
    -----
    A confirmation line ("✓ Saved: <file name>") is printed after the file
    has been written.

    See Also
    --------
    write_excel_engine : The underlying function with full control
    """
    # Reject anything that is not a Polars DataFrame, with a targeted hint
    # for objects whose type lives in a pandas module.
    if not isinstance(df, pl.DataFrame):
        df_type = type(df).__module__ + "." + type(df).__name__
        if "pandas" in df_type.lower():
            raise TypeError(
                "Expected a Polars DataFrame, but received a pandas DataFrame. "
                "Convert with: pl.from_pandas(df)"
            )
        raise TypeError(
            f"Expected a Polars DataFrame, but received {type(df).__name__}. "
            "Please pass a pl.DataFrame."
        )

    default_merge_columns = [
        'variable',
        'variable_label',
        'variable_type',
    ]

    default_column_widths = {
        "variable": 100,
        "variable_label": 400,
        "variable_type": 100,
        "value_code": 80,
        "value_label": 200,
        "value_n": 100,
        "base_n": 100,
        "base_pct": 100,
        "total_n": 100,
        "total_pct": 100,
        "missing_value_label": 130,
        "missing_data": 120,
    }

    default_header_format = {
        "bold": True,
        "font_size": 12,
        "bottom": 1,
        "bottom_color": "#808080",
    }

    default_column_formats = {
        "variable_label": {
            "text_wrap": True,
        },
        "value_code": {
            "num_format": "0",
            "align": "center",
            "valign": "vcenter",
        },
        "base_pct": {
            "num_format": "0.0%",
            "align": "right",
            "valign": "vcenter",
        },
        "total_pct": {
            "num_format": "0.0%",
            "align": "right",
            "valign": "vcenter",
        },
    }

    default_merge_format = {
        "text_wrap": True,  # must be enabled for wrapping to take effect
        "align": "left",
        "valign": "vcenter",
    }

    # Intentionally empty: the default alternating scheme is relied on for
    # visual separation instead of an extra group border.
    default_group_border_format = {}

    # Only None selects the default; an empty list must stay empty so that
    # callers can switch merging off.
    if merge_columns is None:
        merge_columns = default_merge_columns

    # Flat dictionaries: user keys win over default keys.
    column_widths = {**default_column_widths, **(column_widths or {})}
    header_format = {**default_header_format, **(header_format or {})}
    merge_format = {**default_merge_format, **(merge_format or {})}
    group_border_format = {**default_group_border_format, **(group_border_format or {})}

    # Nested dictionary: merge per column so a user can tweak one property
    # of a default column format without losing the others.
    merged_column_formats = dict(default_column_formats)
    for col, fmt in (column_formats or {}).items():
        if col in merged_column_formats:
            merged_column_formats[col] = {**merged_column_formats[col], **fmt}
        else:
            merged_column_formats[col] = fmt
    column_formats = merged_column_formats

    # Fall back to the packaged scheme only when the caller supplied neither
    # alternating option; any non-None value is forwarded untouched.
    if alternating_group_formats is None and alternating_row_colors is None:
        alternating_group_formats = get_color_scheme("classic_grey")

    write_excel_engine(
        df=df,
        file_path=file_path,
        merge_columns=merge_columns,
        column_widths=column_widths,
        header_format=header_format,
        column_formats=column_formats,
        merge_format=merge_format,
        group_border_format=group_border_format,
        alternating_row_colors=alternating_row_colors,
        alternating_group_formats=alternating_group_formats,
        sheet_name=sheet_name,
        freeze_panes=freeze_panes,
    )

    # Success message
    output_path = Path(file_path)
    print(f"✓ Saved: {output_path.name}")
@@ -0,0 +1,298 @@
1
+ import polars as pl
2
+ import xlsxwriter
3
+ from typing import Any
4
+ from pathlib import Path
5
+ from .pastel_color_schemes import get_color_scheme
6
+ # version_6
7
+
8
def write_excel_engine(
    df: pl.DataFrame,
    file_path: str | Path,
    merge_columns: list[str] | None = None,
    column_widths: dict[str, int] | None = None,
    header_format: dict[str, Any] | None = None,
    column_formats: dict[str, dict[str, Any]] | None = None,
    merge_format: dict[str, Any] | None = None,
    group_border_format: dict[str, Any] | None = None,
    alternating_row_colors: tuple[str, str] | None = None,
    alternating_group_formats: tuple[dict[str, Any], dict[str, Any]] | None = None,
    sheet_name: str = "Sheet1",
    freeze_panes: tuple | None = (1, 0),
) -> None:
    """
    Write a Polars DataFrame to Excel with merged cells for consecutive duplicate values.

    Cells in the chosen columns are merged whenever consecutive rows carry
    identical values in ALL of those columns. This is useful for survey
    metadata, hierarchical data, or any dataset where visual grouping
    improves readability.

    Parameters
    ----------
    df : pl.DataFrame
        The DataFrame to write to Excel. Must not be empty.
    file_path : str or Path
        Path to the output Excel file. Parent directories will be created if needed.
    merge_columns : list of str, optional
        Column names to merge. Cells are merged when ALL specified columns have
        consecutive duplicate values. If None or empty, no merging is performed.
        Example: ["variable", "question_label"]
    column_widths : dict of str to int, optional
        Mapping of column names to widths in pixels. Names that are not
        columns of df are ignored.
        Example: {"variable": 200, "question_label": 500}
    header_format : dict, optional
        xlsxwriter format properties for header row.
        Example: {"bold": True, "font_color": "#4472C4", "bg_color": "#F0F0F0"}
    column_formats : dict of str to dict, optional
        Mapping of column names to xlsxwriter format dictionaries. Applied to
        non-merged columns; names that are not columns of df are ignored.
        Example: {"value_code": {"num_format": "0", "align": "center"}}
    merge_format : dict, optional
        xlsxwriter format properties for merged cells, layered on top of the
        built-in left-aligned / vertically centered base.
        Example: {"align": "left", "valign": "vcenter", "text_wrap": True}
    group_border_format : dict, optional
        Format properties applied to the merged cells of each group and to the
        non-merged cells of the group's last row.
        Common usage: {"bottom": 1, "bottom_color": "#808080"}
    alternating_row_colors : tuple of (str, str), optional
        Two background colors to alternate between merge groups.
        Example: ("#F5F5F5", "#E0E0E0") for light and darker grey.
        Note: If alternating_group_formats is also provided, it takes precedence.
    alternating_group_formats : tuple of (dict, dict), optional
        Two complete format dictionaries to alternate between merge groups.
        Takes precedence over alternating_row_colors.
        Example: (
            {"bg_color": "#F5F5F5", "font_color": "#000000", "border": 1},
            {"bg_color": "#E0E0E0", "font_color": "#333333", "border": 2}
        )
    sheet_name : str, default "Sheet1"
        Name of the worksheet.
    freeze_panes : tuple of (row, col), optional
        Position to freeze panes. Default (1, 0) freezes the header row.
        Set to None to disable. Example: (1, 2) freezes header and first 2 columns.

    Raises
    ------
    TypeError
        If df is not a Polars DataFrame.
    ValueError
        If df is empty, or if merge_columns contains column names not in
        the DataFrame.
    IOError
        If closing (i.e. saving) the workbook fails; the original exception
        is chained as the cause.

    Examples
    --------
    Basic usage with merged cells:

    >>> import polars as pl
    >>> df = pl.DataFrame({
    ...     "variable": ["S0", "S0", "S0", "S1", "S1"],
    ...     "question": ["Age?", "Age?", "Age?", "Gender?", "Gender?"],
    ...     "value_code": [1, 2, 3, 1, 2],
    ...     "value_label": ["18-25", "26-35", "36+", "Male", "Female"]
    ... })
    >>> write_excel_engine(
    ...     df=df,
    ...     file_path="survey.xlsx",
    ...     merge_columns=["variable", "question"],
    ...     column_widths={"variable": 150, "question": 300}
    ... )

    Advanced formatting with alternating group formats:

    >>> write_excel_engine(
    ...     df=df,
    ...     file_path="survey.xlsx",
    ...     merge_columns=["variable", "question"],
    ...     alternating_group_formats=(
    ...         {"bg_color": "#F0F8FF", "font_color": "#00008B", "border": 1, "border_color": "#4169E1"},
    ...         {"bg_color": "#FFF0F5", "font_color": "#8B008B", "border": 1, "border_color": "#DA70D6"}
    ...     ),
    ...     freeze_panes=(1, 2)
    ... )

    Notes
    -----
    - Column widths are converted from pixels to Excel character units (approx. 7 pixels per unit)
    - Merging only occurs for consecutive rows with identical values in ALL merge_columns
    - Non-consecutive duplicates are not merged
    - Alternating formats are applied to entire merge groups, not individual rows
    - alternating_group_formats takes precedence over alternating_row_colors if both are provided
    - The file is produced with xlsxwriter

    See Also
    --------
    polars.DataFrame.write_excel : Standard Polars Excel writer without merging
    """
    # Input validation
    if not isinstance(df, pl.DataFrame):
        raise TypeError(f"Expected pl.DataFrame, got {type(df).__name__}")

    if df.is_empty():
        raise ValueError("Cannot write empty DataFrame to Excel")

    if merge_columns:
        invalid_cols = set(merge_columns) - set(df.columns)
        if invalid_cols:
            raise ValueError(
                f"merge_columns contains invalid column names: {invalid_cols}. "
                f"Available columns: {df.columns}"
            )

    # Normalise once so the merge path can do membership tests safely even
    # when the caller left column_formats at its None default.
    column_formats = column_formats or {}

    # Convert file_path to Path and ensure parent directory exists
    file_path = Path(file_path)
    file_path.parent.mkdir(parents=True, exist_ok=True)

    workbook = xlsxwriter.Workbook(str(file_path))
    worksheet = workbook.add_worksheet(sheet_name)

    if freeze_panes:
        worksheet.freeze_panes(*freeze_panes)

    fmt_header = workbook.add_format(header_format or {})

    # Base properties for merged cells; caller-supplied keys win.
    base_merge_format = {"align": "left", "valign": "vcenter"}
    if merge_format:
        base_merge_format.update(merge_format)

    columns = df.columns
    col_indices = {col: idx for idx, col in enumerate(columns)}

    # One Format object per configured column that actually exists in df.
    fmt_columns = {
        col_name: workbook.add_format(fmt_dict)
        for col_name, fmt_dict in column_formats.items()
        if col_name in col_indices
    }

    if column_widths:
        for col_name, width_px in column_widths.items():
            col_idx = col_indices.get(col_name)
            if col_idx is not None:
                # Convert pixels to Excel character units (approximate: 1 char ≈ 7 pixels)
                worksheet.set_column(col_idx, col_idx, width_px / 7)

    # Header row
    for col_idx, col_name in enumerate(columns):
        worksheet.write(0, col_idx, col_name, fmt_header)

    data = df.to_dicts()

    if not merge_columns:
        # No merging requested: plain cell-by-cell write below the header.
        for row_idx, record in enumerate(data, start=1):
            for col_idx, col_name in enumerate(columns):
                worksheet.write(row_idx, col_idx, record[col_name], fmt_columns.get(col_name))
    else:
        merge_lookup = set(merge_columns)
        plain_columns = [col for col in columns if col not in merge_lookup]

        # Build each row's merge key once instead of on every comparison.
        keys = [tuple(record[col] for col in merge_columns) for record in data]

        # A cell's format depends only on (column, group parity, whether the
        # row closes its group), so Format objects are created once per
        # combination rather than once per cell.
        cell_format_cache = {}
        merge_format_cache = {}

        for group_index, (start, stop) in enumerate(_consecutive_runs(keys)):
            parity = group_index % 2

            # Group-level formatting; full format dicts beat plain colors.
            if alternating_group_formats:
                group_props = alternating_group_formats[parity]
            elif alternating_row_colors:
                group_props = {"bg_color": alternating_row_colors[parity]}
            else:
                group_props = None

            # Non-merged columns: one cell per data row.
            for data_idx in range(start, stop):
                sheet_row = data_idx + 1  # row 0 holds the header
                closes_group = data_idx == stop - 1
                record = data[data_idx]

                for col_name in plain_columns:
                    cache_key = (col_name, parity, closes_group)
                    if cache_key not in cell_format_cache:
                        props = {}
                        if col_name in column_formats:
                            props.update(column_formats[col_name])
                        if group_props:
                            props.update(group_props)
                        if closes_group and group_border_format:
                            props.update(group_border_format)
                        cell_format_cache[cache_key] = (
                            workbook.add_format(props) if props else fmt_columns.get(col_name)
                        )
                    worksheet.write(
                        sheet_row,
                        col_indices[col_name],
                        record[col_name],
                        cell_format_cache[cache_key],
                    )

            # Merged columns: one (possibly merged) cell per group.
            if parity not in merge_format_cache:
                props = dict(base_merge_format)
                if group_props:
                    props.update(group_props)
                if group_border_format:
                    props.update(group_border_format)
                merge_format_cache[parity] = workbook.add_format(props)
            merged_fmt = merge_format_cache[parity]

            first_row, last_row = start + 1, stop
            for col_name in merge_columns:
                col_idx = col_indices[col_name]
                value = data[start][col_name]
                if last_row > first_row:
                    worksheet.merge_range(first_row, col_idx, last_row, col_idx, value, merged_fmt)
                else:
                    # merge_range needs at least two cells; write singles directly.
                    worksheet.write(first_row, col_idx, value, merged_fmt)

    # Close workbook to save file
    try:
        workbook.close()
    except Exception as e:
        raise IOError(f"Failed to write Excel file to {file_path}: {e}") from e


def _consecutive_runs(keys):
    """Yield (start, stop) index pairs, stop exclusive, for each run of equal adjacent keys."""
    total = len(keys)
    start = 0
    while start < total:
        stop = start + 1
        while stop < total and keys[stop] == keys[start]:
            stop += 1
        yield start, stop
        start = stop