ultrasav 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ultrasav/__init__.py +280 -0
- ultrasav/_add_cases.py +227 -0
- ultrasav/_data.py +513 -0
- ultrasav/_make_dummy.py +137 -0
- ultrasav/_merge_data.py +435 -0
- ultrasav/_merge_meta.py +280 -0
- ultrasav/_metadata.py +570 -0
- ultrasav/_read_files.py +558 -0
- ultrasav/_write_files.py +111 -0
- ultrasav/metaman/__init__.py +91 -0
- ultrasav/metaman/def_detect_variable_type.py +454 -0
- ultrasav/metaman/def_get_meta.py +561 -0
- ultrasav/metaman/def_make_datamap.py +127 -0
- ultrasav/metaman/def_make_labels.py +833 -0
- ultrasav/metaman/def_map_engine.py +529 -0
- ultrasav/metaman/def_map_to_excel.py +294 -0
- ultrasav/metaman/def_write_excel_engine.py +298 -0
- ultrasav/metaman/pastel_color_schemes.py +185 -0
- ultrasav-0.1.4.dist-info/METADATA +550 -0
- ultrasav-0.1.4.dist-info/RECORD +21 -0
- ultrasav-0.1.4.dist-info/WHEEL +4 -0
ultrasav/_metadata.py
ADDED
|
@@ -0,0 +1,570 @@
|
|
|
1
|
+
#_v4_updated
|
|
2
|
+
import warnings
|
|
3
|
+
from typing import Any
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
|
|
7
|
+
@dataclass
|
|
8
|
+
class Metadata:
|
|
9
|
+
"""
|
|
10
|
+
A class to handle SPSS metadata updates for writing SAV files.
|
|
11
|
+
|
|
12
|
+
This class takes the original pyreadstat metadata and allows explicit updates.
|
|
13
|
+
It does NOT track dataframe changes - users must explicitly update metadata
|
|
14
|
+
to match their dataframe transformations.
|
|
15
|
+
|
|
16
|
+
All metadata updates MERGE with original metadata - they don't replace it.
|
|
17
|
+
|
|
18
|
+
Parameters
|
|
19
|
+
----------
|
|
20
|
+
meta_obj : pyreadstat metadata object, dict, or None
|
|
21
|
+
Can be:
|
|
22
|
+
- pyreadstat metadata object from read_sav()
|
|
23
|
+
- dict with metadata parameters to set
|
|
24
|
+
- None for empty metadata
|
|
25
|
+
|
|
26
|
+
Examples
|
|
27
|
+
--------
|
|
28
|
+
>>> # From pyreadstat
|
|
29
|
+
>>> df, meta_raw = pyreadstat.read_sav("file.sav")
|
|
30
|
+
>>> meta = Metadata(meta_raw)
|
|
31
|
+
|
|
32
|
+
>>> # Empty metadata
|
|
33
|
+
>>> meta = Metadata()
|
|
34
|
+
|
|
35
|
+
>>> # With initial values
|
|
36
|
+
>>> meta = Metadata({"column_labels": {"Q1": "Question 1"}})
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
# Store the original metadata object
|
|
40
|
+
_original_meta: Any | None = field(default=None, init=False)
|
|
41
|
+
|
|
42
|
+
# User updates - these will override original metadata when provided
|
|
43
|
+
_user_column_labels: dict[str, str] | None = field(default=None, init=False)
|
|
44
|
+
_user_variable_value_labels: dict[str, dict[int | float | str, str]] | None = field(default=None, init=False)
|
|
45
|
+
_user_variable_format: dict[str, str] | None = field(default=None, init=False)
|
|
46
|
+
_user_variable_measure: dict[str, str] | None = field(default=None, init=False)
|
|
47
|
+
_user_variable_display_width: dict[str, int] | None = field(default=None, init=False)
|
|
48
|
+
_user_missing_ranges: dict[str, list] | None = field(default=None, init=False)
|
|
49
|
+
_user_note: str | list[str] | None = field(default=None, init=False)
|
|
50
|
+
_user_file_label: str | None = field(default=None, init=False)
|
|
51
|
+
_user_compress: bool | None = field(default=None, init=False)
|
|
52
|
+
_user_row_compress: bool | None = field(default=None, init=False)
|
|
53
|
+
|
|
54
|
+
def __init__(self, meta_obj=None):
|
|
55
|
+
"""
|
|
56
|
+
Initialize Metadata instance.
|
|
57
|
+
|
|
58
|
+
Parameters
|
|
59
|
+
----------
|
|
60
|
+
meta_obj : pyreadstat metadata object, dict, or None
|
|
61
|
+
Can be pyreadstat metadata, a dict of parameters, or None for empty
|
|
62
|
+
"""
|
|
63
|
+
# Initialize all fields
|
|
64
|
+
self._original_meta = None
|
|
65
|
+
self._user_column_labels = None
|
|
66
|
+
self._user_variable_value_labels = None
|
|
67
|
+
self._user_variable_format = None
|
|
68
|
+
self._user_variable_measure = None
|
|
69
|
+
self._user_variable_display_width = None
|
|
70
|
+
self._user_missing_ranges = None
|
|
71
|
+
self._user_note = None
|
|
72
|
+
self._user_file_label = None
|
|
73
|
+
self._user_compress = None
|
|
74
|
+
self._user_row_compress = None
|
|
75
|
+
|
|
76
|
+
if meta_obj is not None:
|
|
77
|
+
# Check if it's pyreadstat metadata (has specific attributes)
|
|
78
|
+
if hasattr(meta_obj, 'column_names') and hasattr(meta_obj, 'column_labels'):
|
|
79
|
+
# It's pyreadstat metadata
|
|
80
|
+
self._original_meta = meta_obj
|
|
81
|
+
elif isinstance(meta_obj, dict):
|
|
82
|
+
# It's user-provided dict of updates
|
|
83
|
+
self.update(**meta_obj)
|
|
84
|
+
else:
|
|
85
|
+
# Try to detect if it's pyreadstat metadata by other attributes
|
|
86
|
+
if hasattr(meta_obj, 'number_columns') or hasattr(meta_obj, 'file_label'):
|
|
87
|
+
self._original_meta = meta_obj
|
|
88
|
+
else:
|
|
89
|
+
raise TypeError(
|
|
90
|
+
f"Unsupported metadata type: {type(meta_obj)}. "
|
|
91
|
+
"Expected pyreadstat metadata object, dict, or None."
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
@classmethod
|
|
95
|
+
def from_pyreadstat(cls, meta_obj):
|
|
96
|
+
"""
|
|
97
|
+
Create a Metadata instance from a pyreadstat metadata object.
|
|
98
|
+
|
|
99
|
+
DEPRECATED: Use Metadata(meta_obj) instead.
|
|
100
|
+
|
|
101
|
+
Parameters
|
|
102
|
+
----------
|
|
103
|
+
meta_obj : pyreadstat metadata object or None
|
|
104
|
+
The metadata object returned by pyreadstat.read_sav()
|
|
105
|
+
|
|
106
|
+
Returns
|
|
107
|
+
-------
|
|
108
|
+
Metadata
|
|
109
|
+
A new Metadata instance
|
|
110
|
+
"""
|
|
111
|
+
warnings.warn(
|
|
112
|
+
"Metadata.from_pyreadstat() is deprecated. Use Metadata(meta_obj) instead.",
|
|
113
|
+
DeprecationWarning,
|
|
114
|
+
stacklevel=2
|
|
115
|
+
)
|
|
116
|
+
return cls(meta_obj)
|
|
117
|
+
|
|
118
|
+
def _merge_with_original(self, user_dict: dict | None,
|
|
119
|
+
original_attr: str,
|
|
120
|
+
process_values: bool = False) -> dict:
|
|
121
|
+
"""
|
|
122
|
+
Generic method to merge user updates with original metadata.
|
|
123
|
+
|
|
124
|
+
Parameters
|
|
125
|
+
----------
|
|
126
|
+
user_dict : dict or None
|
|
127
|
+
User-provided updates
|
|
128
|
+
original_attr : str
|
|
129
|
+
Name of the attribute in original metadata
|
|
130
|
+
process_values : bool
|
|
131
|
+
If True, process value labels (convert keys to numbers)
|
|
132
|
+
|
|
133
|
+
Returns
|
|
134
|
+
-------
|
|
135
|
+
dict
|
|
136
|
+
Merged dictionary (original + updates)
|
|
137
|
+
"""
|
|
138
|
+
# If no user updates, return original
|
|
139
|
+
if not user_dict:
|
|
140
|
+
if not self._original_meta or not hasattr(self._original_meta, original_attr):
|
|
141
|
+
return {}
|
|
142
|
+
original = getattr(self._original_meta, original_attr)
|
|
143
|
+
return original.copy() if original else {}
|
|
144
|
+
|
|
145
|
+
# If no original metadata, return user updates
|
|
146
|
+
if not self._original_meta or not hasattr(self._original_meta, original_attr):
|
|
147
|
+
if process_values:
|
|
148
|
+
# Convert keys to numbers if possible for value labels
|
|
149
|
+
converted = {}
|
|
150
|
+
for var, lbls in user_dict.items():
|
|
151
|
+
converted[var] = self._convert_keys_to_numbers_if_possible(lbls)
|
|
152
|
+
return converted
|
|
153
|
+
return user_dict.copy()
|
|
154
|
+
|
|
155
|
+
# Merge: start with original, then apply user updates
|
|
156
|
+
original = getattr(self._original_meta, original_attr)
|
|
157
|
+
existing = original.copy() if original else {}
|
|
158
|
+
|
|
159
|
+
# Apply user updates
|
|
160
|
+
for key, value in user_dict.items():
|
|
161
|
+
if process_values:
|
|
162
|
+
existing[key] = self._convert_keys_to_numbers_if_possible(value)
|
|
163
|
+
else:
|
|
164
|
+
existing[key] = value
|
|
165
|
+
|
|
166
|
+
return existing
|
|
167
|
+
|
|
168
|
+
# ===================================================================
|
|
169
|
+
# WRITABLE PROPERTIES (can be updated by user)
|
|
170
|
+
# ===================================================================
|
|
171
|
+
|
|
172
|
+
@property
|
|
173
|
+
def column_labels(self) -> dict[str, str]:
|
|
174
|
+
"""Get current column labels (original + updates)."""
|
|
175
|
+
if not self._user_column_labels:
|
|
176
|
+
if not self._original_meta:
|
|
177
|
+
return {}
|
|
178
|
+
# Special handling for column_labels as it's stored differently
|
|
179
|
+
if hasattr(self._original_meta, 'column_names') and hasattr(self._original_meta, 'column_labels'):
|
|
180
|
+
return dict(zip(self._original_meta.column_names,
|
|
181
|
+
self._original_meta.column_labels))
|
|
182
|
+
return {}
|
|
183
|
+
|
|
184
|
+
if self._original_meta is None:
|
|
185
|
+
return self._user_column_labels
|
|
186
|
+
|
|
187
|
+
# Start with existing labels
|
|
188
|
+
existing = {}
|
|
189
|
+
if hasattr(self._original_meta, 'column_names') and hasattr(self._original_meta, 'column_labels'):
|
|
190
|
+
existing = dict(zip(self._original_meta.column_names,
|
|
191
|
+
self._original_meta.column_labels))
|
|
192
|
+
|
|
193
|
+
# Override with user updates
|
|
194
|
+
return {**existing, **self._user_column_labels}
|
|
195
|
+
|
|
196
|
+
@column_labels.setter
|
|
197
|
+
def column_labels(self, value: dict[str, str]):
|
|
198
|
+
"""Set user column labels updates (merges with original)."""
|
|
199
|
+
self._user_column_labels = value
|
|
200
|
+
|
|
201
|
+
@property
|
|
202
|
+
def variable_value_labels(self) -> dict[str, dict[int | float | str, str]]:
|
|
203
|
+
"""Get current variable value labels (original + updates)."""
|
|
204
|
+
return self._merge_with_original(
|
|
205
|
+
self._user_variable_value_labels,
|
|
206
|
+
'variable_value_labels',
|
|
207
|
+
process_values=True
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
@variable_value_labels.setter
|
|
211
|
+
def variable_value_labels(self, value: dict[str, dict[int | float | str, str]]):
|
|
212
|
+
"""Set user variable value labels updates (merges with original)."""
|
|
213
|
+
self._user_variable_value_labels = value
|
|
214
|
+
|
|
215
|
+
@property
|
|
216
|
+
def variable_format(self) -> dict[str, str]:
|
|
217
|
+
"""Get current variable formats (original + updates)."""
|
|
218
|
+
# First try variable_format, then fall back to original_variable_types
|
|
219
|
+
if hasattr(self._original_meta, 'variable_format') and self._original_meta.variable_format:
|
|
220
|
+
return self._merge_with_original(
|
|
221
|
+
self._user_variable_format,
|
|
222
|
+
'variable_format'
|
|
223
|
+
)
|
|
224
|
+
elif hasattr(self._original_meta, 'original_variable_types') and not self._user_variable_format:
|
|
225
|
+
# Use original_variable_types as fallback if no variable_format exists
|
|
226
|
+
return self._original_meta.original_variable_types.copy()
|
|
227
|
+
else:
|
|
228
|
+
# Merge user updates with original_variable_types if available
|
|
229
|
+
if self._user_variable_format:
|
|
230
|
+
if hasattr(self._original_meta, 'original_variable_types'):
|
|
231
|
+
existing = self._original_meta.original_variable_types.copy()
|
|
232
|
+
for key, value in self._user_variable_format.items():
|
|
233
|
+
existing[key] = value
|
|
234
|
+
return existing
|
|
235
|
+
return self._user_variable_format.copy()
|
|
236
|
+
return {}
|
|
237
|
+
|
|
238
|
+
@variable_format.setter
|
|
239
|
+
def variable_format(self, value: dict[str, str]):
|
|
240
|
+
"""Set user variable format updates (merges with original)."""
|
|
241
|
+
self._user_variable_format = value
|
|
242
|
+
|
|
243
|
+
@property
|
|
244
|
+
def variable_measure(self) -> dict[str, str]:
|
|
245
|
+
"""Get current variable measures (original + updates)."""
|
|
246
|
+
return self._merge_with_original(
|
|
247
|
+
self._user_variable_measure,
|
|
248
|
+
'variable_measure'
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
@variable_measure.setter
|
|
252
|
+
def variable_measure(self, value: dict[str, str]):
|
|
253
|
+
"""Set user variable measure updates (merges with original)."""
|
|
254
|
+
self._user_variable_measure = value
|
|
255
|
+
|
|
256
|
+
@property
|
|
257
|
+
def variable_display_width(self) -> dict[str, int]:
|
|
258
|
+
"""Get current variable display widths (original + updates)."""
|
|
259
|
+
return self._merge_with_original(
|
|
260
|
+
self._user_variable_display_width,
|
|
261
|
+
'variable_display_width'
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
@variable_display_width.setter
|
|
265
|
+
def variable_display_width(self, value: dict[str, int]):
|
|
266
|
+
"""Set user variable display width updates (merges with original)."""
|
|
267
|
+
self._user_variable_display_width = value
|
|
268
|
+
|
|
269
|
+
@property
|
|
270
|
+
def missing_ranges(self) -> dict[str, list] | None:
|
|
271
|
+
"""Get current missing ranges (original + updates)."""
|
|
272
|
+
# missing_ranges follows same merge pattern
|
|
273
|
+
if not self._user_missing_ranges:
|
|
274
|
+
return getattr(self._original_meta, "missing_ranges", None) if self._original_meta else None
|
|
275
|
+
|
|
276
|
+
if not self._original_meta or not hasattr(self._original_meta, "missing_ranges"):
|
|
277
|
+
return self._user_missing_ranges
|
|
278
|
+
|
|
279
|
+
# Merge: start with original, apply user updates
|
|
280
|
+
original = getattr(self._original_meta, "missing_ranges", {})
|
|
281
|
+
if original:
|
|
282
|
+
merged = original.copy()
|
|
283
|
+
for key, value in self._user_missing_ranges.items():
|
|
284
|
+
merged[key] = value
|
|
285
|
+
return merged
|
|
286
|
+
return self._user_missing_ranges
|
|
287
|
+
|
|
288
|
+
@missing_ranges.setter
|
|
289
|
+
def missing_ranges(self, value: dict[str, list]):
|
|
290
|
+
"""Set user missing ranges (merges with original)."""
|
|
291
|
+
self._user_missing_ranges = value
|
|
292
|
+
|
|
293
|
+
@property
|
|
294
|
+
def note(self) -> str | list[str] | None:
|
|
295
|
+
"""Get current note (user or original)."""
|
|
296
|
+
if self._user_note is not None:
|
|
297
|
+
return self._user_note
|
|
298
|
+
if self._original_meta and hasattr(self._original_meta, "notes") and self._original_meta.notes:
|
|
299
|
+
return self._original_meta.notes
|
|
300
|
+
return None
|
|
301
|
+
|
|
302
|
+
@note.setter
|
|
303
|
+
def note(self, value: str | list[str]):
|
|
304
|
+
"""Set user note (replaces original)."""
|
|
305
|
+
self._user_note = value
|
|
306
|
+
|
|
307
|
+
@property
|
|
308
|
+
def file_label(self) -> str:
|
|
309
|
+
"""Get current file label (user or original)."""
|
|
310
|
+
if self._user_file_label is not None:
|
|
311
|
+
return self._user_file_label
|
|
312
|
+
return getattr(self._original_meta, "file_label", "") if self._original_meta else ""
|
|
313
|
+
|
|
314
|
+
@file_label.setter
|
|
315
|
+
def file_label(self, value: str):
|
|
316
|
+
"""Set user file label (replaces original)."""
|
|
317
|
+
self._user_file_label = value
|
|
318
|
+
|
|
319
|
+
@property
|
|
320
|
+
def compress(self) -> bool:
|
|
321
|
+
"""Get compress setting."""
|
|
322
|
+
return self._user_compress if self._user_compress is not None else False
|
|
323
|
+
|
|
324
|
+
@compress.setter
|
|
325
|
+
def compress(self, value: bool):
|
|
326
|
+
"""Set compress setting."""
|
|
327
|
+
self._user_compress = value
|
|
328
|
+
|
|
329
|
+
@property
|
|
330
|
+
def row_compress(self) -> bool:
|
|
331
|
+
"""Get row_compress setting."""
|
|
332
|
+
return self._user_row_compress if self._user_row_compress is not None else False
|
|
333
|
+
|
|
334
|
+
@row_compress.setter
|
|
335
|
+
def row_compress(self, value: bool):
|
|
336
|
+
"""Set row_compress setting."""
|
|
337
|
+
self._user_row_compress = value
|
|
338
|
+
|
|
339
|
+
# ===================================================================
|
|
340
|
+
# READ-ONLY PROPERTIES (from original metadata)
|
|
341
|
+
# ===================================================================
|
|
342
|
+
|
|
343
|
+
# Basic file information
|
|
344
|
+
@property
|
|
345
|
+
def notes(self) -> str | list[str] | None:
|
|
346
|
+
"""Get notes from original metadata (same as note property)."""
|
|
347
|
+
return self.note
|
|
348
|
+
|
|
349
|
+
@property
|
|
350
|
+
def creation_time(self) -> str | None:
|
|
351
|
+
"""Get creation time from original metadata."""
|
|
352
|
+
return getattr(self._original_meta, "creation_time", None) if self._original_meta else None
|
|
353
|
+
|
|
354
|
+
@property
|
|
355
|
+
def modification_time(self) -> str | None:
|
|
356
|
+
"""Get modification time from original metadata."""
|
|
357
|
+
return getattr(self._original_meta, "modification_time", None) if self._original_meta else None
|
|
358
|
+
|
|
359
|
+
@property
|
|
360
|
+
def file_encoding(self) -> str | None:
|
|
361
|
+
"""Get file encoding from original metadata."""
|
|
362
|
+
return getattr(self._original_meta, "file_encoding", None) if self._original_meta else None
|
|
363
|
+
|
|
364
|
+
@property
|
|
365
|
+
def table_name(self) -> str | None:
|
|
366
|
+
"""Get table name from original metadata."""
|
|
367
|
+
return getattr(self._original_meta, "table_name", None) if self._original_meta else None
|
|
368
|
+
|
|
369
|
+
# Column/variable information
|
|
370
|
+
@property
|
|
371
|
+
def column_names(self) -> list[str]:
|
|
372
|
+
"""Get column names from original metadata."""
|
|
373
|
+
if self._original_meta and hasattr(self._original_meta, 'column_names'):
|
|
374
|
+
return list(self._original_meta.column_names)
|
|
375
|
+
return []
|
|
376
|
+
|
|
377
|
+
@property
|
|
378
|
+
def column_names_to_labels(self) -> dict[str, str]:
|
|
379
|
+
"""Get column names to labels mapping (same as column_labels property)."""
|
|
380
|
+
return self.column_labels
|
|
381
|
+
|
|
382
|
+
@property
|
|
383
|
+
def number_columns(self) -> int | None:
|
|
384
|
+
"""Get number of columns from original metadata."""
|
|
385
|
+
return getattr(self._original_meta, "number_columns", None) if self._original_meta else None
|
|
386
|
+
|
|
387
|
+
@property
|
|
388
|
+
def number_rows(self) -> int | None:
|
|
389
|
+
"""Get number of rows from original metadata."""
|
|
390
|
+
return getattr(self._original_meta, "number_rows", None) if self._original_meta else None
|
|
391
|
+
|
|
392
|
+
# Variable types and formats
|
|
393
|
+
@property
|
|
394
|
+
def original_variable_types(self) -> dict[str, str]:
|
|
395
|
+
"""Get original variable types from metadata."""
|
|
396
|
+
if self._original_meta and hasattr(self._original_meta, 'original_variable_types'):
|
|
397
|
+
return self._original_meta.original_variable_types.copy()
|
|
398
|
+
return {}
|
|
399
|
+
|
|
400
|
+
@property
|
|
401
|
+
def readstat_variable_types(self) -> dict[str, str]:
|
|
402
|
+
"""Get readstat variable types from metadata."""
|
|
403
|
+
if self._original_meta and hasattr(self._original_meta, 'readstat_variable_types'):
|
|
404
|
+
return self._original_meta.readstat_variable_types.copy()
|
|
405
|
+
return {}
|
|
406
|
+
|
|
407
|
+
# Value labels and mappings
|
|
408
|
+
@property
|
|
409
|
+
def value_labels(self) -> dict:
|
|
410
|
+
"""Get value labels from original metadata."""
|
|
411
|
+
if self._original_meta and hasattr(self._original_meta, 'value_labels'):
|
|
412
|
+
return self._original_meta.value_labels.copy() if self._original_meta.value_labels else {}
|
|
413
|
+
return {}
|
|
414
|
+
|
|
415
|
+
@property
|
|
416
|
+
def variable_to_label(self) -> dict[str, str]:
|
|
417
|
+
"""Get variable to label mapping from original metadata."""
|
|
418
|
+
if self._original_meta and hasattr(self._original_meta, 'variable_to_label'):
|
|
419
|
+
return self._original_meta.variable_to_label.copy() if self._original_meta.variable_to_label else {}
|
|
420
|
+
return {}
|
|
421
|
+
|
|
422
|
+
# Missing value information
|
|
423
|
+
@property
|
|
424
|
+
def missing_user_values(self) -> dict | None:
|
|
425
|
+
"""Get missing user values from original metadata."""
|
|
426
|
+
return getattr(self._original_meta, "missing_user_values", None) if self._original_meta else None
|
|
427
|
+
|
|
428
|
+
# Display properties
|
|
429
|
+
@property
|
|
430
|
+
def variable_alignment(self) -> dict[str, str]:
|
|
431
|
+
"""Get variable alignment from original metadata."""
|
|
432
|
+
if self._original_meta and hasattr(self._original_meta, 'variable_alignment'):
|
|
433
|
+
return self._original_meta.variable_alignment.copy() if self._original_meta.variable_alignment else {}
|
|
434
|
+
return {}
|
|
435
|
+
|
|
436
|
+
@property
|
|
437
|
+
def variable_storage_width(self) -> dict[str, int]:
|
|
438
|
+
"""Get variable storage width from original metadata."""
|
|
439
|
+
if self._original_meta and hasattr(self._original_meta, 'variable_storage_width'):
|
|
440
|
+
return self._original_meta.variable_storage_width.copy() if self._original_meta.variable_storage_width else {}
|
|
441
|
+
return {}
|
|
442
|
+
|
|
443
|
+
# Multiple response sets
|
|
444
|
+
@property
|
|
445
|
+
def mr_sets(self) -> dict | None:
|
|
446
|
+
"""Get multiple response sets from original metadata."""
|
|
447
|
+
return getattr(self._original_meta, "mr_sets", None) if self._original_meta else None
|
|
448
|
+
|
|
449
|
+
# ===================================================================
|
|
450
|
+
# METHODS
|
|
451
|
+
# ===================================================================
|
|
452
|
+
|
|
453
|
+
def update(self, **kwargs) -> 'Metadata':
|
|
454
|
+
"""
|
|
455
|
+
Update metadata with user-provided values.
|
|
456
|
+
|
|
457
|
+
Parameters
|
|
458
|
+
----------
|
|
459
|
+
**kwargs : dict
|
|
460
|
+
Any of the writable metadata attributes (column_labels, variable_value_labels, etc.)
|
|
461
|
+
|
|
462
|
+
Returns
|
|
463
|
+
-------
|
|
464
|
+
self
|
|
465
|
+
Returns self for method chaining
|
|
466
|
+
|
|
467
|
+
Examples
|
|
468
|
+
--------
|
|
469
|
+
>>> meta.update(
|
|
470
|
+
... column_labels={"Q1": "Question 1"},
|
|
471
|
+
... file_label="My Survey"
|
|
472
|
+
... )
|
|
473
|
+
"""
|
|
474
|
+
for key, value in kwargs.items():
|
|
475
|
+
if hasattr(self, key) and not key.startswith('_'):
|
|
476
|
+
setattr(self, key, value)
|
|
477
|
+
else:
|
|
478
|
+
warnings.warn(f"Unknown metadata attribute: {key}", UserWarning, stacklevel=2)
|
|
479
|
+
|
|
480
|
+
return self
|
|
481
|
+
|
|
482
|
+
def _convert_keys_to_numbers_if_possible(self, value_labels_dict):
|
|
483
|
+
"""Convert string keys to numbers where possible (from v1.0 logic)."""
|
|
484
|
+
updated = {}
|
|
485
|
+
for k, v in value_labels_dict.items():
|
|
486
|
+
try:
|
|
487
|
+
temp = float(k)
|
|
488
|
+
if temp.is_integer():
|
|
489
|
+
temp = int(temp)
|
|
490
|
+
updated[temp] = v
|
|
491
|
+
except (ValueError, TypeError):
|
|
492
|
+
updated[k] = v
|
|
493
|
+
return updated
|
|
494
|
+
|
|
495
|
+
def _force_string_labels(self, labels_dict):
|
|
496
|
+
"""Ensure all labels are strings (from v1.0 logic)."""
|
|
497
|
+
if not labels_dict:
|
|
498
|
+
return {}
|
|
499
|
+
fixed = {}
|
|
500
|
+
for col_name, lbl_val in labels_dict.items():
|
|
501
|
+
col_name_str = str(col_name)
|
|
502
|
+
label_str = str(lbl_val) if lbl_val is not None else ""
|
|
503
|
+
fixed[col_name_str] = label_str
|
|
504
|
+
return fixed
|
|
505
|
+
|
|
506
|
+
def _resolve_compress_settings(self):
|
|
507
|
+
"""Resolve compression settings."""
|
|
508
|
+
final_compress = self.compress
|
|
509
|
+
final_row_compress = self.row_compress
|
|
510
|
+
|
|
511
|
+
if final_compress and final_row_compress:
|
|
512
|
+
warnings.warn(
|
|
513
|
+
"Both 'compress' and 'row_compress' are True; prioritizing 'compress' over 'row_compress'.",
|
|
514
|
+
UserWarning,
|
|
515
|
+
stacklevel=2
|
|
516
|
+
)
|
|
517
|
+
final_row_compress = False
|
|
518
|
+
|
|
519
|
+
return final_compress, final_row_compress
|
|
520
|
+
|
|
521
|
+
def get_write_params(self) -> dict[str, Any]:
|
|
522
|
+
"""
|
|
523
|
+
Get parameters formatted for pyreadstat.write_sav().
|
|
524
|
+
|
|
525
|
+
Returns
|
|
526
|
+
-------
|
|
527
|
+
dict
|
|
528
|
+
Dictionary of parameters ready to pass to write_sav
|
|
529
|
+
"""
|
|
530
|
+
# Ensure column labels are all strings
|
|
531
|
+
column_labels = self._force_string_labels(self.column_labels)
|
|
532
|
+
|
|
533
|
+
# Resolve note formatting
|
|
534
|
+
final_note = self.note
|
|
535
|
+
if isinstance(final_note, list):
|
|
536
|
+
final_note = "\n".join(final_note)
|
|
537
|
+
|
|
538
|
+
# Resolve compression settings
|
|
539
|
+
final_compress, final_row_compress = self._resolve_compress_settings()
|
|
540
|
+
|
|
541
|
+
params = {
|
|
542
|
+
'file_label': self.file_label,
|
|
543
|
+
'column_labels': column_labels if column_labels else None,
|
|
544
|
+
'compress': final_compress,
|
|
545
|
+
'row_compress': final_row_compress,
|
|
546
|
+
'note': final_note,
|
|
547
|
+
'variable_value_labels': self.variable_value_labels if self.variable_value_labels else None,
|
|
548
|
+
'missing_ranges': self.missing_ranges,
|
|
549
|
+
'variable_display_width': self.variable_display_width if self.variable_display_width else None,
|
|
550
|
+
'variable_measure': self.variable_measure if self.variable_measure else None,
|
|
551
|
+
'variable_format': self.variable_format if self.variable_format else None,
|
|
552
|
+
}
|
|
553
|
+
|
|
554
|
+
# Remove None values for cleaner params
|
|
555
|
+
return {k: v for k, v in params.items() if v is not None}
|
|
556
|
+
|
|
557
|
+
def copy(self) -> 'Metadata':
|
|
558
|
+
"""Create a deep copy of the metadata."""
|
|
559
|
+
return deepcopy(self)
|
|
560
|
+
|
|
561
|
+
def __repr__(self) -> str:
|
|
562
|
+
info = []
|
|
563
|
+
if self._original_meta:
|
|
564
|
+
info.append(f"columns={self.number_columns}")
|
|
565
|
+
if self.column_labels:
|
|
566
|
+
info.append(f"labels={len(self.column_labels)}")
|
|
567
|
+
if self.variable_value_labels:
|
|
568
|
+
info.append(f"value_labels={len(self.variable_value_labels)}")
|
|
569
|
+
|
|
570
|
+
return f"Metadata({', '.join(info)})"
|