ultrasav 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ultrasav/__init__.py +280 -0
- ultrasav/_add_cases.py +227 -0
- ultrasav/_data.py +513 -0
- ultrasav/_make_dummy.py +137 -0
- ultrasav/_merge_data.py +435 -0
- ultrasav/_merge_meta.py +280 -0
- ultrasav/_metadata.py +570 -0
- ultrasav/_read_files.py +558 -0
- ultrasav/_write_files.py +111 -0
- ultrasav/metaman/__init__.py +91 -0
- ultrasav/metaman/def_detect_variable_type.py +454 -0
- ultrasav/metaman/def_get_meta.py +561 -0
- ultrasav/metaman/def_make_datamap.py +127 -0
- ultrasav/metaman/def_make_labels.py +833 -0
- ultrasav/metaman/def_map_engine.py +529 -0
- ultrasav/metaman/def_map_to_excel.py +294 -0
- ultrasav/metaman/def_write_excel_engine.py +298 -0
- ultrasav/metaman/pastel_color_schemes.py +185 -0
- ultrasav-0.1.4.dist-info/METADATA +550 -0
- ultrasav-0.1.4.dist-info/RECORD +21 -0
- ultrasav-0.1.4.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,550 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: ultrasav
|
|
3
|
+
Version: 0.1.4
|
|
4
|
+
Summary: A Python package for working with SPSS/SAV files with two-track architecture separating data and metadata operations
|
|
5
|
+
Keywords: spss,spss labels,spss files,sav files,sav,statistics,data-science,data-processing,survey-data,metadata,spss metadata,pyreadstat,dataframe-agnostic,polars,pandas,read spss,read sav,write spss,write sav,merge spss,merge sav,datamap,spss-datamap,validation,data-quality,tidyspss,metaprinter
|
|
6
|
+
Author: Albert Li
|
|
7
|
+
License: MIT
|
|
8
|
+
Requires-Dist: pandas>=2.2.0
|
|
9
|
+
Requires-Dist: polars>=1.3.0
|
|
10
|
+
Requires-Dist: pyreadstat>=1.3.2
|
|
11
|
+
Requires-Dist: narwhals>=2.11.0
|
|
12
|
+
Requires-Dist: openpyxl>=3.0.0
|
|
13
|
+
Requires-Dist: xlsxwriter>=3.1.0
|
|
14
|
+
Requires-Dist: ultrasav[excel,dev,docs] ; extra == 'all'
|
|
15
|
+
Requires-Dist: pytest>=7.0.0 ; extra == 'dev'
|
|
16
|
+
Requires-Dist: pytest-cov>=4.0.0 ; extra == 'dev'
|
|
17
|
+
Requires-Dist: black>=23.0.0 ; extra == 'dev'
|
|
18
|
+
Requires-Dist: ruff>=0.1.0 ; extra == 'dev'
|
|
19
|
+
Requires-Dist: mypy>=1.0.0 ; extra == 'dev'
|
|
20
|
+
Requires-Dist: pre-commit>=3.0.0 ; extra == 'dev'
|
|
21
|
+
Requires-Dist: sphinx>=6.0.0 ; extra == 'docs'
|
|
22
|
+
Requires-Dist: sphinx-rtd-theme>=1.0.0 ; extra == 'docs'
|
|
23
|
+
Requires-Dist: myst-parser>=1.0.0 ; extra == 'docs'
|
|
24
|
+
Requires-Dist: fastexcel>=0.9.0 ; extra == 'excel'
|
|
25
|
+
Maintainer: Albert Li
|
|
26
|
+
Requires-Python: >=3.11
|
|
27
|
+
Project-URL: Changelog, https://github.com/albertxli/ultrasav/blob/main/CHANGELOG.md
|
|
28
|
+
Project-URL: Documentation, https://ultrasav.readthedocs.io
|
|
29
|
+
Project-URL: Homepage, https://github.com/albertxli/ultrasav
|
|
30
|
+
Provides-Extra: all
|
|
31
|
+
Provides-Extra: dev
|
|
32
|
+
Provides-Extra: docs
|
|
33
|
+
Provides-Extra: excel
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# ultrasav
|
|
37
|
+
|
|
38
|
+
⚡ An 'Ultra-powerful' Python package for preparing production-ready SPSS/SAV files using a two-track architecture that separates data and metadata operations.
|
|
39
|
+
|
|
40
|
+
> *"Specium Ray for your data!" - Transform SPSS files with the power of Ultra*
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
## 💡 Motivation
|
|
44
|
+
|
|
45
|
+
**ultrasav** is built as a thoughtful wrapper around the excellent pyreadstat package. We're not here to reinvent the wheel for reading and writing SAV files - pyreadstat already does that brilliantly!
|
|
46
|
+
|
|
47
|
+
Instead, ultrasav provides additional transformation tools for tasks that are commonly done by folks who work with SAV files regularly:
|
|
48
|
+
- 🏷️ **Rename variables** - Change variable names in batch with clean methodology
|
|
49
|
+
- 🔄 **Recode values** - Transform codes across multiple variables with clean syntax
|
|
50
|
+
- 🏷️ **Update labels** - Batch update variable labels and value labels without losing track
|
|
51
|
+
- 📊 **Reorganize columns** - Move variables to specific positions for standardized layouts
|
|
52
|
+
- 📀 **Merge files intelligently** - Stack survey data while preserving all metadata
|
|
53
|
+
- 🎯 **Handle missing values** - Consistent missing value definitions across datasets
|
|
54
|
+
- 🦸 **Inspect & report metadata** - Generate datamaps and validation reports with metaman
|
|
55
|
+
|
|
56
|
+
## 🎯 Core Philosophy
|
|
57
|
+
|
|
58
|
+
**ultrasav** follows a simple but powerful principle: **Data and Metadata are two independent layers that only come together at read/write time.**
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
┌─────────────┐ ┌─────────────┐
|
|
62
|
+
│ DATA │ │ METADATA │
|
|
63
|
+
│ DataFrame │ │ Labels │
|
|
64
|
+
│ Operations │ │ Formats │
|
|
65
|
+
└─────────────┘ └─────────────┘
|
|
66
|
+
│ │
|
|
67
|
+
└────────┬────────────────┘
|
|
68
|
+
│
|
|
69
|
+
▼
|
|
70
|
+
┌─────────────┐
|
|
71
|
+
│ WRITE SAV │
|
|
72
|
+
└─────────────┘
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### The Common Problems
|
|
76
|
+
|
|
77
|
+
If you work with SPSS files in Python, you've probably asked yourself:
|
|
78
|
+
|
|
79
|
+
- How do I bulk update variable labels and value labels?
|
|
80
|
+
- How do I quickly relocate variables to ideal positions?
|
|
81
|
+
- How do I merge datasets — and more specifically, how are the labels being merged?
|
|
82
|
+
- How can I see a comprehensive datamap of my data?
|
|
83
|
+
- Most importantly: **How do I prepare a tidy SPSS file with clean labels and metadata that is production-ready?**
|
|
84
|
+
|
|
85
|
+
ultrasav answers all of these.
|
|
86
|
+
|
|
87
|
+
### The ultrasav Way
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
import ultrasav as ul
|
|
91
|
+
|
|
92
|
+
# Read → splits into two independent tracks
|
|
93
|
+
df, meta = ul.read_sav("survey.sav")
|
|
94
|
+
|
|
95
|
+
# Track 1 - Data: Transform data freely
|
|
96
|
+
data = ul.Data(df) # Wrap df into our Data class
|
|
97
|
+
df = data.move(first=['id']).rename({'Q1': 'satisfaction'}).replace({'satisfaction': {6: 99}}).to_native()
|
|
98
|
+
|
|
99
|
+
# Track 2 - Metadata: Update metadata independently
|
|
100
|
+
meta = ul.Metadata(meta) # Wrap meta into our Metadata class
|
|
101
|
+
meta.column_labels = {'satisfaction': 'Overall satisfaction'}
|
|
102
|
+
meta.variable_value_labels={'recommend': {0: 'No', 1: 'Yes'}}
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Convergence: Reunite at write time
|
|
106
|
+
ul.write_sav(df, meta, "clean_survey.sav")
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
The goal is to provide you with a **clean and easy-to-understand way** to transform your SPSS data that you can use in real production workflows with minimal tweaking.
|
|
110
|
+
|
|
111
|
+
### 🚀 DataFrame-Agnostic Design
|
|
112
|
+
|
|
113
|
+
One of ultrasav's superpowers is being **dataframe-agnostic** — it works seamlessly with both **polars** and **pandas** thanks to [narwhals](https://github.com/MarcoGorelli/narwhals) under the hood:
|
|
114
|
+
|
|
115
|
+
- 🐻‍❄️ **Polars by default** - Blazing fast performance out of the box
|
|
116
|
+
- 🐼 **Pandas fully supported** - Use `output_format="pandas"` when needed
|
|
117
|
+
- 🔄 **Switch freely** - Convert between pandas and polars anytime
|
|
118
|
+
- 🔧 **Future-proof** - Ready for whatever dataframe library comes next
|
|
119
|
+
|
|
120
|
+
**Default output format: Polars** — All operations return polars DataFrames by default for blazing-fast performance. Pandas is fully supported via the `output_format="pandas"` parameter.
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
import ultrasav as ul
|
|
124
|
+
|
|
125
|
+
# Polars by default
|
|
126
|
+
df_pl, meta = ul.read_sav("survey.sav", output_format="polars")
|
|
127
|
+
|
|
128
|
+
# Or explicitly request pandas
|
|
129
|
+
df_pd, meta = ul.read_sav("survey.sav", output_format="pandas")
|
|
130
|
+
|
|
131
|
+
# The Data class works with either
|
|
132
|
+
data = ul.Data(df_pl) # Works with both Polars and pandas!
|
|
133
|
+
|
|
134
|
+
# Transform using ultrasav's consistent API
|
|
135
|
+
data = data.rename({"Q1": "satisfaction"}).replace({'satisfaction': {6: 99}})
|
|
136
|
+
df_native = data.to_native() # Get back your polars DataFrame
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Who Is This For?
|
|
140
|
+
|
|
141
|
+
- 📊 **Market Researchers** - Merge waves, standardize labels, prepare deliverables
|
|
142
|
+
- 🔬 **Data Scientists** - Clean survey data, prepare features, maintain metadata
|
|
143
|
+
- 🏭 **Data Engineers** - Build robust pipelines that preserve SPSS metadata
|
|
144
|
+
- 🎓 **Academic Researchers** - Manage longitudinal studies, harmonize datasets
|
|
145
|
+
- 📈 **Anyone working with SPSS** - If you use SAV files regularly, this is for you!
|
|
146
|
+
|
|
147
|
+
## 🚀 Installation
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
# Using uv
|
|
151
|
+
uv add ultrasav
|
|
152
|
+
|
|
153
|
+
# Or using pip
|
|
154
|
+
pip install ultrasav
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## 📚 Quick Start
|
|
158
|
+
|
|
159
|
+
### Basic Usage
|
|
160
|
+
|
|
161
|
+
```python
|
|
162
|
+
import ultrasav as ul
|
|
163
|
+
|
|
164
|
+
# Read SPSS file - automatically splits into data and metadata
|
|
165
|
+
df, meta = ul.read_sav("survey.sav")
|
|
166
|
+
# Note: You can also use pyreadstat directly - our classes work with pyreadstat meta objects too
|
|
167
|
+
|
|
168
|
+
# Track 1: Process data independently
|
|
169
|
+
data = ul.Data(df) # Wrap in Data class for transformations
|
|
170
|
+
data = data.move(first=["ID", "Date"]) # Reorder columns
|
|
171
|
+
data = data.rename({"Q1": "Satisfaction"}) # Rename columns
|
|
172
|
+
data = data.replace({"Satisfaction": {99: None}}) # Replace values
|
|
173
|
+
df = data.to_native() # Back to native DataFrame
|
|
174
|
+
|
|
175
|
+
# Track 2: Process metadata independently
|
|
176
|
+
meta.column_labels = {"Satisfaction": "Customer Satisfaction Score"}
|
|
177
|
+
meta.variable_value_labels = {
|
|
178
|
+
"Satisfaction": {1: "Very Dissatisfied", 5: "Very Satisfied"}
|
|
179
|
+
}
|
|
180
|
+
meta.variable_measure = {
|
|
181
|
+
'Satisfaction': 'ordinal',
|
|
182
|
+
'Gender': 'nominal',
|
|
183
|
+
'Age': 'scale',
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
# Convergence: Write both tracks to SPSS
|
|
187
|
+
ul.write_sav(df, meta, "cleaned_survey.sav")
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### Merging Files
|
|
191
|
+
|
|
192
|
+
```python
|
|
193
|
+
import ultrasav as ul
|
|
194
|
+
|
|
195
|
+
# Merge multiple files vertically with automatic metadata handling
|
|
196
|
+
df, meta = ul.add_cases([
|
|
197
|
+
"wave1.sav",
|
|
198
|
+
"wave2.sav",
|
|
199
|
+
"wave3.sav"
|
|
200
|
+
])
|
|
201
|
+
|
|
202
|
+
# Metadata is automatically preserved from top to bottom.
|
|
203
|
+
# A source-tracking column is automatically added to show each row's origin.
|
|
204
|
+
# Example: mrgsrc: ["wave1.sav", "wave2.sav", "wave3.sav"]
|
|
205
|
+
|
|
206
|
+
ul.write_sav(df, meta, "merged_output.sav")
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Advanced Merging
|
|
210
|
+
|
|
211
|
+
```python
|
|
212
|
+
import ultrasav as ul
|
|
213
|
+
|
|
214
|
+
# Use specific metadata template for all files
|
|
215
|
+
standard_meta = ul.Metadata() # Create an empty meta object
|
|
216
|
+
standard_meta.column_labels = {"Q1": "Satisfaction", "Q2": "Loyalty"}
|
|
217
|
+
standard_meta.variable_value_labels = {
|
|
218
|
+
"Q1": {1: "Very Dissatisfied", 5: "Very Satisfied"}
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
data, meta = ul.add_cases(
|
|
222
|
+
inputs=["file1.sav", "file2.sav", "file3.csv"],
|
|
223
|
+
meta=[standard_meta], # Apply this metadata to merged data
|
|
224
|
+
source_col="mrgsrc", # Auto append column 'mrgsrc' to track source files
|
|
225
|
+
output_format="polars" # Explicit format (polars is default)
|
|
226
|
+
)
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
### Writing Back
|
|
230
|
+
|
|
231
|
+
```python
|
|
232
|
+
# Read SPSS file
|
|
233
|
+
df, meta = ul.read_sav("huge_survey.sav")
|
|
234
|
+
|
|
235
|
+
# All ultrasav operations work the same
|
|
236
|
+
df = ul.Data(df).rename({"Q1": "satisfaction"}).drop(["unused_var"]).to_native()
|
|
237
|
+
|
|
238
|
+
# Efficient write-back
|
|
239
|
+
# Simply provide the 'meta' object; labels and formats are applied automatically.
|
|
240
|
+
# Compatible with both ultrasav and pyreadstat meta objects.
|
|
241
|
+
ul.write_sav(df, meta, "processed_data.sav")
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
## 🦸 Metaman: The Metadata Submodule
|
|
245
|
+
|
|
246
|
+
ultrasav includes **metaman**, a powerful submodule for metadata inspection, extraction, and reporting. All metaman functions are accessible directly from the top-level `ul` namespace.
|
|
247
|
+
|
|
248
|
+
### Generate Validation Datamaps
|
|
249
|
+
|
|
250
|
+
Create comprehensive datamaps showing variable types, value distributions, and data quality metrics:
|
|
251
|
+
|
|
252
|
+
```python
|
|
253
|
+
import ultrasav as ul
|
|
254
|
+
|
|
255
|
+
df, meta = ul.read_sav("survey.sav")
|
|
256
|
+
|
|
257
|
+
# Create a validation datamap
|
|
258
|
+
datamap = ul.make_datamap(df, meta)
|
|
259
|
+
|
|
260
|
+
# Export to beautifully formatted Excel
|
|
261
|
+
# This function supports polars only at the moment
|
|
262
|
+
ul.map_to_excel(datamap, "validation_report.xlsx")
|
|
263
|
+
|
|
264
|
+
# Use custom color schemes
|
|
265
|
+
ul.map_to_excel(
|
|
266
|
+
datamap,
|
|
267
|
+
"validation_report.xlsx",
|
|
268
|
+
alternating_group_formats=ul.get_color_scheme("pastel_blue")
|
|
269
|
+
)
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
The datamap includes:
|
|
273
|
+
- Variable names and labels
|
|
274
|
+
- Variable types (single-select, multi-select, numeric, text, date)
|
|
275
|
+
- Value codes and labels
|
|
276
|
+
- Value counts and percentages
|
|
277
|
+
- Missing data flags
|
|
278
|
+
- Missing value label detection
|
|
279
|
+
|
|
280
|
+
**Note: Variable types are inferred from both SPSS data and metadata on a best-effort basis and may not always perfectly reflect the true underlying types.**
|
|
281
|
+
|
|
282
|
+
### Extract Metadata to Python Files
|
|
283
|
+
|
|
284
|
+
Save the existing metadata (if any) from a SAV file as importable Python dictionaries for reuse across projects:
|
|
285
|
+
|
|
286
|
+
```python
|
|
287
|
+
import ultrasav as ul
|
|
288
|
+
|
|
289
|
+
df, meta = ul.read_sav("survey.sav")
|
|
290
|
+
|
|
291
|
+
# Extract metadata (labels) to an in-memory Python object
|
|
292
|
+
meta_dict = ul.get_meta(meta)
|
|
293
|
+
|
|
294
|
+
# Extract and save ALL metadata to a Python file
|
|
295
|
+
meta_dict = ul.get_meta(meta, include_all=True, output_path="survey_labels.py")
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
### Create Labels from Excel Templates
|
|
299
|
+
|
|
300
|
+
Build label dictionaries from scratch using Excel templates - perfect for translating surveys or standardizing labels:
|
|
301
|
+
|
|
302
|
+
```python
|
|
303
|
+
import ultrasav as ul
|
|
304
|
+
|
|
305
|
+
# Excel file with 'col_label' and 'value_label' sheets
|
|
306
|
+
col_labels, val_labels = ul.make_labels(
|
|
307
|
+
input_path="label_template.xlsx",
|
|
308
|
+
output_path="translated_labels.py" # optional
|
|
309
|
+
)
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
**Excel Structure:**
|
|
313
|
+
|
|
314
|
+
Your Excel file should have two sheets:
|
|
315
|
+
|
|
316
|
+
1. **Column Labels Sheet** (default sheet name: "col_label"):
|
|
317
|
+
| variable | label |
|
|
318
|
+
|----------|-------|
|
|
319
|
+
| age | Age of respondent |
|
|
320
|
+
| gender | Gender |
|
|
321
|
+
| income | Annual household income |
|
|
322
|
+
|
|
323
|
+
2. **Value Labels Sheet** (default sheet name: "value_label"):
|
|
324
|
+
| variable | value | label |
|
|
325
|
+
|----------|-------|-------|
|
|
326
|
+
| gender | 1 | Male |
|
|
327
|
+
| gender | 2 | Female |
|
|
328
|
+
| income | 1 | Under $25k |
|
|
329
|
+
| income | 2 | $25k-50k |
|
|
330
|
+
|
|
331
|
+
## 📖 API Reference
|
|
332
|
+
|
|
333
|
+
### Core Functions
|
|
334
|
+
|
|
335
|
+
#### `read_sav(filepath, output_format="polars")`
|
|
336
|
+
Read SPSS file and return separated data and metadata.
|
|
337
|
+
This is a wrapper around `pyreadstat.read_sav` that adds extra encoding handling.
|
|
338
|
+
|
|
339
|
+
```python
|
|
340
|
+
df, meta = ul.read_sav("survey.sav")
|
|
341
|
+
```
|
|
342
|
+
|
|
343
|
+
#### `write_sav(data, meta, filepath)`
|
|
344
|
+
Write data and metadata to SPSS file.
|
|
345
|
+
|
|
346
|
+
```python
|
|
347
|
+
ul.write_sav(df, meta, "processed_data.sav")
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
#### `add_cases(inputs, meta=None, source_col="mrgsrc")`
|
|
351
|
+
Merge multiple files/dataframes vertically with metadata handling, return merged data and metadata.
|
|
352
|
+
|
|
353
|
+
```python
|
|
354
|
+
df_merged, meta_merged = ul.add_cases(["wave1.sav","wave2.sav", "wave3.sav"])
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
### Classes
|
|
358
|
+
|
|
359
|
+
#### `Data`
|
|
360
|
+
Handles all dataframe operations while maintaining compatibility with both Polars and pandas.
|
|
361
|
+
|
|
362
|
+
```python
|
|
363
|
+
import ultrasav as ul
|
|
364
|
+
|
|
365
|
+
df, meta = ul.read_sav("survey.sav") # Returns a Polars DataFrame and meta object
|
|
366
|
+
|
|
367
|
+
# Convert polars or pandas df into our ul.Data() class
|
|
368
|
+
data = ul.Data(df)
|
|
369
|
+
|
|
370
|
+
# Data Class Methods
|
|
371
|
+
# move - to relocate columns
|
|
372
|
+
data = data.move(
|
|
373
|
+
first=['respondent_id'],
|
|
374
|
+
last=['timestamp'],
|
|
375
|
+
before={'age': 'gender'}, # place 'age' column before 'gender'
|
|
376
|
+
after={'wave': ['age', 'gender', 'income']} # place demographic columns after 'wave'
|
|
377
|
+
)
|
|
378
|
+
|
|
379
|
+
# rename - to rename columns
|
|
380
|
+
data = data.rename({"old": "new"})
|
|
381
|
+
|
|
382
|
+
# replace - to replace/recode values
|
|
383
|
+
data = data.replace({"col": {1: 100}})
|
|
384
|
+
|
|
385
|
+
# select - to select columns
|
|
386
|
+
data = data.select(['age', 'gender'])
|
|
387
|
+
|
|
388
|
+
# drop - to drop columns
|
|
389
|
+
data = data.drop(['id', 'language'])
|
|
390
|
+
|
|
391
|
+
# to_native - to return ul.Data(df) back to its native dataframe
|
|
392
|
+
df = data.to_native() # Get back Polars/pandas DataFrame
|
|
393
|
+
|
|
394
|
+
# Optionally, use chaining for cleaner code
|
|
395
|
+
df = (
|
|
396
|
+
ul.Data(df)
|
|
397
|
+
.move(first=['respondent_id'])
|
|
398
|
+
.rename({"old": "new"})
|
|
399
|
+
.replace({"col": {1: 100}})
|
|
400
|
+
.select(['age', 'gender'])
|
|
401
|
+
.drop(['id', 'language'])
|
|
402
|
+
.to_native()
|
|
403
|
+
)
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
#### `Metadata`
|
|
407
|
+
Manages all SPSS metadata independently from data.
|
|
408
|
+
|
|
409
|
+
```python
|
|
410
|
+
import ultrasav as ul
|
|
411
|
+
|
|
412
|
+
df, meta = ul.read_sav("survey.sav")
|
|
413
|
+
|
|
414
|
+
meta = ul.Metadata(meta)
|
|
415
|
+
|
|
416
|
+
# All updatable metadata
|
|
417
|
+
|
|
418
|
+
meta.column_labels = {"Q1": "Question 1"}
|
|
419
|
+
meta.variable_value_labels = {"Q1": {1: "Yes", 0: "No"}}
|
|
420
|
+
meta.variable_measure = {"age": "scale"}
|
|
421
|
+
meta.variable_format = {"age": "F3.0", "city_name": "A50"}
|
|
422
|
+
meta.variable_display_width = {"city_name": 50,}
|
|
423
|
+
meta.missing_ranges = {"Q1": [99], "Q2": [{"lo":998,"hi":999}]}
|
|
424
|
+
meta.notes = "Created on 2025-02-15"
|
|
425
|
+
meta.file_label = "My Survey 2025"
|
|
426
|
+
|
|
427
|
+
# Optionally, use '.update()' to update everything at once
|
|
428
|
+
meta = meta.update(
|
|
429
|
+
column_labels = {"Q1": "Question 1"},
|
|
430
|
+
variable_value_labels = {"Q1": {1: "Yes", 0: "No"}},
|
|
431
|
+
variable_measure = {"age": "scale"},
|
|
432
|
+
variable_format = {"age": "F3.0", "city_name": "A50"},
|
|
433
|
+
...
|
|
434
|
+
)
|
|
435
|
+
|
|
436
|
+
# You can update any writable metadata fields supported by pyreadstat.
|
|
437
|
+
```
|
|
438
|
+
**Metadata Updating Logic**
|
|
439
|
+
- Original metadata is preserved and never destroyed
|
|
440
|
+
- User updates overlay on top of originals
|
|
441
|
+
- When you set `meta.column_labels = {"Q1": "New Label"}`:
|
|
442
|
+
- This updates Q1's column label if there is an existing column label within the original meta.column_labels
|
|
443
|
+
- If Q1 is not in the original metadata, then Q1's new label will simply be appended at the bottom of the meta.column_labels dict
|
|
444
|
+
- All other column labels remain unchanged
|
|
445
|
+
- Original metadata still exists underneath
|
|
446
|
+
- This update logic applies to all updatable metadata
|
|
447
|
+
|
|
448
|
+
**Note on `variable_value_labels` Update Behavior:**
|
|
449
|
+
|
|
450
|
+
When updating `meta.variable_value_labels`, the entire value-label dictionary for a variable is **replaced**, not merged.
|
|
451
|
+
|
|
452
|
+
```python
|
|
453
|
+
# Original metadata
|
|
454
|
+
meta.variable_value_labels = {"Q1": {1: "Yes", 2: "No", 99: "Unsure"}}
|
|
455
|
+
|
|
456
|
+
# User update
|
|
457
|
+
meta.variable_value_labels = {"Q1": {1: "Yes", 0: "No"}}
|
|
458
|
+
|
|
459
|
+
# Result for Q1 becomes:
|
|
460
|
+
{"Q1": {1: "Yes", 0: "No"}} # Previous values 2 and 99 are NOT preserved
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
This means:
|
|
464
|
+
- Only the value-label pairs explicitly provided in the update are kept
|
|
465
|
+
- The entire dictionary for that variable is replaced at once
|
|
466
|
+
- Variable-level entries are preserved (e.g., "Q1" still exists), but value-level merging does not occur
|
|
467
|
+
|
|
468
|
+
This follows ultrasav's design principle: metadata updates are overlaid at the variable level and are never partially merged, ensuring clean and intentional metadata after each update.
|
|
469
|
+
|
|
470
|
+
**Critical Design Choice:**
|
|
471
|
+
- When you rename an existing column "Q1" to "Q1a" in data, the associated metadata does not automatically carry over
|
|
472
|
+
- You must explicitly provide new metadata for the newly renamed column "Q1a"
|
|
473
|
+
- No automatic tracking or mapping between old and new names
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
### 🦸 Metaman Functions
|
|
477
|
+
|
|
478
|
+
#### `make_datamap(df, meta, output_format=None)`
|
|
479
|
+
Create a validation datamap from data and metadata.
|
|
480
|
+
|
|
481
|
+
```python
|
|
482
|
+
datamap = ul.make_datamap(df, meta)
|
|
483
|
+
```
|
|
484
|
+
|
|
485
|
+
#### `map_to_excel(df, file_path, **kwargs)`
|
|
486
|
+
Export datamap to formatted Excel with merged cells and alternating colors.
|
|
487
|
+
|
|
488
|
+
```python
|
|
489
|
+
ul.map_to_excel(datamap, "report.xlsx") # Saves datamap to Excel
|
|
490
|
+
ul.map_to_excel(datamap, "report.xlsx", alternating_group_formats=ul.get_color_scheme("pastel_blue"))
|
|
491
|
+
```
|
|
492
|
+
|
|
493
|
+
#### `get_meta(meta, output_path=None, include_all=False)`
|
|
494
|
+
Extract metadata to a Python file or dictionary.
|
|
495
|
+
|
|
496
|
+
```python
|
|
497
|
+
meta_dict = ul.get_meta(meta) # Returns meta_dict in memory
|
|
498
|
+
ul.get_meta(meta, output_path="labels.py") # Saves to file
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
#### `make_labels(input_path, output_path=None)`
|
|
502
|
+
Create label dictionaries from an Excel template.
|
|
503
|
+
|
|
504
|
+
```python
|
|
505
|
+
col_labels, val_labels = ul.make_labels("template.xlsx") # Returns label dicts in memory
|
|
506
|
+
col_labels, val_labels = ul.make_labels("template.xlsx", "labels.py") # Saves to file
|
|
507
|
+
```
|
|
508
|
+
|
|
509
|
+
#### `detect_variable_type(df, meta, column)`
|
|
510
|
+
Detect variable type (single-select, multi-select, numeric, text, date).
|
|
511
|
+
|
|
512
|
+
```python
|
|
513
|
+
var_type = ul.detect_variable_type(df, meta, "Q1")
|
|
514
|
+
```
|
|
515
|
+
|
|
516
|
+
#### `get_color_scheme(name)`
|
|
517
|
+
Get a color scheme for Excel formatting.
|
|
518
|
+
|
|
519
|
+
```python
|
|
520
|
+
scheme = ul.get_color_scheme("pastel_blue")
|
|
521
|
+
# Options: "classic_grey", "pastel_green", "pastel_blue", "pastel_purple", "pastel_indigo"
|
|
522
|
+
```
|
|
523
|
+
|
|
524
|
+
## ⚡ Why "ultrasav"?
|
|
525
|
+
|
|
526
|
+
The name combines "Ultra" (inspired by Ultraman) with "SAV" (SPSS file format), representing the ultra-powerful transformation capabilities of this package. Just like Ultraman's Specium Ray, ultrasav splits and recombines data with precision and power!
|
|
527
|
+
|
|
528
|
+
And **metaman**? He's the metadata superhero who swoops in to inspect, validate, and report on your SPSS data! 🦸
|
|
529
|
+
|
|
530
|
+
## 🤝 Contributing
|
|
531
|
+
|
|
532
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
533
|
+
|
|
534
|
+
## 📄 License
|
|
535
|
+
|
|
536
|
+
MIT License - see LICENSE file for details.
|
|
537
|
+
|
|
538
|
+
## 🙏 Acknowledgments
|
|
539
|
+
|
|
540
|
+
- Built on top of [pyreadstat](https://github.com/Roche/pyreadstat) for SPSS file handling
|
|
541
|
+
- Uses [narwhals](https://github.com/MarcoGorelli/narwhals) for dataframe compatibility
|
|
542
|
+
- Excel export powered by [xlsxwriter](https://github.com/jmcnamara/XlsxWriter)
|
|
543
|
+
|
|
544
|
+
## 📬 Contact
|
|
545
|
+
|
|
546
|
+
- Author: Albert Li
|
|
547
|
+
|
|
548
|
+
## 📄 Version History
|
|
549
|
+
|
|
550
|
+
- **0.1.0**: Initial release with two-track architecture for data/metadata separation and metaman submodule for metadata inspection & reporting
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
ultrasav/__init__.py,sha256=nMMtutS1OU08HCGw_ITnzdx3bdeEdPk_HEuyMaYP9F8,8903
|
|
2
|
+
ultrasav/_add_cases.py,sha256=htbfoty9O4sm3UGmipnPOF02KdqRuM6VYPXbpSOFosE,10147
|
|
3
|
+
ultrasav/_data.py,sha256=MsGr8y9bXgbjMQ8VJXmpFZdwd7Qv_r9g9LxrIDiNVHU,19144
|
|
4
|
+
ultrasav/_make_dummy.py,sha256=nGT7sHRLVYvpi1yyEHYKuADmvdkObRec-1TlNSvWBus,4923
|
|
5
|
+
ultrasav/_merge_data.py,sha256=lrHh64z7PSgf7ClgSmu5V43UaO3LCUi7wvQUPOH94XU,17065
|
|
6
|
+
ultrasav/_merge_meta.py,sha256=YSQSUpinHWN4MnIyIjyuojEcmWgcBBZKLjTPn9SxcIk,10203
|
|
7
|
+
ultrasav/_metadata.py,sha256=amxhgpgE-xEd-qb3coZqDroG6bpkMSA5txOuk8V8VV8,22784
|
|
8
|
+
ultrasav/_read_files.py,sha256=SUkbbebc2QSJaDWvJqPMUzX7DjncIftRarDpj_adpeo,21139
|
|
9
|
+
ultrasav/_write_files.py,sha256=9Q5c4TqJaK6txq215dvFxPTm-HxW8Gi9HB7acBlhoGw,3884
|
|
10
|
+
ultrasav/metaman/__init__.py,sha256=viEySNj_kerwal5B9mZ-TGK_ofH8h7P8WXljwFQ3Ihk,2291
|
|
11
|
+
ultrasav/metaman/def_detect_variable_type.py,sha256=DVutyDhnOl5grIqHf1ICI_3bRZ7pMST_nF7gLGfViKg,17285
|
|
12
|
+
ultrasav/metaman/def_get_meta.py,sha256=Bnw1TbkIbwK2BdFyFw2IrPtV1HpXU2IB8IeAJFT6y7w,18315
|
|
13
|
+
ultrasav/metaman/def_make_datamap.py,sha256=18YOU6A8E90Jgx-nY9IwP6az0VppLIGCMEObuuTxF_Q,4731
|
|
14
|
+
ultrasav/metaman/def_make_labels.py,sha256=Ykk88A6zzzez5jxyxdgIhO9RWSJqnSb9hEl73fleKo4,26655
|
|
15
|
+
ultrasav/metaman/def_map_engine.py,sha256=BN78OoUL7Ba9qfJTCKGjFXRe0nJwy0PwvWBVZ1KIlW8,19525
|
|
16
|
+
ultrasav/metaman/def_map_to_excel.py,sha256=zWebKz7rBtKOrcPt_xvHUNzdvPFqGZqqgGY8Jh9Z8og,10792
|
|
17
|
+
ultrasav/metaman/def_write_excel_engine.py,sha256=B62IEVfl1FIrzUI-g3Rs-NgYj9gq0pTUzPYsL1Sfznc,12904
|
|
18
|
+
ultrasav/metaman/pastel_color_schemes.py,sha256=iVpp9hhEc8Dj7hYOWuVWQpTSTobp6zr82s8KiGFUTf8,4768
|
|
19
|
+
ultrasav-0.1.4.dist-info/WHEEL,sha256=z-mOpxbJHqy3cq6SvUThBZdaLGFZzdZPtgWLcP2NKjQ,79
|
|
20
|
+
ultrasav-0.1.4.dist-info/METADATA,sha256=wjBWebekvE25mcenvCWv50DpWRbj9rlTV8yAUeV8UZI,18904
|
|
21
|
+
ultrasav-0.1.4.dist-info/RECORD,,
|