ultrasav 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,558 @@
1
+ """
2
+ ultrasav: read functions
3
+ v_0.1.0
4
+ """
5
+
6
+
7
+ import os
8
+ from collections.abc import Sequence
9
+ import logging
10
+ from typing import Any
11
+ import narwhals as nw
12
+ import pyreadstat
13
+
14
# Candidate encodings tried, in order, by the auto-detection loops in
# read_sav() and read_csv(). `None` means "use the reader's native default".
# Order matters: the first encoding that reads without error wins.
COMMON_ENCODINGS = [
    None,  # Default
    "utf-8",
    "utf-8-sig",  # UTF-8 with byte-order mark (common in Windows exports)
    "latin1",
    "cp1252",  # Windows Western European
    "iso-8859-1",
    "cp1251",  # Windows Cyrillic
    "cp1250",  # Windows Central European
    "gbk",  # Chinese Simplified
    "big5",  # Chinese Traditional
    "shift_jis",  # Japanese
    "euc-kr",  # Korean
]
29
+
30
+ def read_sav(
31
+ file_path: str | os.PathLike,
32
+ output_format: str = "polars",
33
+ encoding: str | None = None,
34
+ apply_value_formats: bool = False,
35
+ formats_as_category: bool = True,
36
+ formats_as_ordered_category: bool = False,
37
+ auto_detect_encoding: bool = True,
38
+ **kwargs
39
+ ) -> tuple[Any, Any]:
40
+ """
41
+ Read SPSS SAV/ZSAV files with automatic encoding detection.
42
+
43
+ Parameters
44
+ ----------
45
+ file_path : str or Path
46
+ Path to the SAV/ZSAV file
47
+ output_format : str, default "polars"
48
+ Output format: "pandas", "polars", "narwhals", or "dict"
49
+ - "pandas": returns pandas DataFrame
50
+ - "polars": returns polars DataFrame
51
+ - "narwhals": returns narwhals DataFrame (converted from polars)
52
+ - "dict": returns dictionary
53
+ encoding : str, optional
54
+ File encoding. If None and auto_detect_encoding=True, will try multiple encodings
55
+ auto_detect_encoding : bool, default True
56
+ If True and encoding is None, automatically tries multiple encodings
57
+ apply_value_formats : bool, default False
58
+ Apply value labels to the data
59
+ formats_as_category : bool, default True
60
+ Convert formatted variables to categories
61
+ formats_as_ordered_category : bool, default False
62
+ Convert formatted variables to ordered categories
63
+ **kwargs : additional arguments passed to pyreadstat.read_sav()
64
+
65
+ Returns
66
+ -------
67
+ df : DataFrame or dict
68
+ Data in the specified output format
69
+ meta : metadata object
70
+ Metadata from the SPSS file
71
+
72
+ Examples
73
+ --------
74
+ >>> df, meta = read_sav("survey.sav") # Returns polars DataFrame by default
75
+ >>> df, meta = read_sav("survey.sav", output_format="pandas") # Returns pandas DataFrame
76
+ >>> df, meta = read_sav("survey.sav", output_format="narwhals") # Returns narwhals DataFrame
77
+ """
78
+ if output_format not in ["pandas", "polars", "narwhals", "dict"]:
79
+ raise ValueError(f"output_format must be 'pandas', 'polars', 'narwhals', or 'dict', got {output_format}")
80
+
81
+ # For narwhals output, read as polars first
82
+ read_format = "polars" if output_format == "narwhals" else output_format
83
+
84
+ read_kwargs = {
85
+ "apply_value_formats": apply_value_formats,
86
+ "formats_as_category": formats_as_category,
87
+ "formats_as_ordered_category": formats_as_ordered_category,
88
+ "output_format": read_format,
89
+ **kwargs
90
+ }
91
+
92
+ if encoding is not None or not auto_detect_encoding:
93
+ if encoding:
94
+ read_kwargs["encoding"] = encoding
95
+ df, meta = pyreadstat.read_sav(file_path, **read_kwargs)
96
+ if output_format == "narwhals":
97
+ df = nw.from_native(df)
98
+ return df, meta
99
+
100
+ # Auto-detect encoding
101
+ last_error = None
102
+ for enc in COMMON_ENCODINGS:
103
+ try:
104
+ if enc is not None:
105
+ read_kwargs["encoding"] = enc
106
+ elif "encoding" in read_kwargs:
107
+ del read_kwargs["encoding"]
108
+
109
+ df, meta = pyreadstat.read_sav(file_path, **read_kwargs)
110
+
111
+ enc_name = enc or "default"
112
+ logging.info(f"Successfully read SAV file with encoding: {enc_name}")
113
+
114
+ if output_format == "narwhals":
115
+ df = nw.from_native(df)
116
+ return df, meta
117
+
118
+ except (UnicodeDecodeError, UnicodeError) as e:
119
+ last_error = e
120
+ continue
121
+ except Exception as e:
122
+ if any(term in str(e).lower() for term in ["decode", "encode", "codec", "utf", "unicode"]):
123
+ last_error = e
124
+ continue
125
+ raise e
126
+
127
+ raise ValueError(
128
+ f"Failed to read SAV file '{file_path}' with any attempted encoding.\n"
129
+ f"Last error: {last_error}"
130
+ )
131
+
132
+
133
+ # Helper: non-string sequence check
134
+ def _is_non_str_seq(obj: Any) -> bool:
135
+ return isinstance(obj, Sequence) and not isinstance(obj, (str, bytes))
136
+
137
+ # Helper: choose a pandas fallback engine from file extension
138
+ def _pandas_engine_from_ext(ext: str) -> str | None:
139
+ # Best-effort guesser. Users can always override via `engine=...`.
140
+ if ext in {".xlsx", ".xlsm", ".xltx", ".xltm"}:
141
+ return "openpyxl"
142
+ if ext == ".xls":
143
+ return "xlrd"
144
+ if ext == ".xlsb":
145
+ return "pyxlsb"
146
+ if ext == ".ods":
147
+ return "odf"
148
+ return None
149
+
150
def read_excel(
    file_path: str | os.PathLike,
    output_format: str = "polars",
    sheet_name: str | int | Sequence[str | int] | None = 0,
    engine: str | None = None,
    **kwargs
) -> Any:
    """
    Read Excel files into the requested DataFrame format.

    Parameters
    ----------
    file_path : str or Path
        Path to the Excel file.
    output_format : {"pandas","polars","narwhals"}, default "polars"
        - "pandas": returns pandas DataFrame or dict[str, DataFrame]
        - "polars": returns polars DataFrame or dict[str, DataFrame]
        - "narwhals": reads via Polars with the same defaults, then converts
    sheet_name : str | int | list[str|int] | None, default 0
        Sheet selector(s).
        - str or list[str]: sheet names.
        - int or list[int]: **0-based indices** (mapped to Polars's 1-based sheet_id).
        - None: **read all sheets** (a dict is returned).
    engine : str | None, default None
        Backend engine to use.
        - Polars: defaults to "calamine", falling back to "openpyxl" and then
          "xlsx2csv". Options: "calamine", "openpyxl", "xlsx2csv".
        - Pandas: auto-detected from the file extension. Options: "openpyxl"
          (.xlsx), "xlrd" (.xls), "pyxlsb" (.xlsb), "odf" (.ods).
    **kwargs :
        Additional keyword args forwarded to the underlying read function.
        Available parameters differ between backends:
        - pandas supports: header, nrows, usecols, dtype, etc.
        - polars supports: has_header, columns, schema_overrides, etc.
        Excel readers do not take an 'encoding' parameter.

    Returns
    -------
    DataFrame or dict[str, DataFrame]
        Depending on `output_format` and whether multiple sheets were requested.

    Raises
    ------
    ValueError
        If `output_format` is invalid, or `sheet_name` mixes ints and strings.

    Notes
    -----
    Integer sheet indices are **0-based** in this API even though Polars uses
    1-based `sheet_id`; the offset is applied automatically.
    """
    if output_format not in {"pandas", "polars", "narwhals"}:
        raise ValueError("output_format must be 'pandas', 'polars', or 'narwhals'.")

    # narwhals output is produced by reading via Polars, then wrapping.
    backend = "polars" if output_format == "narwhals" else output_format

    # Normalize sheet selection into a flat list for validation/mapping.
    sheet_is_seq = _is_non_str_seq(sheet_name)
    if sheet_is_seq:
        items = list(sheet_name)  # type: ignore[arg-type]
    else:
        items = [] if sheet_name is None else [sheet_name]  # None handled below

    # Reject mixed int/str selectors: the int -> sheet_id mapping below would
    # otherwise silently misinterpret part of the list.
    if sheet_is_seq and items:
        first_type = type(items[0])
        if not all(isinstance(item, first_type) for item in items):
            raise ValueError(
                f"Mixed sheet types are not supported. Got: {[type(item).__name__ for item in items]}"
            )

    file_str = str(file_path)

    if backend == "polars":
        import polars as pl

        polars_kwargs = dict(kwargs)

        # Polars has no `nrows` parameter; drop it rather than fail.
        polars_kwargs.pop("nrows", None)

        # Map the 0-based public API onto Polars's 1-based sheet_id.
        if sheet_name is None:
            polars_kwargs["sheet_name"] = None  # read all sheets
        elif sheet_is_seq:
            if all(isinstance(item, int) for item in items):
                polars_kwargs["sheet_id"] = [i + 1 for i in items]
            else:
                polars_kwargs["sheet_name"] = items
        elif isinstance(sheet_name, int):
            polars_kwargs["sheet_id"] = sheet_name + 1
        else:
            polars_kwargs["sheet_name"] = sheet_name

        # Engine handling: default to calamine; warn and fall back on bad input.
        if engine is None:
            engine = "calamine"
        else:
            valid_polars_engines = {"calamine", "openpyxl", "xlsx2csv"}
            if engine not in valid_polars_engines:
                import warnings
                warnings.warn(
                    f"Invalid engine '{engine}' for polars. "
                    f"Valid options are: {valid_polars_engines}. "
                    f"Defaulting to 'calamine'.",
                    UserWarning
                )
                engine = "calamine"

        polars_kwargs["engine"] = engine

        try:
            result = pl.read_excel(file_str, **polars_kwargs)
        except Exception as e:
            # Engine fallback chain: calamine -> openpyxl -> xlsx2csv.
            if engine == "calamine" and "calamine" in str(e).lower():
                logging.info("Calamine engine failed, falling back to openpyxl")
                polars_kwargs["engine"] = "openpyxl"
                try:
                    result = pl.read_excel(file_str, **polars_kwargs)
                except Exception:
                    logging.info("Openpyxl engine failed, falling back to xlsx2csv")
                    polars_kwargs["engine"] = "xlsx2csv"
                    result = pl.read_excel(file_str, **polars_kwargs)
            else:
                raise

    else:  # backend == "pandas"
        import pandas as pd

        pandas_kwargs = dict(kwargs)

        # Pandas already uses 0-based indices, so selectors pass through
        # unchanged (None => all sheets, list => multiple sheets).
        pandas_kwargs["sheet_name"] = items if sheet_is_seq else sheet_name

        if engine:
            pandas_kwargs["engine"] = engine
        else:
            # Auto-detect engine based on the file extension.
            ext = os.path.splitext(file_str)[1].lower()
            pandas_engine = _pandas_engine_from_ext(ext)
            if pandas_engine:
                pandas_kwargs["engine"] = pandas_engine

        try:
            result = pd.read_excel(file_str, **pandas_kwargs)
        except Exception as e:
            # If the chosen engine failed, retry once with the extension-based guess.
            if "engine" in pandas_kwargs and "engine" in str(e).lower():
                ext = os.path.splitext(file_str)[1].lower()
                fallback_engine = _pandas_engine_from_ext(ext)
                if fallback_engine and fallback_engine != pandas_kwargs.get("engine"):
                    pandas_kwargs["engine"] = fallback_engine
                    result = pd.read_excel(file_str, **pandas_kwargs)
                else:
                    raise
            else:
                raise

    # Convert to narwhals if requested (per-sheet for dict results).
    if output_format == "narwhals":
        if isinstance(result, dict):
            return {name: nw.from_native(df) for name, df in result.items()}
        return nw.from_native(result)

    return result
341
+
342
+ def _is_encoding_error(e: Exception) -> bool:
343
+ """Check if an exception is encoding-related."""
344
+ error_str = str(e).lower()
345
+ error_type = type(e).__name__.lower()
346
+
347
+ encoding_indicators = [
348
+ "encode", "decode", "codec", "utf", "unicode",
349
+ "charmap", "ascii", "latin", "gbk", "big5"
350
+ ]
351
+
352
+ return any(ind in error_str or ind in error_type for ind in encoding_indicators)
353
+
354
+ def _normalize_sep_kwargs(kwargs: dict[str, Any], backend: str) -> dict[str, Any]:
355
+ """
356
+ Normalize delimiter-related kwargs for different backends.
357
+ - pandas: uses `sep`
358
+ - polars: uses `separator`
359
+ """
360
+ out = dict(kwargs)
361
+
362
+ if backend == "polars":
363
+ # Prefer explicit 'separator' if provided, else map common pandas spellings
364
+ if "separator" not in out:
365
+ if "sep" in out:
366
+ out["separator"] = out.pop("sep")
367
+ elif "delimiter" in out:
368
+ out["separator"] = out.pop("delimiter")
369
+ else: # backend == "pandas"
370
+ # Prefer explicit 'sep' if provided, else map 'separator'/'delimiter'
371
+ if "sep" not in out:
372
+ if "separator" in out:
373
+ out["sep"] = out.pop("separator")
374
+ elif "delimiter" in out:
375
+ out["sep"] = out.pop("delimiter")
376
+
377
+ return out
378
+
379
+ def _pick_backend_for_narwhals_preference(prefer: str = "polars") -> str:
380
+ """
381
+ Choose a backend module name for Narwhals ('polars' or 'pandas').
382
+ If the preferred backend isn't importable, fall back to the other.
383
+ """
384
+ if prefer == "polars":
385
+ try:
386
+ import polars # noqa: F401
387
+ return "polars"
388
+ except Exception:
389
+ pass
390
+ try:
391
+ import pandas # noqa: F401
392
+ return "pandas"
393
+ except Exception:
394
+ raise RuntimeError(
395
+ "Neither Polars nor pandas is installed. Please install one of them."
396
+ )
397
+ else: # prefer pandas
398
+ try:
399
+ import pandas # noqa: F401
400
+ return "pandas"
401
+ except Exception:
402
+ pass
403
+ try:
404
+ import polars # noqa: F401
405
+ return "polars"
406
+ except Exception:
407
+ raise RuntimeError(
408
+ "Neither pandas nor Polars is installed. Please install one of them."
409
+ )
410
+
411
+ def read_csv(
412
+ file_path: str | os.PathLike,
413
+ output_format: str = "polars",
414
+ encoding: str | None = None,
415
+ auto_detect_encoding: bool = True,
416
+ **kwargs
417
+ ) -> Any:
418
+ """
419
+ Read CSV files into the requested DataFrame format using Narwhals under the hood.
420
+
421
+ Parameters
422
+ ----------
423
+ file_path : str or Path
424
+ Path to the CSV file.
425
+ output_format : {"polars","pandas","narwhals"}, default "polars"
426
+ - "polars": returns a polars.DataFrame
427
+ - "pandas": returns a pandas.DataFrame
428
+ - "narwhals": returns a narwhals.DataFrame (backed by a native df)
429
+ encoding : str, optional
430
+ Encoding hint passed to the native reader. If None and
431
+ auto_detect_encoding=True, we'll try several encodings.
432
+ auto_detect_encoding : bool, default True
433
+ If True and `encoding` is None, attempt multiple encodings.
434
+ **kwargs :
435
+ Extra keywords forwarded to the native CSV reader (through Narwhals).
436
+ These may be backend-specific (e.g., pandas uses `sep`, Polars uses
437
+ `separator`). We normalize only delimiter args (`sep`/`delimiter`↔`separator`)
438
+ to reduce friction.
439
+
440
+ Returns
441
+ -------
442
+ DataFrame
443
+ pandas.DataFrame, polars.DataFrame, or narwhals.DataFrame according to
444
+ `output_format`.
445
+
446
+ Notes
447
+ -----
448
+ - Narwhals API: `nw.read_csv(source, backend=..., **kwargs)` then
449
+ `nw.to_native(df)` if you want the native object. Kwargs pass through to
450
+ the backend reader.
451
+ - Polars CSV reader is UTF‑8‑first; for non‑UTF encodings, pandas tends to
452
+ be more permissive, so we use a pandas fallback during autodetection.
453
+ """
454
+ if output_format not in {"polars", "pandas", "narwhals"}:
455
+ raise ValueError("output_format must be 'polars', 'pandas', or 'narwhals'.")
456
+
457
+ # Decide the first backend to try based on the requested output.
458
+ if output_format == "polars":
459
+ preferred_backend = "polars"
460
+ elif output_format == "pandas":
461
+ preferred_backend = "pandas"
462
+ else: # "narwhals"
463
+ # Prefer polars for performance; fall back to pandas if not installed
464
+ preferred_backend = _pick_backend_for_narwhals_preference("polars")
465
+
466
+ def _read_once(backend: str, enc: str | None, passthrough_kwargs: dict[str, Any]):
467
+ # Normalize delimiter kwargs for this backend
468
+ k = _normalize_sep_kwargs(passthrough_kwargs, backend=backend)
469
+ if enc:
470
+ k = {**k, "encoding": enc}
471
+ # Use Narwhals to create a Narwhals DataFrame backed by the chosen backend
472
+ return nw.read_csv(str(file_path), backend=backend, **k) # narwhals DF
473
+
474
+ last_err: Exception | None = None
475
+
476
+ # Fast path: if encoding is provided or autodetect is off, do a single attempt
477
+ if (encoding is not None) or (not auto_detect_encoding):
478
+ try:
479
+ df_nw = _read_once(preferred_backend, encoding, kwargs)
480
+ except Exception as e:
481
+ # If it's encoding-related and the preferred backend is Polars,
482
+ # try pandas as a one-shot fallback with the same encoding.
483
+ if _is_encoding_error(e) and preferred_backend == "polars":
484
+ try:
485
+ df_nw = _read_once("pandas", encoding, kwargs)
486
+ except Exception as e2:
487
+ raise e2
488
+ else:
489
+ raise
490
+ # Return in the requested format
491
+ if output_format == "narwhals":
492
+ return df_nw
493
+ native = nw.to_native(df_nw) # pandas or polars depending on backend
494
+ if output_format == "polars" and not (type(native).__module__.startswith("polars")):
495
+ # Convert pandas->polars if fallback used
496
+ import polars as pl
497
+ return pl.from_pandas(native)
498
+ return native
499
+
500
+ # Autodetect encoding: try preferred backend with default (utf-8), then iterate COMMON_ENCODINGS
501
+ # Strategy:
502
+ # 1) Try preferred backend with no 'encoding' kwarg (native default).
503
+ # 2) For each enc in COMMON_ENCODINGS (skipping None), try preferred backend.
504
+ # If that fails due to encoding, try pandas (broad support).
505
+ # 3) On success, convert to requested output.
506
+ try:
507
+ df_nw = _read_once(preferred_backend, None, kwargs)
508
+ # Success with default encoding
509
+ if output_format == "narwhals":
510
+ return df_nw
511
+ native = nw.to_native(df_nw)
512
+ if output_format == "polars" and not (type(native).__module__.startswith("polars")):
513
+ import polars as pl
514
+ return pl.from_pandas(native)
515
+ return native
516
+ except Exception as e:
517
+ last_err = e
518
+ if not _is_encoding_error(e):
519
+ # Not encoding-related -> propagate immediately
520
+ raise
521
+
522
+ # Try a list of encodings
523
+ for enc in (e for e in COMMON_ENCODINGS if e is not None):
524
+ try:
525
+ df_nw = _read_once(preferred_backend, enc, kwargs)
526
+ logging.info(f"read_csv succeeded with backend={preferred_backend}, encoding={enc}")
527
+ if output_format == "narwhals":
528
+ return df_nw
529
+ native = nw.to_native(df_nw)
530
+ if output_format == "polars" and not (type(native).__module__.startswith("polars")):
531
+ import polars as pl
532
+ return pl.from_pandas(native)
533
+ return native
534
+ except Exception as e:
535
+ last_err = e
536
+ # If encoding-related and we haven't tried pandas yet (or preferred is polars), try pandas
537
+ if _is_encoding_error(e) and preferred_backend == "polars":
538
+ try:
539
+ df_nw = _read_once("pandas", enc, kwargs)
540
+ logging.info(f"read_csv succeeded with backend=pandas, encoding={enc}")
541
+ if output_format == "narwhals":
542
+ return df_nw
543
+ native = nw.to_native(df_nw) # pandas DF
544
+ if output_format == "polars":
545
+ import polars as pl
546
+ return pl.from_pandas(native)
547
+ return native
548
+ except Exception as e2:
549
+ last_err = e2
550
+ continue
551
+ # Non-encoding error -> raise early
552
+ if not _is_encoding_error(e):
553
+ raise
554
+
555
+ raise ValueError(
556
+ f"Failed to read CSV file '{file_path}' with any attempted encoding. "
557
+ f"Last error: {last_err}"
558
+ )
@@ -0,0 +1,111 @@
1
+ """
2
+ write_functions.py
3
+ v0.1.0
4
+ """
5
+
6
+ import logging
7
+ from pathlib import Path
8
+ import pyreadstat
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ def write_sav(data,
14
+ meta,
15
+ dst_path: str | Path,
16
+ **overrides) -> None:
17
+ """
18
+ Write data and metadata to a SPSS SAV file.
19
+
20
+ This is the convergence point where independent data and metadata objects
21
+ reunite. Only columns that exist in the data will have their metadata
22
+ written, regardless of what metadata exists for other columns.
23
+
24
+ Parameters
25
+ ----------
26
+ data : The data to write, pandas.DataFrame, or polars.DataFrame
27
+ meta : Metadata, metadata_container, or None
28
+ Metadata object containing labels, formats, etc. Can be:
29
+ - A Metadata object (from this package)
30
+ - A pyreadstat metadata_container (will be auto-wrapped)
31
+ - None for minimal metadata (no labels, formats, etc.)
32
+ dst_path : str or Path
33
+ Path where the SAV file will be written.
34
+ **overrides : keyword arguments
35
+ Optional overrides for metadata settings. Common overrides:
36
+ - compress (bool): If True, creates a compressed ZSAV file
37
+ - row_compress (bool): If True, uses row compression
38
+ Note: compress and row_compress cannot both be True.
39
+ Any override temporarily replaces the meta object's setting during write.
40
+
41
+ Notes
42
+ -----
43
+ The function follows the tidyspss two-track architecture where data and
44
+ metadata work independently and only converge at write time. Metadata for
45
+ columns that don't exist in the data will be silently ignored by pyreadstat.
46
+
47
+ Examples
48
+ --------
49
+ >>> # Basic write with data and metadata
50
+ >>> write_sav(data, meta, "output.sav")
51
+
52
+ >>> # Write without metadata (minimal SPSS file)
53
+ >>> write_sav(df, dst_path="minimal.sav")
54
+
55
+ >>> # Write with compression override
56
+ >>> write_sav(data, meta, dst_path="output.zsav", compress=True)
57
+ """
58
+ # Convert to native dataframe if needed
59
+ if hasattr(data, 'to_native'):
60
+ df = data.to_native()
61
+ else:
62
+ df = data
63
+
64
+ # Handle metadata
65
+ if meta is not None:
66
+ # Check if it's already a Metadata object or needs wrapping
67
+ if not hasattr(meta, 'get_write_params'):
68
+ # It's likely a pyreadstat metadata_container, wrap it
69
+ from .class_metadata import Metadata
70
+ meta = Metadata(meta)
71
+
72
+ # Apply temporary overrides to metadata object
73
+ originals = {}
74
+ for key, value in overrides.items():
75
+ if value is not None and hasattr(meta, key):
76
+ originals[key] = getattr(meta, key)
77
+ setattr(meta, key, value)
78
+
79
+ # Get write parameters from metadata
80
+ write_params = meta.get_write_params()
81
+
82
+ # Restore original metadata settings
83
+ for key, value in originals.items():
84
+ setattr(meta, key, value)
85
+
86
+ # Extract compression settings from write params
87
+ final_compress = write_params.pop('compress', False)
88
+ final_row_compress = write_params.pop('row_compress', False)
89
+ else:
90
+ # No metadata - use minimal parameters with overrides
91
+ write_params = {}
92
+ final_compress = overrides.get('compress', False)
93
+ final_row_compress = overrides.get('row_compress', False)
94
+
95
+ # Validate compression settings
96
+ if final_compress and final_row_compress:
97
+ raise ValueError("Both 'compress' and 'row_compress' cannot be True at the same time")
98
+
99
+ # Convert path to string
100
+ dst_path = str(dst_path)
101
+
102
+ # Write the file
103
+ pyreadstat.write_sav(
104
+ df=df,
105
+ dst_path=dst_path,
106
+ compress=final_compress,
107
+ row_compress=final_row_compress,
108
+ **write_params
109
+ )
110
+
111
+ logger.info(f"SPSS file saved successfully to {dst_path}")