ssb-konjunk 1.0.0.tar.gz → 1.0.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ssb-konjunk
-Version: 1.0.0
+Version: 1.0.1
 Summary: SSB Konjunk 422
 License: MIT
 Author: Johanne Saxegaard

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ssb-konjunk"
-version = "1.0.0"
+version = "1.0.1"
 description = "SSB Konjunk 422"
 authors = ["Johanne Saxegaard <jox@ssb.no>"]
 license = "MIT"

@@ -5,6 +5,7 @@ Follows the the standardization for versioning and names.
 
 import glob
 import re
+import warnings
 
 import dapla
 import pandas as pd
@@ -350,6 +351,7 @@ def read_ssb_file(
     datatilstand: str = "",
     undermappe: str | None = None,
     filetype: str = "parquet",
+    columns: list[str] | None = None,
     version_number: int | None = None,
     fs: dapla.gcs.GCSFileSystem | None = None,
     seperator: str = ";",
@@ -371,6 +373,7 @@ def read_ssb_file(
         undermappe: Optional folder under 'datatilstand'.
         version_number: possibility to get another version, than the newest (i.e. highest version number). Default: np.nan.
         filetype: the filetype to save as. Default: 'parquet'.
+        columns: Columns to read from the file. If None (default), all columns are read.
         fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
         seperator: the seperator to use it filetype is csv. Default: ';'.
         encoding: Encoding for file, base is latin1.
@@ -411,15 +414,27 @@ def read_ssb_file(
         if fs:
             # Samme som tidligere kan brukes til å lese alle filformater.
             with fs.open(file_path, "r") as f:
-                df = pd.read_csv(f, sep=seperator, encoding=encoding)
+                df = pd.read_csv(f, sep=seperator, encoding=encoding, usecols=columns)
                 f.close()
         else:
-            df = pd.read_csv(file_path, sep=seperator, encoding=encoding)
+            df = pd.read_csv(
+                file_path, sep=seperator, encoding=encoding, usecols=columns
+            )
     elif filetype == "parquet":
-        df = pd.read_parquet(file_path, filesystem=fs)
+        df = pd.read_parquet(file_path, columns=columns, filesystem=fs)
     elif filetype == "jsonl":
-        df = pd.read_json(file_path, lines=True)
+        if columns is not None:
+            warnings.warn(
+                f"Columns argumentet blir ignorert for {filetype} filer, hele filen vil bli lastet inn.",
+                stacklevel=2,
+            )
+        df = pd.read_json(file_path, lines=False)
     elif filetype == "json":
+        if columns is not None:
+            warnings.warn(
+                f"Columns argumentet blir ignorert for {filetype} filer, hele filen vil bli lastet inn.",
+                stacklevel=2,
+            )
         df = pd.read_json(file_path, lines=False)
     # Returns pandas df.
     return df
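
The net effect of 1.0.1 is that read_ssb_file can push column selection down to pandas for csv and parquet, while json and jsonl fall back to reading the whole file and warning. A minimal sketch of the underlying pandas calls, assuming placeholder file names and column names (only the keyword arguments come from the diff above):

    import pandas as pd

    cols = ["aar", "omsetning"]  # placeholder column names

    # csv: the `columns` argument is forwarded to pandas' usecols
    df_csv = pd.read_csv("data.csv", sep=";", encoding="latin1", usecols=cols)

    # parquet: forwarded to read_parquet's columns argument
    df_parquet = pd.read_parquet("data.parquet", columns=cols)

    # json/jsonl: no column pushdown; the whole file is read and the
    # function warns (in Norwegian) that `columns` is ignored
    df_json = pd.read_json("data.json", lines=False)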