ssb-konjunk 1.0.0__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ssb-konjunk
-Version: 1.0.0
+Version: 2.0.0
 Summary: SSB Konjunk 422
 License: MIT
 Author: Johanne Saxegaard
@@ -14,7 +14,6 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: click (>=8.0.1)
-Requires-Dist: dapla-toolbelt (>=3.0.0)
 Requires-Dist: pandas (>=2.2.0)
 Requires-Dist: pandas-stubs (>=2.2.2.240807)
 Requires-Dist: pendulum (>=3.0.0)
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ssb-konjunk"
-version = "1.0.0"
+version = "2.0.0"
 description = "SSB Konjunk 422"
 authors = ["Johanne Saxegaard <jox@ssb.no>"]
 license = "MIT"
@@ -18,7 +18,6 @@ python = ">=3.10, <4.0"
 click = ">=8.0.1"
 pandas = ">=2.2.0"
 pendulum = ">=3.0.0"
-dapla-toolbelt = ">=3.0.0"
 pandas-stubs = ">=2.2.2.240807"

 [tool.poetry.group.dev.dependencies]
@@ -35,7 +34,6 @@ pytest = ">=6.2.5"
 sphinx = ">=6.2.1"
 sphinx-autobuild = ">=2021.3.14"
 sphinx-autodoc-typehints = ">=1.24.0"
-sphinx-click = ">=3.0.2"
 typeguard = ">=2.13.3"
 xdoctest = { extras = ["colors"], version = ">=0.15.10" }
 myst-parser = { version = ">=0.16.1" }
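The headline change in 2.0.0 is the removal of the dapla-toolbelt runtime dependency (plus the dev-only sphinx-click): file access no longer goes through an explicit GCS filesystem handle. A minimal sketch of what this means for downstream code, with hypothetical file names; in 1.x the helpers opened files through dapla.FileClient, in 2.x they use plain built-in I/O:

```python
# 1.x style (required dapla-toolbelt; removed in 2.0.0):
#   from dapla import FileClient
#   fs = FileClient.get_gcs_file_system()
#   with fs.open("gs://my-bucket/data.txt", "w") as f:
#       f.write("...")

# 2.x style: plain paths and built-in open(); "data.txt" is a hypothetical file.
with open("data.txt", "w") as f:
    f.write("...")
```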
@@ -6,11 +6,9 @@ https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
 """

 # Importing external packages
-import pandas as pd
-from dapla import FileClient
+from __future__ import annotations

-# Getting filesystem
-fs = FileClient.get_gcs_file_system()
+import pandas as pd


 def change_date_format_fame(series: pd.Series[str]) -> pd.Series[str]:
@@ -34,7 +32,7 @@ def write_out_fame_format_txt(
     names: pd.Series[str],
     dates: pd.Series[str],
     values: pd.Series[float],
-    gcp_path: str,
+    path: str,
 ) -> None:
     """Function to write out txt file in fame format.

@@ -42,9 +40,9 @@ def write_out_fame_format_txt(
         names: Pandas series containing name or type for value.
         dates: Pandas series containing date for values.
         values: Pandas series containing values.
-        gcp_path: String to google cloud.
+        path: String to output file.
     """
-    with fs.open(gcp_path, "w") as f:
+    with open(path, "w") as f:
         # Write data rows
         for name, date, value in zip(names, dates, values, strict=False):
             # Apply format specification
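After this change, write_out_fame_format_txt takes a plain `path` instead of `gcp_path` and writes with the built-in `open`. A usage sketch; the module path in the import and the series contents are assumptions, not shown in the diff:

```python
import pandas as pd

from ssb_konjunk.fame import write_out_fame_format_txt  # hypothetical module path

names = pd.Series(["serieA", "serieA"])
dates = pd.Series(["2024:01", "2024:02"])
values = pd.Series([1.5, 2.5])

# 2.0.0: a local path replaces the old gcp_path argument.
write_out_fame_format_txt(names, dates, values, path="fame_data.txt")
```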
@@ -5,8 +5,8 @@ Follows the standardization for versioning and names.

 import glob
 import re
+import warnings

-import dapla
 import pandas as pd

 from ssb_konjunk import timestamp
@@ -39,7 +39,6 @@ def _structure_ssb_filepath(
     undermappe: str | None = None,
     version_number: int | None = None,
     filetype: str = "parquet",
-    fs: dapla.gcs.GCSFileSystem | None = None,
 ) -> str:
     """Structure the name of the file to SSB-format and the path.

@@ -53,7 +52,6 @@
         undermappe: Optional string if you want folders between 'datatilstand' and file.
         version_number: Optional int for reading specific file.
         filetype: String with default 'parquet', specifies file type.
-        fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.

     Returns:
         str: the full path to the file.
@@ -61,11 +59,7 @@
     Raises:
         ValueError: Raised if version number is not None or int.
     """
-    # Handle that path starts with / in prodsonen.
-    if fs is None:
-        bucket = _remove_edge_slashes(bucket, only_last=True)
-    else:
-        bucket = _remove_edge_slashes(bucket)
+    bucket = _remove_edge_slashes(bucket)
     kortnavn = _remove_edge_slashes(kortnavn)
     datatilstand = _remove_edge_slashes(datatilstand)
     file_name = _remove_edge_slashes(file_name)
@@ -95,17 +89,12 @@
     return file_path


-def _get_files(
-    folder_path: str, filetype: str, fs: dapla.gcs.GCSFileSystem | None
-) -> list[str]:
+def _get_files(folder_path: str, filetype: str) -> list[str]:
     """Function to list files in a folder based on base name and timestamp."""
     filenames = []

     match_string = f"{folder_path}*"
-    if fs:
-        filenames = fs.glob(match_string)
-    else:
-        filenames = glob.glob(match_string)
+    filenames = glob.glob(match_string)

     # Only include files with the relevant file extension
     filenames = [i for i in filenames if i.endswith(filetype)]
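The simplified _get_files is now a thin wrapper around glob.glob: it matches everything that starts with the `folder_path` prefix and keeps only entries with the requested extension. A standalone sketch of the same logic, with a hypothetical file layout:

```python
import glob

def list_versioned_files(folder_path: str, filetype: str) -> list[str]:
    # Match every entry whose name starts with the folder_path prefix ...
    filenames = glob.glob(f"{folder_path}*")
    # ... then keep only files with the requested extension.
    return [name for name in filenames if name.endswith(filetype)]

# Hypothetical layout: would pick up e.g. data_p2024_v1.parquet and data_p2024_v2.parquet.
print(list_versioned_files("bucket/kortnavn/klargjort/data_p2024", "parquet"))
```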
@@ -237,28 +226,16 @@ def _save_df(
     df: pd.DataFrame,
     file_path: str,
     filetype: str,
-    fs: dapla.gcs.GCSFileSystem | None,
     seperator: str,
     encoding: str,
 ) -> None:
     """Do the actual saving, either as csv or parquet."""
     # Save as parquet
     if filetype == "parquet":
-
-        if fs:
-            with fs.open(file_path, "wb") as f:
-                df.to_parquet(f, index=False)
-                f.close()
-        else:
-            df.to_parquet(file_path, index=False)
+        df.to_parquet(file_path, index=False)
     # Save as csv
     elif filetype == "csv":
-        if fs:
-            with fs.open(file_path, "wb") as f:
-                df.to_csv(f, sep=seperator, index=False, encoding=encoding)
-                f.close()
-        else:
-            df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
+        df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
     # Save as jsonl
     elif filetype == "jsonl":
         df.to_json(file_path, orient="records", lines=True)
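With the `fs` branches gone, _save_df relies on pandas resolving the path itself. That this still covers remote storage is a reasonable reading of the change, though the diff does not state the rationale: pandas hands URL-style paths to fsspec, so an explicit filesystem object is redundant when the matching backend (gcsfs for GCS) is installed. A small sketch; the bucket name is hypothetical:

```python
import pandas as pd

df = pd.DataFrame({"aar": [2024], "verdi": [1.0]})

# Local path: written directly by pandas.
df.to_parquet("data_v1.parquet", index=False)

# Remote URL: pandas dispatches to fsspec/gcsfs based on the "gs://" scheme.
# df.to_parquet("gs://my-bucket/data_v1.parquet", index=False)
```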
@@ -285,7 +262,6 @@ def write_ssb_file(
     undermappe: str | None = None,
     stable_version: bool = True,
     filetype: str = "parquet",
-    fs: dapla.gcs.GCSFileSystem | None = None,
     seperator: str = ";",
     encoding: str = "latin1",
 ) -> None:
@@ -302,7 +278,6 @@
         undermappe: Optional folder under 'datatilstand'.
         stable_version: Bool for whether you should have checks in place in case of overwrite.
         filetype: the filetype to save as. Default: 'parquet'.
-        fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
         seperator: the seperator to use if filetype is csv. Default: ';'.
         encoding: Encoding for file, base is latin1.

@@ -326,10 +301,9 @@
         datatilstand=datatilstand,
         file_name=file_name,
         undermappe=undermappe,
-        fs=fs,
     )
     # Get list with the filenames, if several, ordered with the highest version number last.
-    files = _get_files(file_path, filetype, fs=fs)
+    files = _get_files(file_path, filetype)
     # Find version number/decide whether to overwrite or make new version.
     version_number = _find_version_number(files, stable_version)

@@ -338,7 +312,7 @@
         file_path = file_path[:-1]
     file_path = f"{file_path}_v{version_number}.{filetype}"

-    _save_df(df, file_path, filetype, fs, seperator, encoding)
+    _save_df(df, file_path, filetype, seperator, encoding)


 def read_ssb_file(
@@ -350,8 +324,8 @@ def read_ssb_file(
     datatilstand: str = "",
     undermappe: str | None = None,
     filetype: str = "parquet",
+    columns: list[str] | None = None,
     version_number: int | None = None,
-    fs: dapla.gcs.GCSFileSystem | None = None,
     seperator: str = ";",
     encoding: str = "latin1",
 ) -> pd.DataFrame | None:
@@ -371,7 +345,7 @@
         undermappe: Optional folder under 'datatilstand'.
         version_number: possibility to get another version than the newest (i.e. highest version number). Default: None.
         filetype: the filetype to save as. Default: 'parquet'.
-        fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
+        columns: Columns to read from the file. If None (default), all columns are read.
         seperator: the seperator to use if filetype is csv. Default: ';'.
         encoding: Encoding for file, base is latin1.

@@ -392,12 +366,11 @@
         undermappe=undermappe,
         version_number=version_number,
         filetype=filetype,
-        fs=fs,
     )

     if not version_number:
         # If version number not specified then list out versions.
-        files = _get_files(file_path, filetype, fs=fs)
+        files = _get_files(file_path, filetype)
         # If list is empty, no matching files of any version were found.
         if not files:
             raise FileNotFoundError(
@@ -408,18 +381,22 @@

     # Different functions used for reading depending on the filetype.
     if filetype == "csv":
-        if fs:
-            # The same approach as before can be used to read all file formats.
-            with fs.open(file_path, "r") as f:
-                df = pd.read_csv(f, sep=seperator, encoding=encoding)
-                f.close()
-        else:
-            df = pd.read_csv(file_path, sep=seperator, encoding=encoding)
+        df = pd.read_csv(file_path, sep=seperator, encoding=encoding, usecols=columns)
     elif filetype == "parquet":
-        df = pd.read_parquet(file_path, filesystem=fs)
+        df = pd.read_parquet(file_path, columns=columns)
     elif filetype == "jsonl":
-        df = pd.read_json(file_path, lines=True)
+        if columns is not None:
+            warnings.warn(
+                f"The columns argument is ignored for {filetype} files; the whole file will be loaded.",
+                stacklevel=2,
+            )
+        df = pd.read_json(file_path, lines=False)
     elif filetype == "json":
+        if columns is not None:
+            warnings.warn(
+                f"The columns argument is ignored for {filetype} files; the whole file will be loaded.",
+                stacklevel=2,
+            )
         df = pd.read_json(file_path, lines=False)
     # Returns pandas df.
     return df
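Taken together, the 2.0.0 read/write API drops `fs` everywhere and gains column projection on read (`usecols` for csv, `columns` for parquet, ignored with a warning for json/jsonl). A round-trip sketch; the bucket/kortnavn/file names are hypothetical, the module path is an assumption, and the leading parameters of both functions are inferred from the docstrings in this diff:

```python
import pandas as pd

from ssb_konjunk.saving import read_ssb_file, write_ssb_file  # hypothetical module path

df = pd.DataFrame({"aar": [2024, 2024], "verdi": [1.0, 2.0]})

# Write: no fs argument in 2.0.0; the version suffix (_v1, _v2, ...) is handled internally.
write_ssb_file(
    df,
    bucket="bucket",
    kortnavn="kortnavn",
    datatilstand="klargjort",
    file_name="data_p2024",
)

# Read back only selected columns (new in 2.0.0).
df_read = read_ssb_file(
    bucket="bucket",
    kortnavn="kortnavn",
    datatilstand="klargjort",
    file_name="data_p2024",
    columns=["verdi"],
)
```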
@@ -2,27 +2,19 @@

 import xml.etree.ElementTree as ET

-import dapla

-
-def read_xml(xml_file: str, fs: dapla.gcs.GCSFileSystem | None = None) -> ET.Element:
+def read_xml(xml_file: str) -> ET.Element:
     """Function to get xml root from disk.

     Args:
         xml_file: String value for xml filepath.
-        fs: filesystem

     Returns:
         ET.Element: Root of xml file.
     """
-    if fs:
-        with fs.open(xml_file, mode="r") as file:
-            single_xml = file.read()
-            file.close()
-    else:
-        with open(xml_file) as file:
-            single_xml = file.read()
-            file.close()
+    with open(xml_file) as file:
+        single_xml = file.read()
+        file.close()

     return ET.fromstring(single_xml)

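read_xml now always reads from a plain path. Since the helper's module path is not shown in the diff, this sketch inlines the simplified body; "data.xml" is a hypothetical file:

```python
import xml.etree.ElementTree as ET

def read_xml(xml_file: str) -> ET.Element:
    # Read the raw XML text from disk and parse it into an element-tree root.
    with open(xml_file) as file:
        single_xml = file.read()
    return ET.fromstring(single_xml)

root = read_xml("data.xml")
print(root.tag, len(root))
```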
File without changes
File without changes