ssb-konjunk 1.0.1__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ssb-konjunk
3
- Version: 1.0.1
3
+ Version: 2.0.0
4
4
  Summary: SSB Konjunk 422
5
5
  License: MIT
6
6
  Author: Johanne Saxegaard
@@ -14,7 +14,6 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Requires-Dist: click (>=8.0.1)
17
- Requires-Dist: dapla-toolbelt (>=3.0.0)
18
17
  Requires-Dist: pandas (>=2.2.0)
19
18
  Requires-Dist: pandas-stubs (>=2.2.2.240807)
20
19
  Requires-Dist: pendulum (>=3.0.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "ssb-konjunk"
3
- version = "1.0.1"
3
+ version = "2.0.0"
4
4
  description = "SSB Konjunk 422"
5
5
  authors = ["Johanne Saxegaard <jox@ssb.no>"]
6
6
  license = "MIT"
@@ -18,7 +18,6 @@ python = ">=3.10, <4.0"
18
18
  click = ">=8.0.1"
19
19
  pandas = ">=2.2.0"
20
20
  pendulum = ">=3.0.0"
21
- dapla-toolbelt = ">=3.0.0"
22
21
  pandas-stubs = ">=2.2.2.240807"
23
22
 
24
23
  [tool.poetry.group.dev.dependencies]
@@ -35,7 +34,6 @@ pytest = ">=6.2.5"
35
34
  sphinx = ">=6.2.1"
36
35
  sphinx-autobuild = ">=2021.3.14"
37
36
  sphinx-autodoc-typehints = ">=1.24.0"
38
- sphinx-click = ">=3.0.2"
39
37
  typeguard = ">=2.13.3"
40
38
  xdoctest = { extras = ["colors"], version = ">=0.15.10" }
41
39
  myst-parser = { version = ">=0.16.1" }
@@ -6,11 +6,9 @@ https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
6
6
  """
7
7
 
8
8
  # Importing external packages
9
- import pandas as pd
10
- from dapla import FileClient
9
+ from __future__ import annotations
11
10
 
12
- # Getting filesystem
13
- fs = FileClient.get_gcs_file_system()
11
+ import pandas as pd
14
12
 
15
13
 
16
14
  def change_date_format_fame(series: pd.Series[str]) -> pd.Series[str]:
@@ -34,7 +32,7 @@ def write_out_fame_format_txt(
34
32
  names: pd.Series[str],
35
33
  dates: pd.Series[str],
36
34
  values: pd.Series[float],
37
- gcp_path: str,
35
+ path: str,
38
36
  ) -> None:
39
37
  """Function to write out txt file in fame format.
40
38
 
@@ -42,9 +40,9 @@ def write_out_fame_format_txt(
42
40
  names: Pandas series containing name or type for value.
43
41
  dates: Pandas series containing date for values.
44
42
  values: Pandas series containing values.
45
- gcp_path: String to google cloud.
43
+ path: String to output file.
46
44
  """
47
- with fs.open(gcp_path, "w") as f:
45
+ with open(path, "w") as f:
48
46
  # Write data rows
49
47
  for name, date, value in zip(names, dates, values, strict=False):
50
48
  # Apply format specification
@@ -7,7 +7,6 @@ import glob
7
7
  import re
8
8
  import warnings
9
9
 
10
- import dapla
11
10
  import pandas as pd
12
11
 
13
12
  from ssb_konjunk import timestamp
@@ -40,7 +39,6 @@ def _structure_ssb_filepath(
40
39
  undermappe: str | None = None,
41
40
  version_number: int | None = None,
42
41
  filetype: str = "parquet",
43
- fs: dapla.gcs.GCSFileSystem | None = None,
44
42
  ) -> str:
45
43
  """Structure the name of the file to SSB-format and the path.
46
44
 
@@ -54,7 +52,6 @@ def _structure_ssb_filepath(
54
52
  undermappe: Optional string for if you want folders between 'datatilstand' and file.
55
53
  version_number: Optional int for reading specific file.
56
54
  filetype: String with default 'parquet', specifies file type.
57
- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
58
55
 
59
56
  Returns:
60
57
  str: the full path to the file.
@@ -62,11 +59,7 @@ def _structure_ssb_filepath(
62
59
  Raises:
63
60
  ValueError: Raise if version number is not None or int.
64
61
  """
65
- # Handle that path starts with / in prodsonen.
66
- if fs is None:
67
- bucket = _remove_edge_slashes(bucket, only_last=True)
68
- else:
69
- bucket = _remove_edge_slashes(bucket)
62
+ bucket = _remove_edge_slashes(bucket)
70
63
  kortnavn = _remove_edge_slashes(kortnavn)
71
64
  datatilstand = _remove_edge_slashes(datatilstand)
72
65
  file_name = _remove_edge_slashes(file_name)
@@ -96,17 +89,12 @@ def _structure_ssb_filepath(
96
89
  return file_path
97
90
 
98
91
 
99
- def _get_files(
100
- folder_path: str, filetype: str, fs: dapla.gcs.GCSFileSystem | None
101
- ) -> list[str]:
92
+ def _get_files(folder_path: str, filetype: str) -> list[str]:
102
93
  """Function to list files in a folder based on base name and timestamp."""
103
94
  filenames = []
104
95
 
105
96
  match_string = f"{folder_path}*"
106
- if fs:
107
- filenames = fs.glob(match_string)
108
- else:
109
- filenames = glob.glob(match_string)
97
+ filenames = glob.glob(match_string)
110
98
 
111
99
  # Only include files with the relevant file extension
112
100
  filenames = [i for i in filenames if i.endswith(filetype)]
@@ -238,28 +226,16 @@ def _save_df(
238
226
  df: pd.DataFrame,
239
227
  file_path: str,
240
228
  filetype: str,
241
- fs: dapla.gcs.GCSFileSystem | None,
242
229
  seperator: str,
243
230
  encoding: str,
244
231
  ) -> None:
245
232
  """Do the actual saving, either as csv or parquet."""
246
233
  # Save as parquet
247
234
  if filetype == "parquet":
248
-
249
- if fs:
250
- with fs.open(file_path, "wb") as f:
251
- df.to_parquet(f, index=False)
252
- f.close()
253
- else:
254
- df.to_parquet(file_path, index=False)
235
+ df.to_parquet(file_path, index=False)
255
236
  # Save as csv
256
237
  elif filetype == "csv":
257
- if fs:
258
- with fs.open(file_path, "wb") as f:
259
- df.to_csv(f, sep=seperator, index=False, encoding=encoding)
260
- f.close()
261
- else:
262
- df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
238
+ df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
263
239
  # Save as jsonl
264
240
  elif filetype == "jsonl":
265
241
  df.to_json(file_path, orient="records", lines=True)
@@ -286,7 +262,6 @@ def write_ssb_file(
286
262
  undermappe: str | None = None,
287
263
  stable_version: bool = True,
288
264
  filetype: str = "parquet",
289
- fs: dapla.gcs.GCSFileSystem | None = None,
290
265
  seperator: str = ";",
291
266
  encoding: str = "latin1",
292
267
  ) -> None:
@@ -303,7 +278,6 @@ def write_ssb_file(
303
278
  undermappe: Optional folder under 'datatilstand'.
304
279
  stable_version: Bool for whether you should have checks in place in case of overwrite.
305
280
  filetype: the filetype to save as. Default: 'parquet'.
306
- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
307
281
  seperator: the separator to use if filetype is csv. Default: ';'.
308
282
  encoding: Encoding for file, base is latin1.
309
283
 
@@ -327,10 +301,9 @@ def write_ssb_file(
327
301
  datatilstand=datatilstand,
328
302
  file_name=file_name,
329
303
  undermappe=undermappe,
330
- fs=fs,
331
304
  )
332
305
  # Get list with the filenames, if several, ordered by the highest version number at last.
333
- files = _get_files(file_path, filetype, fs=fs)
306
+ files = _get_files(file_path, filetype)
334
307
  # Find version number/decide whether to overwrite or make new version.
335
308
  version_number = _find_version_number(files, stable_version)
336
309
 
@@ -339,7 +312,7 @@ def write_ssb_file(
339
312
  file_path = file_path[:-1]
340
313
  file_path = f"{file_path}_v{version_number}.{filetype}"
341
314
 
342
- _save_df(df, file_path, filetype, fs, seperator, encoding)
315
+ _save_df(df, file_path, filetype, seperator, encoding)
343
316
 
344
317
 
345
318
  def read_ssb_file(
@@ -353,7 +326,6 @@ def read_ssb_file(
353
326
  filetype: str = "parquet",
354
327
  columns: list[str] | None = None,
355
328
  version_number: int | None = None,
356
- fs: dapla.gcs.GCSFileSystem | None = None,
357
329
  seperator: str = ";",
358
330
  encoding: str = "latin1",
359
331
  ) -> pd.DataFrame | None:
@@ -374,7 +346,6 @@ def read_ssb_file(
374
346
  version_number: possibility to get another version than the newest (i.e. highest version number). Default: None.
375
347
  filetype: the filetype to save as. Default: 'parquet'.
376
348
  columns: Columns to read from the file. If None (default), all columns are read.
377
- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
378
349
  seperator: the separator to use if filetype is csv. Default: ';'.
379
350
  encoding: Encoding for file, base is latin1.
380
351
 
@@ -395,12 +366,11 @@ def read_ssb_file(
395
366
  undermappe=undermappe,
396
367
  version_number=version_number,
397
368
  filetype=filetype,
398
- fs=fs,
399
369
  )
400
370
 
401
371
  if not version_number:
402
372
  # If version number not specified then list out versions.
403
- files = _get_files(file_path, filetype, fs=fs)
373
+ files = _get_files(file_path, filetype)
404
374
  # If list is empty, no matching files of any version were found.
405
375
  if not files:
406
376
  raise FileNotFoundError(
@@ -411,17 +381,9 @@ def read_ssb_file(
411
381
 
412
382
  # Different functions used for reading depending on the filetype.
413
383
  if filetype == "csv":
414
- if fs:
415
- # Samme som tidligere kan brukes til å lese alle filformater.
416
- with fs.open(file_path, "r") as f:
417
- df = pd.read_csv(f, sep=seperator, encoding=encoding, usecols=columns)
418
- f.close()
419
- else:
420
- df = pd.read_csv(
421
- file_path, sep=seperator, encoding=encoding, usecols=columns
422
- )
384
+ df = pd.read_csv(file_path, sep=seperator, encoding=encoding, usecols=columns)
423
385
  elif filetype == "parquet":
424
- df = pd.read_parquet(file_path, columns=columns, filesystem=fs)
386
+ df = pd.read_parquet(file_path, columns=columns)
425
387
  elif filetype == "jsonl":
426
388
  if columns is not None:
427
389
  warnings.warn(
@@ -2,27 +2,19 @@
2
2
 
3
3
  import xml.etree.ElementTree as ET
4
4
 
5
- import dapla
6
5
 
7
-
8
- def read_xml(xml_file: str, fs: dapla.gcs.GCSFileSystem | None = None) -> ET.Element:
6
+ def read_xml(xml_file: str) -> ET.Element:
9
7
  """Function to get xml root from disk.
10
8
 
11
9
  Args:
12
10
  xml_file: String value for xml filepath.
13
- fs: filesystem
14
11
 
15
12
  Returns:
16
13
  ET.Element: Root of xml file.
17
14
  """
18
- if fs:
19
- with fs.open(xml_file, mode="r") as file:
20
- single_xml = file.read()
21
- file.close()
22
- else:
23
- with open(xml_file) as file:
24
- single_xml = file.read()
25
- file.close()
15
+ with open(xml_file) as file:
16
+ single_xml = file.read()
17
+ file.close()
26
18
 
27
19
  return ET.fromstring(single_xml)
28
20
 
File without changes
File without changes