ssb-konjunk 1.0.1__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ssb-konjunk
3
- Version: 1.0.1
3
+ Version: 2.0.1
4
4
  Summary: SSB Konjunk 422
5
5
  License: MIT
6
6
  Author: Johanne Saxegaard
7
- Author-email: jox@ssb.no
7
+ Author-email: jox@ssb.no>, Halvor Steffenssen <hvr@ssb.no
8
8
  Requires-Python: >=3.10,<4.0
9
9
  Classifier: Development Status :: 1 - Planning
10
10
  Classifier: License :: OSI Approved :: MIT License
@@ -14,7 +14,6 @@ Classifier: Programming Language :: Python :: 3.11
14
14
  Classifier: Programming Language :: Python :: 3.12
15
15
  Classifier: Programming Language :: Python :: 3.13
16
16
  Requires-Dist: click (>=8.0.1)
17
- Requires-Dist: dapla-toolbelt (>=3.0.0)
18
17
  Requires-Dist: pandas (>=2.2.0)
19
18
  Requires-Dist: pandas-stubs (>=2.2.2.240807)
20
19
  Requires-Dist: pendulum (>=3.0.0)
@@ -1,8 +1,8 @@
1
1
  [tool.poetry]
2
2
  name = "ssb-konjunk"
3
- version = "1.0.1"
3
+ version = "2.0.1"
4
4
  description = "SSB Konjunk 422"
5
- authors = ["Johanne Saxegaard <jox@ssb.no>"]
5
+ authors = ["Johanne Saxegaard <jox@ssb.no>, Halvor Steffenssen <hvr@ssb.no>"]
6
6
  license = "MIT"
7
7
  readme = "README.md"
8
8
  homepage = "https://github.com/statisticsnorway/ssb-konjunk"
@@ -18,7 +18,6 @@ python = ">=3.10, <4.0"
18
18
  click = ">=8.0.1"
19
19
  pandas = ">=2.2.0"
20
20
  pendulum = ">=3.0.0"
21
- dapla-toolbelt = ">=3.0.0"
22
21
  pandas-stubs = ">=2.2.2.240807"
23
22
 
24
23
  [tool.poetry.group.dev.dependencies]
@@ -35,7 +34,6 @@ pytest = ">=6.2.5"
35
34
  sphinx = ">=6.2.1"
36
35
  sphinx-autobuild = ">=2021.3.14"
37
36
  sphinx-autodoc-typehints = ">=1.24.0"
38
- sphinx-click = ">=3.0.2"
39
37
  typeguard = ">=2.13.3"
40
38
  xdoctest = { extras = ["colors"], version = ">=0.15.10" }
41
39
  myst-parser = { version = ">=0.16.1" }
@@ -6,11 +6,9 @@ https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
6
6
  """
7
7
 
8
8
  # Importing external packages
9
- import pandas as pd
10
- from dapla import FileClient
9
+ from __future__ import annotations
11
10
 
12
- # Getting filesystem
13
- fs = FileClient.get_gcs_file_system()
11
+ import pandas as pd
14
12
 
15
13
 
16
14
  def change_date_format_fame(series: pd.Series[str]) -> pd.Series[str]:
@@ -34,7 +32,7 @@ def write_out_fame_format_txt(
34
32
  names: pd.Series[str],
35
33
  dates: pd.Series[str],
36
34
  values: pd.Series[float],
37
- gcp_path: str,
35
+ path: str,
38
36
  ) -> None:
39
37
  """Function to write out txt file in fame format.
40
38
 
@@ -42,9 +40,9 @@ def write_out_fame_format_txt(
42
40
  names: Pandas series containing name or type for value.
43
41
  dates: Pandas series containing date for values.
44
42
  values: Pandas series containing values.
45
- gcp_path: String to google cloud.
43
+ path: String to output file.
46
44
  """
47
- with fs.open(gcp_path, "w") as f:
45
+ with open(path, "w") as f:
48
46
  # Write data rows
49
47
  for name, date, value in zip(names, dates, values, strict=False):
50
48
  # Apply format specification
@@ -7,7 +7,6 @@ import glob
7
7
  import re
8
8
  import warnings
9
9
 
10
- import dapla
11
10
  import pandas as pd
12
11
 
13
12
  from ssb_konjunk import timestamp
@@ -40,7 +39,6 @@ def _structure_ssb_filepath(
40
39
  undermappe: str | None = None,
41
40
  version_number: int | None = None,
42
41
  filetype: str = "parquet",
43
- fs: dapla.gcs.GCSFileSystem | None = None,
44
42
  ) -> str:
45
43
  """Structure the name of the file to SSB-format and the path.
46
44
 
@@ -54,7 +52,6 @@ def _structure_ssb_filepath(
54
52
  undermappe: Optional string for if you want folders between 'datatilstand' and file.
55
53
  version_number: Optional int for reading specific file.
56
54
  filetype: String with default 'parquet', specifies file type.
57
- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
58
55
 
59
56
  Returns:
60
57
  str: the full path to the file.
@@ -62,11 +59,7 @@ def _structure_ssb_filepath(
62
59
  Raises:
63
60
  ValueError: Raise if version number is not None or int.
64
61
  """
65
- # Handle that path starts with / in prodsonen.
66
- if fs is None:
67
- bucket = _remove_edge_slashes(bucket, only_last=True)
68
- else:
69
- bucket = _remove_edge_slashes(bucket)
62
+ bucket = _remove_edge_slashes(bucket)
70
63
  kortnavn = _remove_edge_slashes(kortnavn)
71
64
  datatilstand = _remove_edge_slashes(datatilstand)
72
65
  file_name = _remove_edge_slashes(file_name)
@@ -96,17 +89,12 @@ def _structure_ssb_filepath(
96
89
  return file_path
97
90
 
98
91
 
99
- def _get_files(
100
- folder_path: str, filetype: str, fs: dapla.gcs.GCSFileSystem | None
101
- ) -> list[str]:
92
+ def _get_files(folder_path: str, filetype: str) -> list[str]:
102
93
  """Function to list files in a folder based on base name and timestamp."""
103
94
  filenames = []
104
95
 
105
96
  match_string = f"{folder_path}*"
106
- if fs:
107
- filenames = fs.glob(match_string)
108
- else:
109
- filenames = glob.glob(match_string)
97
+ filenames = glob.glob(match_string)
110
98
 
111
99
  # Only include files with the relevant file extension
112
100
  filenames = [i for i in filenames if i.endswith(filetype)]
@@ -143,23 +131,15 @@ def _find_version_number(files: list[str], stable_version: bool) -> str | None:
143
131
  elif stable_version and existing_versions[-1] == "0":
144
132
  return "1"
145
133
  elif stable_version and existing_versions[-1] != "0":
146
- overwrite = input(
147
- f"Vil du overskrive eksisterende versjon (_v{existing_versions[-1]})? Bekreft med 'y'."
134
+ make_new_version = input(
135
+ "Vil du lage en ny versjon (altså øke versjonsnummeret med en)? Bekreft med 'y'."
148
136
  )
149
- if overwrite.lower() == "y":
137
+ if make_new_version.lower() == "y":
150
138
  version = existing_versions[-1]
151
- version = version.split(".")[0]
139
+ version = int(version.split(".")[0]) + 1
152
140
  return str(version)
153
141
  else:
154
- make_new_version = input(
155
- "Vil du lage en ny versjon (altså øke versjonsnummeret med en)? Bekreft med 'y'."
156
- )
157
- if make_new_version.lower() == "y":
158
- version = existing_versions[-1]
159
- version = int(version.split(".")[0]) + 1
160
- return str(version)
161
- else:
162
- return None
142
+ return None
163
143
  else:
164
144
  raise ValueError("Noe gikk galt da rett versjonsnummer skulle settes.")
165
145
 
@@ -238,28 +218,16 @@ def _save_df(
238
218
  df: pd.DataFrame,
239
219
  file_path: str,
240
220
  filetype: str,
241
- fs: dapla.gcs.GCSFileSystem | None,
242
221
  seperator: str,
243
222
  encoding: str,
244
223
  ) -> None:
245
224
  """Do the actual saving, either as csv or parquet."""
246
225
  # Save as parquet
247
226
  if filetype == "parquet":
248
-
249
- if fs:
250
- with fs.open(file_path, "wb") as f:
251
- df.to_parquet(f, index=False)
252
- f.close()
253
- else:
254
- df.to_parquet(file_path, index=False)
227
+ df.to_parquet(file_path, index=False)
255
228
  # Save as csv
256
229
  elif filetype == "csv":
257
- if fs:
258
- with fs.open(file_path, "wb") as f:
259
- df.to_csv(f, sep=seperator, index=False, encoding=encoding)
260
- f.close()
261
- else:
262
- df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
230
+ df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
263
231
  # Save as jsonl
264
232
  elif filetype == "jsonl":
265
233
  df.to_json(file_path, orient="records", lines=True)
@@ -286,7 +254,6 @@ def write_ssb_file(
286
254
  undermappe: str | None = None,
287
255
  stable_version: bool = True,
288
256
  filetype: str = "parquet",
289
- fs: dapla.gcs.GCSFileSystem | None = None,
290
257
  seperator: str = ";",
291
258
  encoding: str = "latin1",
292
259
  ) -> None:
@@ -303,7 +270,6 @@ def write_ssb_file(
303
270
  undermappe: Optional folder under 'datatilstand'.
304
271
  stable_version: Bool for whether you should have checks in place in case of overwrite.
305
272
  filetype: the filetype to save as. Default: 'parquet'.
306
- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
307
273
  seperator: the separator to use if filetype is csv. Default: ';'.
308
274
  encoding: Encoding for file, base is latin1.
309
275
 
@@ -327,10 +293,9 @@ def write_ssb_file(
327
293
  datatilstand=datatilstand,
328
294
  file_name=file_name,
329
295
  undermappe=undermappe,
330
- fs=fs,
331
296
  )
332
297
  # Get list with the filenames; if there are several, they are ordered with the highest version number last.
333
- files = _get_files(file_path, filetype, fs=fs)
298
+ files = _get_files(file_path, filetype)
334
299
  # Find version number/decide whether to overwrite or make new version.
335
300
  version_number = _find_version_number(files, stable_version)
336
301
 
@@ -339,7 +304,7 @@ def write_ssb_file(
339
304
  file_path = file_path[:-1]
340
305
  file_path = f"{file_path}_v{version_number}.{filetype}"
341
306
 
342
- _save_df(df, file_path, filetype, fs, seperator, encoding)
307
+ _save_df(df, file_path, filetype, seperator, encoding)
343
308
 
344
309
 
345
310
  def read_ssb_file(
@@ -353,7 +318,6 @@ def read_ssb_file(
353
318
  filetype: str = "parquet",
354
319
  columns: list[str] | None = None,
355
320
  version_number: int | None = None,
356
- fs: dapla.gcs.GCSFileSystem | None = None,
357
321
  seperator: str = ";",
358
322
  encoding: str = "latin1",
359
323
  ) -> pd.DataFrame | None:
@@ -374,7 +338,6 @@ def read_ssb_file(
374
338
  version_number: possibility to get a version other than the newest (i.e. highest version number). Default: None.
375
339
  filetype: the filetype to save as. Default: 'parquet'.
376
340
  columns: Columns to read from the file. If None (default), all columns are read.
377
- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
378
341
  seperator: the separator to use if filetype is csv. Default: ';'.
379
342
  encoding: Encoding for file, base is latin1.
380
343
 
@@ -395,12 +358,11 @@ def read_ssb_file(
395
358
  undermappe=undermappe,
396
359
  version_number=version_number,
397
360
  filetype=filetype,
398
- fs=fs,
399
361
  )
400
362
 
401
363
  if not version_number:
402
364
  # If version number not specified then list out versions.
403
- files = _get_files(file_path, filetype, fs=fs)
365
+ files = _get_files(file_path, filetype)
404
366
  # If list is empty, no matching files of any version were found.
405
367
  if not files:
406
368
  raise FileNotFoundError(
@@ -411,17 +373,9 @@ def read_ssb_file(
411
373
 
412
374
  # Different functions used for reading depending on the filetype.
413
375
  if filetype == "csv":
414
- if fs:
415
- # Samme som tidligere kan brukes til å lese alle filformater.
416
- with fs.open(file_path, "r") as f:
417
- df = pd.read_csv(f, sep=seperator, encoding=encoding, usecols=columns)
418
- f.close()
419
- else:
420
- df = pd.read_csv(
421
- file_path, sep=seperator, encoding=encoding, usecols=columns
422
- )
376
+ df = pd.read_csv(file_path, sep=seperator, encoding=encoding, usecols=columns)
423
377
  elif filetype == "parquet":
424
- df = pd.read_parquet(file_path, columns=columns, filesystem=fs)
378
+ df = pd.read_parquet(file_path, columns=columns)
425
379
  elif filetype == "jsonl":
426
380
  if columns is not None:
427
381
  warnings.warn(
@@ -2,27 +2,19 @@
2
2
 
3
3
  import xml.etree.ElementTree as ET
4
4
 
5
- import dapla
6
5
 
7
-
8
- def read_xml(xml_file: str, fs: dapla.gcs.GCSFileSystem | None = None) -> ET.Element:
6
+ def read_xml(xml_file: str) -> ET.Element:
9
7
  """Function to get xml root from disk.
10
8
 
11
9
  Args:
12
10
  xml_file: String value for xml filepath.
13
- fs: filesystem
14
11
 
15
12
  Returns:
16
13
  ET.Element: Root of xml file.
17
14
  """
18
- if fs:
19
- with fs.open(xml_file, mode="r") as file:
20
- single_xml = file.read()
21
- file.close()
22
- else:
23
- with open(xml_file) as file:
24
- single_xml = file.read()
25
- file.close()
15
+ with open(xml_file) as file:
16
+ single_xml = file.read()
17
+ file.close()
26
18
 
27
19
  return ET.fromstring(single_xml)
28
20
 
File without changes
File without changes