ssb-konjunk 1.0.0__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/PKG-INFO +1 -2
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/pyproject.toml +1 -3
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/fame.py +5 -7
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/saving.py +24 -47
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/xml_handling.py +4 -12
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/LICENSE +0 -0
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/README.md +0 -0
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/__init__.py +0 -0
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/__main__.py +0 -0
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/_functions.py +0 -0
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/data_formating.py +0 -0
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/prompts.py +0 -0
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/py.typed +0 -0
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/rounding.py +0 -0
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/statbank_format.py +0 -0
- {ssb_konjunk-1.0.0 → ssb_konjunk-2.0.0}/src/ssb_konjunk/timestamp.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ssb-konjunk
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 2.0.0
|
|
4
4
|
Summary: SSB Konjunk 422
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Johanne Saxegaard
|
|
@@ -14,7 +14,6 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3.12
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.13
|
|
16
16
|
Requires-Dist: click (>=8.0.1)
|
|
17
|
-
Requires-Dist: dapla-toolbelt (>=3.0.0)
|
|
18
17
|
Requires-Dist: pandas (>=2.2.0)
|
|
19
18
|
Requires-Dist: pandas-stubs (>=2.2.2.240807)
|
|
20
19
|
Requires-Dist: pendulum (>=3.0.0)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "ssb-konjunk"
|
|
3
|
-
version = "1.0.0"
|
|
3
|
+
version = "2.0.0"
|
|
4
4
|
description = "SSB Konjunk 422"
|
|
5
5
|
authors = ["Johanne Saxegaard <jox@ssb.no>"]
|
|
6
6
|
license = "MIT"
|
|
@@ -18,7 +18,6 @@ python = ">=3.10, <4.0"
|
|
|
18
18
|
click = ">=8.0.1"
|
|
19
19
|
pandas = ">=2.2.0"
|
|
20
20
|
pendulum = ">=3.0.0"
|
|
21
|
-
dapla-toolbelt = ">=3.0.0"
|
|
22
21
|
pandas-stubs = ">=2.2.2.240807"
|
|
23
22
|
|
|
24
23
|
[tool.poetry.group.dev.dependencies]
|
|
@@ -35,7 +34,6 @@ pytest = ">=6.2.5"
|
|
|
35
34
|
sphinx = ">=6.2.1"
|
|
36
35
|
sphinx-autobuild = ">=2021.3.14"
|
|
37
36
|
sphinx-autodoc-typehints = ">=1.24.0"
|
|
38
|
-
sphinx-click = ">=3.0.2"
|
|
39
37
|
typeguard = ">=2.13.3"
|
|
40
38
|
xdoctest = { extras = ["colors"], version = ">=0.15.10" }
|
|
41
39
|
myst-parser = { version = ">=0.16.1" }
|
|
@@ -6,11 +6,9 @@ https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
# Importing external packages
|
|
9
|
-
|
|
10
|
-
from dapla import FileClient
|
|
9
|
+
from __future__ import annotations
|
|
11
10
|
|
|
12
|
-
|
|
13
|
-
fs = FileClient.get_gcs_file_system()
|
|
11
|
+
import pandas as pd
|
|
14
12
|
|
|
15
13
|
|
|
16
14
|
def change_date_format_fame(series: pd.Series[str]) -> pd.Series[str]:
|
|
@@ -34,7 +32,7 @@ def write_out_fame_format_txt(
|
|
|
34
32
|
names: pd.Series[str],
|
|
35
33
|
dates: pd.Series[str],
|
|
36
34
|
values: pd.Series[float],
|
|
37
|
-
|
|
35
|
+
path: str,
|
|
38
36
|
) -> None:
|
|
39
37
|
"""Function to write out txt file in fame format.
|
|
40
38
|
|
|
@@ -42,9 +40,9 @@ def write_out_fame_format_txt(
|
|
|
42
40
|
names: Pandas series containing name or type for value.
|
|
43
41
|
dates: Pandas series containing date for values.
|
|
44
42
|
values: Pandas series containing values.
|
|
45
|
-
|
|
43
|
+
path: String to output file.
|
|
46
44
|
"""
|
|
47
|
-
with
|
|
45
|
+
with open(path, "w") as f:
|
|
48
46
|
# Write data rows
|
|
49
47
|
for name, date, value in zip(names, dates, values, strict=False):
|
|
50
48
|
# Apply format specification
|
|
@@ -5,8 +5,8 @@ Follows the standardization for versioning and names.
|
|
|
5
5
|
|
|
6
6
|
import glob
|
|
7
7
|
import re
|
|
8
|
+
import warnings
|
|
8
9
|
|
|
9
|
-
import dapla
|
|
10
10
|
import pandas as pd
|
|
11
11
|
|
|
12
12
|
from ssb_konjunk import timestamp
|
|
@@ -39,7 +39,6 @@ def _structure_ssb_filepath(
|
|
|
39
39
|
undermappe: str | None = None,
|
|
40
40
|
version_number: int | None = None,
|
|
41
41
|
filetype: str = "parquet",
|
|
42
|
-
fs: dapla.gcs.GCSFileSystem | None = None,
|
|
43
42
|
) -> str:
|
|
44
43
|
"""Structure the name of the file to SSB-format and the path.
|
|
45
44
|
|
|
@@ -53,7 +52,6 @@ def _structure_ssb_filepath(
|
|
|
53
52
|
undermappe: Optional string for if you want folders between 'datatilstand' and file.
|
|
54
53
|
version_number: Optional int for reading specific file.
|
|
55
54
|
filetype: String with default 'parquet', specifies file type.
|
|
56
|
-
fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
|
|
57
55
|
|
|
58
56
|
Returns:
|
|
59
57
|
str: the full path to the file.
|
|
@@ -61,11 +59,7 @@ def _structure_ssb_filepath(
|
|
|
61
59
|
Raises:
|
|
62
60
|
ValueError: Raise if version number is not None or int.
|
|
63
61
|
"""
|
|
64
|
-
|
|
65
|
-
if fs is None:
|
|
66
|
-
bucket = _remove_edge_slashes(bucket, only_last=True)
|
|
67
|
-
else:
|
|
68
|
-
bucket = _remove_edge_slashes(bucket)
|
|
62
|
+
bucket = _remove_edge_slashes(bucket)
|
|
69
63
|
kortnavn = _remove_edge_slashes(kortnavn)
|
|
70
64
|
datatilstand = _remove_edge_slashes(datatilstand)
|
|
71
65
|
file_name = _remove_edge_slashes(file_name)
|
|
@@ -95,17 +89,12 @@ def _structure_ssb_filepath(
|
|
|
95
89
|
return file_path
|
|
96
90
|
|
|
97
91
|
|
|
98
|
-
def _get_files(
|
|
99
|
-
folder_path: str, filetype: str, fs: dapla.gcs.GCSFileSystem | None
|
|
100
|
-
) -> list[str]:
|
|
92
|
+
def _get_files(folder_path: str, filetype: str) -> list[str]:
|
|
101
93
|
"""Function to list files in a folder based on base name and timestamp."""
|
|
102
94
|
filenames = []
|
|
103
95
|
|
|
104
96
|
match_string = f"{folder_path}*"
|
|
105
|
-
|
|
106
|
-
filenames = fs.glob(match_string)
|
|
107
|
-
else:
|
|
108
|
-
filenames = glob.glob(match_string)
|
|
97
|
+
filenames = glob.glob(match_string)
|
|
109
98
|
|
|
110
99
|
# Only include files with the relevant file extension
|
|
111
100
|
filenames = [i for i in filenames if i.endswith(filetype)]
|
|
@@ -237,28 +226,16 @@ def _save_df(
|
|
|
237
226
|
df: pd.DataFrame,
|
|
238
227
|
file_path: str,
|
|
239
228
|
filetype: str,
|
|
240
|
-
fs: dapla.gcs.GCSFileSystem | None,
|
|
241
229
|
seperator: str,
|
|
242
230
|
encoding: str,
|
|
243
231
|
) -> None:
|
|
244
232
|
"""Do the actual saving, either as csv or parquet."""
|
|
245
233
|
# Save as parquet
|
|
246
234
|
if filetype == "parquet":
|
|
247
|
-
|
|
248
|
-
if fs:
|
|
249
|
-
with fs.open(file_path, "wb") as f:
|
|
250
|
-
df.to_parquet(f, index=False)
|
|
251
|
-
f.close()
|
|
252
|
-
else:
|
|
253
|
-
df.to_parquet(file_path, index=False)
|
|
235
|
+
df.to_parquet(file_path, index=False)
|
|
254
236
|
# Save as csv
|
|
255
237
|
elif filetype == "csv":
|
|
256
|
-
|
|
257
|
-
with fs.open(file_path, "wb") as f:
|
|
258
|
-
df.to_csv(f, sep=seperator, index=False, encoding=encoding)
|
|
259
|
-
f.close()
|
|
260
|
-
else:
|
|
261
|
-
df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
|
|
238
|
+
df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
|
|
262
239
|
# Save as jsonl
|
|
263
240
|
elif filetype == "jsonl":
|
|
264
241
|
df.to_json(file_path, orient="records", lines=True)
|
|
@@ -285,7 +262,6 @@ def write_ssb_file(
|
|
|
285
262
|
undermappe: str | None = None,
|
|
286
263
|
stable_version: bool = True,
|
|
287
264
|
filetype: str = "parquet",
|
|
288
|
-
fs: dapla.gcs.GCSFileSystem | None = None,
|
|
289
265
|
seperator: str = ";",
|
|
290
266
|
encoding: str = "latin1",
|
|
291
267
|
) -> None:
|
|
@@ -302,7 +278,6 @@ def write_ssb_file(
|
|
|
302
278
|
undermappe: Optional folder under 'datatilstand'.
|
|
303
279
|
stable_version: Bool for whether you should have checks in place in case of overwrite.
|
|
304
280
|
filetype: the filetype to save as. Default: 'parquet'.
|
|
305
|
-
fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
|
|
306
281
|
seperator: the separator to use if filetype is csv. Default: ';'.
|
|
307
282
|
encoding: Encoding for file, base is latin1.
|
|
308
283
|
|
|
@@ -326,10 +301,9 @@ def write_ssb_file(
|
|
|
326
301
|
datatilstand=datatilstand,
|
|
327
302
|
file_name=file_name,
|
|
328
303
|
undermappe=undermappe,
|
|
329
|
-
fs=fs,
|
|
330
304
|
)
|
|
331
305
|
# Get list with the filenames; if there are several, they are ordered with the highest version number last.
|
|
332
|
-
files = _get_files(file_path, filetype
|
|
306
|
+
files = _get_files(file_path, filetype)
|
|
333
307
|
# Find version number/decide whether to overwrite or make new version.
|
|
334
308
|
version_number = _find_version_number(files, stable_version)
|
|
335
309
|
|
|
@@ -338,7 +312,7 @@ def write_ssb_file(
|
|
|
338
312
|
file_path = file_path[:-1]
|
|
339
313
|
file_path = f"{file_path}_v{version_number}.{filetype}"
|
|
340
314
|
|
|
341
|
-
_save_df(df, file_path, filetype,
|
|
315
|
+
_save_df(df, file_path, filetype, seperator, encoding)
|
|
342
316
|
|
|
343
317
|
|
|
344
318
|
def read_ssb_file(
|
|
@@ -350,8 +324,8 @@ def read_ssb_file(
|
|
|
350
324
|
datatilstand: str = "",
|
|
351
325
|
undermappe: str | None = None,
|
|
352
326
|
filetype: str = "parquet",
|
|
327
|
+
columns: list[str] | None = None,
|
|
353
328
|
version_number: int | None = None,
|
|
354
|
-
fs: dapla.gcs.GCSFileSystem | None = None,
|
|
355
329
|
seperator: str = ";",
|
|
356
330
|
encoding: str = "latin1",
|
|
357
331
|
) -> pd.DataFrame | None:
|
|
@@ -371,7 +345,7 @@ def read_ssb_file(
|
|
|
371
345
|
undermappe: Optional folder under 'datatilstand'.
|
|
372
346
|
version_number: possibility to get a version other than the newest (i.e. highest version number). Default: None.
|
|
373
347
|
filetype: the filetype to read. Default: 'parquet'.
|
|
374
|
-
|
|
348
|
+
columns: Columns to read from the file. If None (default), all columns are read.
|
|
375
349
|
seperator: the separator to use if filetype is csv. Default: ';'.
|
|
376
350
|
encoding: Encoding for file, base is latin1.
|
|
377
351
|
|
|
@@ -392,12 +366,11 @@ def read_ssb_file(
|
|
|
392
366
|
undermappe=undermappe,
|
|
393
367
|
version_number=version_number,
|
|
394
368
|
filetype=filetype,
|
|
395
|
-
fs=fs,
|
|
396
369
|
)
|
|
397
370
|
|
|
398
371
|
if not version_number:
|
|
399
372
|
# If version number not specified then list out versions.
|
|
400
|
-
files = _get_files(file_path, filetype
|
|
373
|
+
files = _get_files(file_path, filetype)
|
|
401
374
|
# If list is empty, no matching files of any version were found.
|
|
402
375
|
if not files:
|
|
403
376
|
raise FileNotFoundError(
|
|
@@ -408,18 +381,22 @@ def read_ssb_file(
|
|
|
408
381
|
|
|
409
382
|
# Different functions used for reading depending on the filetype.
|
|
410
383
|
if filetype == "csv":
|
|
411
|
-
|
|
412
|
-
# Samme som tidligere kan brukes til å lese alle filformater.
|
|
413
|
-
with fs.open(file_path, "r") as f:
|
|
414
|
-
df = pd.read_csv(f, sep=seperator, encoding=encoding)
|
|
415
|
-
f.close()
|
|
416
|
-
else:
|
|
417
|
-
df = pd.read_csv(file_path, sep=seperator, encoding=encoding)
|
|
384
|
+
df = pd.read_csv(file_path, sep=seperator, encoding=encoding, usecols=columns)
|
|
418
385
|
elif filetype == "parquet":
|
|
419
|
-
df = pd.read_parquet(file_path,
|
|
386
|
+
df = pd.read_parquet(file_path, columns=columns)
|
|
420
387
|
elif filetype == "jsonl":
|
|
421
|
-
|
|
388
|
+
if columns is not None:
|
|
389
|
+
warnings.warn(
|
|
390
|
+
f"Columns argumentet blir ignorert for {filetype} filer, hele filen vil bli lastet inn.",
|
|
391
|
+
stacklevel=2,
|
|
392
|
+
)
|
|
393
|
+
df = pd.read_json(file_path, lines=False)
|
|
422
394
|
elif filetype == "json":
|
|
395
|
+
if columns is not None:
|
|
396
|
+
warnings.warn(
|
|
397
|
+
f"Columns argumentet blir ignorert for {filetype} filer, hele filen vil bli lastet inn.",
|
|
398
|
+
stacklevel=2,
|
|
399
|
+
)
|
|
423
400
|
df = pd.read_json(file_path, lines=False)
|
|
424
401
|
# Returns pandas df.
|
|
425
402
|
return df
|
|
@@ -2,27 +2,19 @@
|
|
|
2
2
|
|
|
3
3
|
import xml.etree.ElementTree as ET
|
|
4
4
|
|
|
5
|
-
import dapla
|
|
6
5
|
|
|
7
|
-
|
|
8
|
-
def read_xml(xml_file: str, fs: dapla.gcs.GCSFileSystem | None = None) -> ET.Element:
|
|
6
|
+
def read_xml(xml_file: str) -> ET.Element:
|
|
9
7
|
"""Function to get xml root from disk.
|
|
10
8
|
|
|
11
9
|
Args:
|
|
12
10
|
xml_file: String value for xml filepath.
|
|
13
|
-
fs: filesystem
|
|
14
11
|
|
|
15
12
|
Returns:
|
|
16
13
|
ET.Element: Root of xml file.
|
|
17
14
|
"""
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
file.close()
|
|
22
|
-
else:
|
|
23
|
-
with open(xml_file) as file:
|
|
24
|
-
single_xml = file.read()
|
|
25
|
-
file.close()
|
|
15
|
+
with open(xml_file) as file:
|
|
16
|
+
single_xml = file.read()
|
|
17
|
+
file.close()
|
|
26
18
|
|
|
27
19
|
return ET.fromstring(single_xml)
|
|
28
20
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|