ssb-konjunk 1.0.1__tar.gz → 2.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/PKG-INFO +1 -2
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/pyproject.toml +1 -3
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/fame.py +5 -7
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/saving.py +10 -48
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/xml_handling.py +4 -12
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/LICENSE +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/README.md +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/__init__.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/__main__.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/_functions.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/data_formating.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/prompts.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/py.typed +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/rounding.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/statbank_format.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/timestamp.py +0 -0
{ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: ssb-konjunk
- Version: 1.0.1
+ Version: 2.0.0
  Summary: SSB Konjunk 422
  License: MIT
  Author: Johanne Saxegaard

@@ -14,7 +14,6 @@ Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
  Classifier: Programming Language :: Python :: 3.13
  Requires-Dist: click (>=8.0.1)
- Requires-Dist: dapla-toolbelt (>=3.0.0)
  Requires-Dist: pandas (>=2.2.0)
  Requires-Dist: pandas-stubs (>=2.2.2.240807)
  Requires-Dist: pendulum (>=3.0.0)
{ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/pyproject.toml

@@ -1,6 +1,6 @@
  [tool.poetry]
  name = "ssb-konjunk"
- version = "1.0.1"
+ version = "2.0.0"
  description = "SSB Konjunk 422"
  authors = ["Johanne Saxegaard <jox@ssb.no>"]
  license = "MIT"

@@ -18,7 +18,6 @@ python = ">=3.10, <4.0"
  click = ">=8.0.1"
  pandas = ">=2.2.0"
  pendulum = ">=3.0.0"
- dapla-toolbelt = ">=3.0.0"
  pandas-stubs = ">=2.2.2.240807"

  [tool.poetry.group.dev.dependencies]

@@ -35,7 +34,6 @@ pytest = ">=6.2.5"
  sphinx = ">=6.2.1"
  sphinx-autobuild = ">=2021.3.14"
  sphinx-autodoc-typehints = ">=1.24.0"
- sphinx-click = ">=3.0.2"
  typeguard = ">=2.13.3"
  xdoctest = { extras = ["colors"], version = ">=0.15.10" }
  myst-parser = { version = ">=0.16.1" }
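The dependency change above removes dapla-toolbelt from both the distribution metadata and the Poetry configuration. A minimal sketch (not from the package's documentation) of what that means in practice: the modules touched by this diff can be imported in an environment where dapla-toolbelt is not installed, since their dapla imports are gone.

import importlib

# Sketch: assumes ssb-konjunk 2.0.0 and its remaining dependencies are installed,
# but dapla-toolbelt is not.
for module_name in ("ssb_konjunk.fame", "ssb_konjunk.saving", "ssb_konjunk.xml_handling"):
    importlib.import_module(module_name)
    print(f"imported {module_name} without dapla-toolbelt")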
{ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/fame.py

@@ -6,11 +6,9 @@ https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
  """

  # Importing external packages
-
- from dapla import FileClient
+ from __future__ import annotations

-
- fs = FileClient.get_gcs_file_system()
+ import pandas as pd


  def change_date_format_fame(series: pd.Series[str]) -> pd.Series[str]:

@@ -34,7 +32,7 @@ def write_out_fame_format_txt(
  names: pd.Series[str],
  dates: pd.Series[str],
  values: pd.Series[float],
-
+ path: str,
  ) -> None:
  """Function to write out txt file in fame format.


@@ -42,9 +40,9 @@ def write_out_fame_format_txt(
  names: Pandas series containing name or type for value.
  dates: Pandas series containing date for values.
  values: Pandas series containing values.
-
+ path: String to output file.
  """
- with
+ with open(path, "w") as f:
  # Write data rows
  for name, date, value in zip(names, dates, values, strict=False):
  # Apply format specification
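In 2.0.0 fame.py no longer creates a module-level GCS filesystem; write_out_fame_format_txt takes an explicit path argument and writes with a plain open(). A minimal sketch of that local-write pattern follows; the input series and the row layout are hypothetical and do not reproduce the package's actual FAME format specification, and the real function may take further arguments outside the hunks shown above.

import pandas as pd

# Hypothetical input series.
names = pd.Series(["series_a", "series_b"])
dates = pd.Series(["2024M01", "2024M02"])
values = pd.Series([1.5, 2.5])

# 2.0.0 writes to a caller-supplied local path with open(), iterating the three
# series in parallel as the diff shows.
with open("fame_output.txt", "w") as f:
    for name, date, value in zip(names, dates, values, strict=False):
        f.write(f"{name} {date} {value}\n")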
{ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/saving.py

@@ -7,7 +7,6 @@ import glob
  import re
  import warnings

- import dapla
  import pandas as pd

  from ssb_konjunk import timestamp

@@ -40,7 +39,6 @@ def _structure_ssb_filepath(
  undermappe: str | None = None,
  version_number: int | None = None,
  filetype: str = "parquet",
- fs: dapla.gcs.GCSFileSystem | None = None,
  ) -> str:
  """Structure the name of the file to SSB-format and the path.


@@ -54,7 +52,6 @@ def _structure_ssb_filepath(
  undermappe: Optional string for if you want folders betwen 'datatilstand' and file.
  version_number: Optional int for reading specific file.
  filetype: String with default 'parquet', specifies file type.
- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.

  Returns:
  str: the full path to the file.

@@ -62,11 +59,7 @@ def _structure_ssb_filepath(
  Raises:
  ValueError: Raise if version number is not None or int.
  """
-
- if fs is None:
- bucket = _remove_edge_slashes(bucket, only_last=True)
- else:
- bucket = _remove_edge_slashes(bucket)
+ bucket = _remove_edge_slashes(bucket)
  kortnavn = _remove_edge_slashes(kortnavn)
  datatilstand = _remove_edge_slashes(datatilstand)
  file_name = _remove_edge_slashes(file_name)

@@ -96,17 +89,12 @@ def _structure_ssb_filepath(
  return file_path


- def _get_files(
- folder_path: str, filetype: str, fs: dapla.gcs.GCSFileSystem | None
- ) -> list[str]:
+ def _get_files(folder_path: str, filetype: str) -> list[str]:
  """Function to list files in a folder based on base name and timestamp."""
  filenames = []

  match_string = f"{folder_path}*"
-
- filenames = fs.glob(match_string)
- else:
- filenames = glob.glob(match_string)
+ filenames = glob.glob(match_string)

  # Only include files with the relevant file extension
  filenames = [i for i in filenames if i.endswith(filetype)]

@@ -238,28 +226,16 @@ def _save_df(
  df: pd.DataFrame,
  file_path: str,
  filetype: str,
- fs: dapla.gcs.GCSFileSystem | None,
  seperator: str,
  encoding: str,
  ) -> None:
  """Do the actual saving, either as csv or parquet."""
  # Save as parquet
  if filetype == "parquet":
-
- if fs:
- with fs.open(file_path, "wb") as f:
- df.to_parquet(f, index=False)
- f.close()
- else:
- df.to_parquet(file_path, index=False)
+ df.to_parquet(file_path, index=False)
  # Save as csv
  elif filetype == "csv":
-
- with fs.open(file_path, "wb") as f:
- df.to_csv(f, sep=seperator, index=False, encoding=encoding)
- f.close()
- else:
- df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
+ df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
  # Save as jsonl
  elif filetype == "jsonl":
  df.to_json(file_path, orient="records", lines=True)

@@ -286,7 +262,6 @@ def write_ssb_file(
  undermappe: str | None = None,
  stable_version: bool = True,
  filetype: str = "parquet",
- fs: dapla.gcs.GCSFileSystem | None = None,
  seperator: str = ";",
  encoding: str = "latin1",
  ) -> None:

@@ -303,7 +278,6 @@ def write_ssb_file(
  undermappe: Optional folder under 'datatilstand'.
  stable_version: Bool for whether you should have checks in place in case of overwrite.
  filetype: the filetype to save as. Default: 'parquet'.
- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
  seperator: the seperator to use it filetype is csv. Default: ';'.
  encoding: Encoding for file, base is latin1.


@@ -327,10 +301,9 @@ def write_ssb_file(
  datatilstand=datatilstand,
  file_name=file_name,
  undermappe=undermappe,
- fs=fs,
  )
  # Get list with the filenames, if several, ordered by the highest version number at last.
- files = _get_files(file_path, filetype
+ files = _get_files(file_path, filetype)
  # Find version number/decide whether to overwrite or make new version.
  version_number = _find_version_number(files, stable_version)


@@ -339,7 +312,7 @@
  file_path = file_path[:-1]
  file_path = f"{file_path}_v{version_number}.{filetype}"

- _save_df(df, file_path, filetype,
+ _save_df(df, file_path, filetype, seperator, encoding)


  def read_ssb_file(

@@ -353,7 +326,6 @@ def read_ssb_file(
  filetype: str = "parquet",
  columns: list[str] | None = None,
  version_number: int | None = None,
- fs: dapla.gcs.GCSFileSystem | None = None,
  seperator: str = ";",
  encoding: str = "latin1",
  ) -> pd.DataFrame | None:

@@ -374,7 +346,6 @@
  version_number: possibility to get another version, than the newest (i.e. highest version number). Default: np.nan.
  filetype: the filetype to save as. Default: 'parquet'.
  columns: Columns to read from the file. If None (default), all columns are read.
- fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
  seperator: the seperator to use it filetype is csv. Default: ';'.
  encoding: Encoding for file, base is latin1.


@@ -395,12 +366,11 @@
  undermappe=undermappe,
  version_number=version_number,
  filetype=filetype,
- fs=fs,
  )

  if not version_number:
  # If version number not specified then list out versions.
- files = _get_files(file_path, filetype
+ files = _get_files(file_path, filetype)
  # If list is empty, no matching files of any version were found.
  if not files:
  raise FileNotFoundError(

@@ -411,17 +381,9 @@

  # Different functions used for reading depending on the filetype.
  if filetype == "csv":
-
- # Samme som tidligere kan brukes til å lese alle filformater.
- with fs.open(file_path, "r") as f:
- df = pd.read_csv(f, sep=seperator, encoding=encoding, usecols=columns)
- f.close()
- else:
- df = pd.read_csv(
- file_path, sep=seperator, encoding=encoding, usecols=columns
- )
+ df = pd.read_csv(file_path, sep=seperator, encoding=encoding, usecols=columns)
  elif filetype == "parquet":
- df = pd.read_parquet(file_path, columns=columns
+ df = pd.read_parquet(file_path, columns=columns)
  elif filetype == "jsonl":
  if columns is not None:
  warnings.warn(
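With the fs branches removed, _save_df in 2.0.0 dispatches on filetype and writes directly to the given path with pandas. Below is a standalone sketch of that dispatch, reusing the signature shown in the diff; it is an illustration, not the packaged source.

import pandas as pd


def save_df_sketch(
    df: pd.DataFrame,
    file_path: str,
    filetype: str,
    seperator: str = ";",
    encoding: str = "latin1",
) -> None:
    """Mirror the 2.0.0 dispatch shown above: parquet, csv or jsonl."""
    if filetype == "parquet":
        df.to_parquet(file_path, index=False)
    elif filetype == "csv":
        df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
    elif filetype == "jsonl":
        df.to_json(file_path, orient="records", lines=True)


# Example against a local, already-versioned path; write_ssb_file builds the
# _v{version_number}.{filetype} suffix itself, as the hunks above show.
save_df_sketch(pd.DataFrame({"a": [1, 2]}), "example_v1.csv", "csv")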
{ssb_konjunk-1.0.1 → ssb_konjunk-2.0.0}/src/ssb_konjunk/xml_handling.py

@@ -2,27 +2,19 @@

  import xml.etree.ElementTree as ET

- import dapla

-
- def read_xml(xml_file: str, fs: dapla.gcs.GCSFileSystem | None = None) -> ET.Element:
+ def read_xml(xml_file: str) -> ET.Element:
  """Funtion to get xml root from disk.

  Args:
  xml_file: Strin value for xml filepath.
- fs: filesystem

  Returns:
  ET.Element: Root of xml file.
  """
-
-
-
- file.close()
- else:
- with open(xml_file) as file:
- single_xml = file.read()
- file.close()
+ with open(xml_file) as file:
+ single_xml = file.read()
+ file.close()

  return ET.fromstring(single_xml)

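With the fs parameter gone, read_xml in 2.0.0 reads the file from local disk and parses it with xml.etree.ElementTree. A small usage sketch; the file path is hypothetical.

import xml.etree.ElementTree as ET

from ssb_konjunk.xml_handling import read_xml

# Hypothetical local path; the function now opens it directly instead of going
# through a GCS filesystem object.
root: ET.Element = read_xml("data/example.xml")
print(root.tag, len(root))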