ssb-konjunk 1.0.1__tar.gz → 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/PKG-INFO +2 -3
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/pyproject.toml +2 -4
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/fame.py +5 -7
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/saving.py +15 -61
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/xml_handling.py +4 -12
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/LICENSE +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/README.md +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/__init__.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/__main__.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/_functions.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/data_formating.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/prompts.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/py.typed +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/rounding.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/statbank_format.py +0 -0
- {ssb_konjunk-1.0.1 → ssb_konjunk-2.0.1}/src/ssb_konjunk/timestamp.py +0 -0
--- ssb_konjunk-1.0.1/PKG-INFO
+++ ssb_konjunk-2.0.1/PKG-INFO
@@ -1,10 +1,10 @@
 Metadata-Version: 2.3
 Name: ssb-konjunk
-Version: 1.0.1
+Version: 2.0.1
 Summary: SSB Konjunk 422
 License: MIT
 Author: Johanne Saxegaard
-Author-email: jox@ssb.no
+Author-email: jox@ssb.no>, Halvor Steffenssen <hvr@ssb.no
 Requires-Python: >=3.10,<4.0
 Classifier: Development Status :: 1 - Planning
 Classifier: License :: OSI Approved :: MIT License
@@ -14,7 +14,6 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: click (>=8.0.1)
-Requires-Dist: dapla-toolbelt (>=3.0.0)
 Requires-Dist: pandas (>=2.2.0)
 Requires-Dist: pandas-stubs (>=2.2.2.240807)
 Requires-Dist: pendulum (>=3.0.0)
--- ssb_konjunk-1.0.1/pyproject.toml
+++ ssb_konjunk-2.0.1/pyproject.toml
@@ -1,8 +1,8 @@
 [tool.poetry]
 name = "ssb-konjunk"
-version = "1.0.1"
+version = "2.0.1"
 description = "SSB Konjunk 422"
-authors = ["Johanne Saxegaard <jox@ssb.no>"]
+authors = ["Johanne Saxegaard <jox@ssb.no>, Halvor Steffenssen <hvr@ssb.no>"]
 license = "MIT"
 readme = "README.md"
 homepage = "https://github.com/statisticsnorway/ssb-konjunk"
@@ -18,7 +18,6 @@ python = ">=3.10, <4.0"
 click = ">=8.0.1"
 pandas = ">=2.2.0"
 pendulum = ">=3.0.0"
-dapla-toolbelt = ">=3.0.0"
 pandas-stubs = ">=2.2.2.240807"

 [tool.poetry.group.dev.dependencies]
@@ -35,7 +34,6 @@ pytest = ">=6.2.5"
 sphinx = ">=6.2.1"
 sphinx-autobuild = ">=2021.3.14"
 sphinx-autodoc-typehints = ">=1.24.0"
-sphinx-click = ">=3.0.2"
 typeguard = ">=2.13.3"
 xdoctest = { extras = ["colors"], version = ">=0.15.10" }
 myst-parser = { version = ">=0.16.1" }
--- ssb_konjunk-1.0.1/src/ssb_konjunk/fame.py
+++ ssb_konjunk-2.0.1/src/ssb_konjunk/fame.py
@@ -6,11 +6,9 @@ https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
 """

 # Importing external packages
-
-from dapla import FileClient
+from __future__ import annotations

-
-fs = FileClient.get_gcs_file_system()
+import pandas as pd


 def change_date_format_fame(series: pd.Series[str]) -> pd.Series[str]:
@@ -34,7 +32,7 @@ def write_out_fame_format_txt(
     names: pd.Series[str],
     dates: pd.Series[str],
     values: pd.Series[float],
-
+    path: str,
 ) -> None:
     """Function to write out txt file in fame format.

@@ -42,9 +40,9 @@
         names: Pandas series containing name or type for value.
         dates: Pandas series containing date for values.
         values: Pandas series containing values.
-
+        path: String to output file.
     """
-    with
+    with open(path, "w") as f:
         # Write data rows
         for name, date, value in zip(names, dates, values, strict=False):
             # Apply format specification
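Note on fame.py: `write_out_fame_format_txt` no longer writes through a module-level GCS filesystem; in 2.0.1 it takes an explicit `path` and writes with the built-in `open()`. Below is a minimal usage sketch of the new signature; the series contents and the output file name are illustrative placeholders, only the argument order (names, dates, values, path) is taken from the diff.

```python
import pandas as pd

from ssb_konjunk.fame import write_out_fame_format_txt

# Placeholder data; the series values are not from the package.
names = pd.Series(["serie_a", "serie_a"])
dates = pd.Series(["2024-01", "2024-02"])
values = pd.Series([123.0, 456.0])

# In 2.0.1 the destination is passed explicitly and written locally with open(path, "w").
write_out_fame_format_txt(names, dates, values, path="fame_output.txt")
```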
--- ssb_konjunk-1.0.1/src/ssb_konjunk/saving.py
+++ ssb_konjunk-2.0.1/src/ssb_konjunk/saving.py
@@ -7,7 +7,6 @@ import glob
 import re
 import warnings

-import dapla
 import pandas as pd

 from ssb_konjunk import timestamp
@@ -40,7 +39,6 @@ def _structure_ssb_filepath(
     undermappe: str | None = None,
     version_number: int | None = None,
     filetype: str = "parquet",
-    fs: dapla.gcs.GCSFileSystem | None = None,
 ) -> str:
     """Structure the name of the file to SSB-format and the path.

@@ -54,7 +52,6 @@ def _structure_ssb_filepath(
         undermappe: Optional string for if you want folders betwen 'datatilstand' and file.
         version_number: Optional int for reading specific file.
         filetype: String with default 'parquet', specifies file type.
-        fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.

     Returns:
         str: the full path to the file.
@@ -62,11 +59,7 @@ def _structure_ssb_filepath(
     Raises:
         ValueError: Raise if version number is not None or int.
     """
-
-    if fs is None:
-        bucket = _remove_edge_slashes(bucket, only_last=True)
-    else:
-        bucket = _remove_edge_slashes(bucket)
+    bucket = _remove_edge_slashes(bucket)
     kortnavn = _remove_edge_slashes(kortnavn)
     datatilstand = _remove_edge_slashes(datatilstand)
     file_name = _remove_edge_slashes(file_name)
@@ -96,17 +89,12 @@ def _structure_ssb_filepath(
     return file_path


-def _get_files(
-    folder_path: str, filetype: str, fs: dapla.gcs.GCSFileSystem | None
-) -> list[str]:
+def _get_files(folder_path: str, filetype: str) -> list[str]:
     """Function to list files in a folder based on base name and timestamp."""
     filenames = []

     match_string = f"{folder_path}*"
-
-        filenames = fs.glob(match_string)
-    else:
-        filenames = glob.glob(match_string)
+    filenames = glob.glob(match_string)

     # Only include files with the relevant file extension
     filenames = [i for i in filenames if i.endswith(filetype)]
@@ -143,23 +131,15 @@ def _find_version_number(files: list[str], stable_version: bool) -> str | None:
     elif stable_version and existing_versions[-1] == "0":
         return "1"
     elif stable_version and existing_versions[-1] != "0":
-
-
+        make_new_version = input(
+            "Vil du lage en ny versjon (altså øke versjonsnummeret med en)? Bekreft med 'y'."
         )
-        if
+        if make_new_version.lower() == "y":
             version = existing_versions[-1]
-            version = version.split(".")[0]
+            version = int(version.split(".")[0]) + 1
             return str(version)
         else:
-
-            "Vil du lage en ny versjon (altså øke versjonsnummeret med en)? Bekreft med 'y'."
-            )
-            if make_new_version.lower() == "y":
-                version = existing_versions[-1]
-                version = int(version.split(".")[0]) + 1
-                return str(version)
-            else:
-                return None
+            return None
     else:
         raise ValueError("Noe gikk galt da rett versjonsnummer skulle settes.")

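Note on `_find_version_number`: in 2.0.1, when a stable version already exists, the function always prompts whether to bump and computes the new number as `int(version.split(".")[0]) + 1`, where one branch of the removed 1.0.1 code returned `version.split(".")[0]` unchanged. A small sketch of the new arithmetic follows; the shape of the entries in `existing_versions` (e.g. "3.parquet") is an assumption for illustration, only the split-and-increment step is taken from the hunk above.

```python
# Assumed version strings such as "3.parquet"; only the arithmetic mirrors the 2.0.1 hunk.
existing_versions = ["1.parquet", "2.parquet", "3.parquet"]

version = existing_versions[-1]
version = int(version.split(".")[0]) + 1  # 1.0.1 kept the old number here; 2.0.1 adds one
print(str(version))  # -> "4"
```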
--- ssb_konjunk-1.0.1/src/ssb_konjunk/saving.py
+++ ssb_konjunk-2.0.1/src/ssb_konjunk/saving.py
@@ -238,28 +218,16 @@ def _save_df(
     df: pd.DataFrame,
     file_path: str,
     filetype: str,
-    fs: dapla.gcs.GCSFileSystem | None,
     seperator: str,
     encoding: str,
 ) -> None:
     """Do the actual saving, either as csv or parquet."""
     # Save as parquet
     if filetype == "parquet":
-
-        if fs:
-            with fs.open(file_path, "wb") as f:
-                df.to_parquet(f, index=False)
-                f.close()
-        else:
-            df.to_parquet(file_path, index=False)
+        df.to_parquet(file_path, index=False)
     # Save as csv
     elif filetype == "csv":
-
-            with fs.open(file_path, "wb") as f:
-                df.to_csv(f, sep=seperator, index=False, encoding=encoding)
-                f.close()
-        else:
-            df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
+        df.to_csv(file_path, sep=seperator, index=False, encoding=encoding)
     # Save as jsonl
     elif filetype == "jsonl":
         df.to_json(file_path, orient="records", lines=True)
@@ -286,7 +254,6 @@ def write_ssb_file(
     undermappe: str | None = None,
     stable_version: bool = True,
     filetype: str = "parquet",
-    fs: dapla.gcs.GCSFileSystem | None = None,
     seperator: str = ";",
     encoding: str = "latin1",
 ) -> None:
@@ -303,7 +270,6 @@ def write_ssb_file(
         undermappe: Optional folder under 'datatilstand'.
         stable_version: Bool for whether you should have checks in place in case of overwrite.
         filetype: the filetype to save as. Default: 'parquet'.
-        fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
         seperator: the seperator to use it filetype is csv. Default: ';'.
         encoding: Encoding for file, base is latin1.

@@ -327,10 +293,9 @@ def write_ssb_file(
         datatilstand=datatilstand,
         file_name=file_name,
         undermappe=undermappe,
-        fs=fs,
     )
     # Get list with the filenames, if several, ordered by the highest version number at last.
-    files = _get_files(file_path, filetype
+    files = _get_files(file_path, filetype)
     # Find version number/decide whether to overwrite or make new version.
     version_number = _find_version_number(files, stable_version)

@@ -339,7 +304,7 @@ def write_ssb_file(
         file_path = file_path[:-1]
     file_path = f"{file_path}_v{version_number}.{filetype}"

-    _save_df(df, file_path, filetype,
+    _save_df(df, file_path, filetype, seperator, encoding)


 def read_ssb_file(
@@ -353,7 +318,6 @@ def read_ssb_file(
     filetype: str = "parquet",
     columns: list[str] | None = None,
     version_number: int | None = None,
-    fs: dapla.gcs.GCSFileSystem | None = None,
     seperator: str = ";",
     encoding: str = "latin1",
 ) -> pd.DataFrame | None:
@@ -374,7 +338,6 @@ def read_ssb_file(
         version_number: possibility to get another version, than the newest (i.e. highest version number). Default: np.nan.
         filetype: the filetype to save as. Default: 'parquet'.
         columns: Columns to read from the file. If None (default), all columns are read.
-        fs: the filesystem, pass with gsc Filesystem if Dapla. Default: None.
         seperator: the seperator to use it filetype is csv. Default: ';'.
         encoding: Encoding for file, base is latin1.

@@ -395,12 +358,11 @@ def read_ssb_file(
         undermappe=undermappe,
         version_number=version_number,
         filetype=filetype,
-        fs=fs,
     )

     if not version_number:
         # If version number not specified then list out versions.
-        files = _get_files(file_path, filetype
+        files = _get_files(file_path, filetype)
         # If list is empty, no matching files of any version were found.
         if not files:
             raise FileNotFoundError(
@@ -411,17 +373,9 @@ def read_ssb_file(

     # Different functions used for reading depending on the filetype.
     if filetype == "csv":
-
-        # Samme som tidligere kan brukes til å lese alle filformater.
-        with fs.open(file_path, "r") as f:
-            df = pd.read_csv(f, sep=seperator, encoding=encoding, usecols=columns)
-            f.close()
-        else:
-            df = pd.read_csv(
-                file_path, sep=seperator, encoding=encoding, usecols=columns
-            )
+        df = pd.read_csv(file_path, sep=seperator, encoding=encoding, usecols=columns)
     elif filetype == "parquet":
-        df = pd.read_parquet(file_path, columns=columns
+        df = pd.read_parquet(file_path, columns=columns)
     elif filetype == "jsonl":
         if columns is not None:
             warnings.warn(
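Note on saving.py as a whole: the `fs` parameter is gone from `_structure_ssb_filepath`, `_get_files`, `_save_df`, `write_ssb_file`, and `read_ssb_file`, so 2.0.1 always resolves paths with `glob.glob` and plain pandas I/O on the local filesystem. The sketch below shows the new call shape under stated assumptions: the bucket and name values are placeholders, and any parameter not visible in the hunks above (everything before `undermappe` in the public signatures, including `df`, `bucket`, and `kortnavn`) is inferred from `_structure_ssb_filepath`, so check the full source before relying on it.

```python
import pandas as pd

from ssb_konjunk.saving import read_ssb_file, write_ssb_file

df = pd.DataFrame({"aar": [2024], "verdi": [42.0]})  # illustrative data only

# No fs argument in 2.0.1: the SSB-style path is built and written locally.
# bucket/kortnavn/datatilstand/file_name values are placeholders.
write_ssb_file(
    df,
    bucket="/buckets/produkt",
    kortnavn="konjunk",
    datatilstand="klargjorte-data",
    file_name="eksempel_p2024",
    filetype="parquet",
)

# Reading back lists versions with glob.glob and loads with pd.read_parquet
# from the same local path.
df_read = read_ssb_file(
    bucket="/buckets/produkt",
    kortnavn="konjunk",
    datatilstand="klargjorte-data",
    file_name="eksempel_p2024",
    filetype="parquet",
)
```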
--- ssb_konjunk-1.0.1/src/ssb_konjunk/xml_handling.py
+++ ssb_konjunk-2.0.1/src/ssb_konjunk/xml_handling.py
@@ -2,27 +2,19 @@

 import xml.etree.ElementTree as ET

-import dapla

-
-def read_xml(xml_file: str, fs: dapla.gcs.GCSFileSystem | None = None) -> ET.Element:
+def read_xml(xml_file: str) -> ET.Element:
     """Funtion to get xml root from disk.

     Args:
         xml_file: Strin value for xml filepath.
-        fs: filesystem

     Returns:
         ET.Element: Root of xml file.
     """
-
-
-
-            file.close()
-    else:
-        with open(xml_file) as file:
-            single_xml = file.read()
-            file.close()
+    with open(xml_file) as file:
+        single_xml = file.read()
+        file.close()

     return ET.fromstring(single_xml)

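Note on xml_handling.py: `read_xml` now takes only the file path and reads from local disk with `open()`. A minimal sketch of the 2.0.1 call; the file name is a placeholder.

```python
import xml.etree.ElementTree as ET

from ssb_konjunk.xml_handling import read_xml

# "skjema.xml" is a placeholder path; read_xml opens it locally and returns the XML root.
root: ET.Element = read_xml("skjema.xml")
print(root.tag)
```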
The remaining files listed above (+0 -0) are unchanged between 1.0.1 and 2.0.1.