ssb-nudb-use 2025.12.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ssb_nudb_use-2025.12.4/LICENSE +21 -0
- ssb_nudb_use-2025.12.4/PKG-INFO +164 -0
- ssb_nudb_use-2025.12.4/README.md +134 -0
- ssb_nudb_use-2025.12.4/pyproject.toml +162 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/.gitignore +3 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/CLEANUP.md +30 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/__init__.py +48 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/exceptions/__init__.py +1 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/exceptions/exception_classes.py +13 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/exceptions/groups.py +32 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/__init__.py +21 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/external_apis/brreg_api.py +211 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/external_apis/skolereg.py +25 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/nudb_config/__init__.py +26 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/nudb_config/find_var_missing.py +106 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/nudb_config/get_variable_info.py +38 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/nudb_config/map_get_dtypes.py +147 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/nudb_config/variable_names.py +298 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/nudb_klass/__init__.py +6 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/nudb_klass/codes.py +227 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/nudb_klass/correspondence.py +44 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/nudb_klass/klass_utils.py +126 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/metadata/nudb_klass/variants.py +123 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/nudb_logger.py +236 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/paths/__init__.py +6 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/paths/latest.py +91 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/paths/path_parse.py +52 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/paths/path_utils.py +33 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/IDEAS.md +8 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/__init__.py +5 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/check_drop_cols.py +84 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/colored_views.py +81 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/duplicated_columns.py +32 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/missing.py +202 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/outdated_variables.py +46 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/__init__.py +11 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/gro_elevstatus.py +63 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/grunnskolepoeng.py +57 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/kommune.py +119 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/land.py +72 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/nus2000.py +223 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/run_all.py +74 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/sn07.py +61 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/snr_fnr.py +41 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/unique_per_person.py +81 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/utils.py +46 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/specific_variables/vg_fullfoertkode_detaljert.py +80 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/suite.py +67 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/thresholds.py +69 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/values.py +52 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/quality/widths.py +85 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/utils/packages.py +115 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/__init__.py +5 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/checks.py +264 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/cleanup.py +42 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/derive/__init__.py +41 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/derive/derive_decorator.py +217 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/derive/nus_correspondences.py +36 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/derive/nus_variants.py +129 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/derive/uh_univ_eller_hogskole.py +51 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/derive/utd_skoleaar.py +29 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/specific_vars/__init__.py +1 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/var_utils/duped_columns.py +19 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/var_utils/find_var.py +25 -0
- ssb_nudb_use-2025.12.4/src/nudb_use/variables/var_utils/pick_id_col.py +34 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright © 2025 Statistics Norway
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ssb-nudb-use
|
|
3
|
+
Version: 2025.12.4
|
|
4
|
+
Summary: NUDB Use - is a usage-package for the Norwegian National Education Database cloud-data. Both for data-consumers and data-deliverers
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Author: Carl F. Corneil
|
|
8
|
+
Author-email: cfc@ssb.no
|
|
9
|
+
Maintainer: Statistics Norway, Education statistics Department (360)
|
|
10
|
+
Requires-Python: >=3.11
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Requires-Dist: brreg (>=1.3.0)
|
|
18
|
+
Requires-Dist: pandas (>=2.3.3)
|
|
19
|
+
Requires-Dist: pydantic (>=2.12.4)
|
|
20
|
+
Requires-Dist: requests (>=2.32.5)
|
|
21
|
+
Requires-Dist: ssb-fagfunksjoner (>=1.1.2) ; python_version >= "3.11" and python_version < "4.0"
|
|
22
|
+
Requires-Dist: ssb-klass-python (>=1.0.4)
|
|
23
|
+
Requires-Dist: ssb-nudb-config (>=2025.12.6)
|
|
24
|
+
Project-URL: Changelog, https://github.com/statisticsnorway/ssb-nudb-use/releases
|
|
25
|
+
Project-URL: Documentation, https://statisticsnorway.github.io/ssb-nudb-use
|
|
26
|
+
Project-URL: Homepage, https://github.com/statisticsnorway/ssb-nudb-use
|
|
27
|
+
Project-URL: Repository, https://github.com/statisticsnorway/ssb-nudb-use
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# SSB-NUDB-USE
|
|
31
|
+
|
|
32
|
+
[][pypi status]
|
|
33
|
+
[][pypi status]
|
|
34
|
+
[][pypi status]
|
|
35
|
+
[][license]
|
|
36
|
+
|
|
37
|
+
[][documentation]
|
|
38
|
+
[][tests]
|
|
39
|
+
[][sonarcov]
|
|
40
|
+
[][sonarquality]
|
|
41
|
+
|
|
42
|
+
[][pre-commit]
|
|
43
|
+
[][black]
|
|
44
|
+
[](https://github.com/astral-sh/ruff)
|
|
45
|
+
[][poetry]
|
|
46
|
+
|
|
47
|
+
[pypi status]: https://pypi.org/project/ssb-nudb-use/
|
|
48
|
+
[documentation]: https://statisticsnorway.github.io/ssb-nudb-use
|
|
49
|
+
[tests]: https://github.com/statisticsnorway/ssb-nudb-use/actions?workflow=Tests
|
|
50
|
+
[sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-nudb-use
|
|
51
|
+
[sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-nudb-use
|
|
52
|
+
[pre-commit]: https://github.com/pre-commit/pre-commit
|
|
53
|
+
[black]: https://github.com/psf/black
|
|
54
|
+
[poetry]: https://python-poetry.org/
|
|
55
|
+
|
|
56
|
+
# Description
|
|
57
|
+
|
|
58
|
+
NUDB is the National Education Database of Norway. It is operated by Statistics Norway - section 360.
|
|
59
|
+
This package is the main "usage-package" for those seeking to use NUDB-data, or deliver data to NUDB.
|
|
60
|
+
|
|
61
|
+
NUDB's data is kept as parquet files in GCP, and you will need separate access to this data to utilize this package.
|
|
62
|
+
Some features in this package might require access to other data, like BRREG (Brønnøysundregisteret), BOF (befolkningsregisteret), VOF (virksomhetsregisteret) etc.
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
## Installation
|
|
66
|
+
|
|
67
|
+
You can install _SSB Nudb Use_ via [poetry] from [PyPI]:
|
|
68
|
+
|
|
69
|
+
```console
|
|
70
|
+
poetry add ssb-nudb-use
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Dependencies
|
|
74
|
+
|
|
75
|
+
This package depends on the package "ssb-nudb-config", which contains metadata, but also points to content in other metadatasystems like Vardef, Klass and Datadoc.
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
## Usage
|
|
79
|
+
|
|
80
|
+
Please see the [Reference Guide] for details.
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
### Usage for extraction (data from NUDB)
|
|
84
|
+
|
|
85
|
+
Find the latest of each file shared.
|
|
86
|
+
```python
|
|
87
|
+
from nudb_use import latest_shared_paths
|
|
88
|
+
latest_shared_paths()
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Get the periods out of any paths following the SSB-naming standard.
|
|
92
|
+
```python
|
|
93
|
+
from nudb_use import get_periods_from_path
|
|
94
|
+
get_periods_from_path(path)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Deriving variables not stored in data, is done by the derive module:
|
|
98
|
+
```python
|
|
99
|
+
from nudb_use import derive
|
|
100
|
+
df = derive.utd_skoleaar_slutt(df)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
### Usage for delivery (data to NUDB)
|
|
105
|
+
|
|
106
|
+
We have renamed a lot of our variables transitioning from the old on-prem systems. If you are looking for the new or old names of variables, you can use the find_var or find_vars functions:
|
|
107
|
+
```python
|
|
108
|
+
from nudb_use import find_vars
|
|
109
|
+
find_vars(["snr", "sosbak"])
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
If you want to update the column names you have in a pandas dataframe, to the new column names - there's a function for that:
|
|
113
|
+
```python
|
|
114
|
+
from nudb_use import update_colnames
|
|
115
|
+
df = update_colnames(df)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
After renaming, you can get the pandas dtypes the columns should have with get_dtypes:
|
|
119
|
+
```python
|
|
120
|
+
from nudb_use import get_dtypes
|
|
121
|
+
dtypes = get_dtypes(df)
|
|
122
|
+
df = df.astype(dtypes)
|
|
123
|
+
```
|
|
124
|
+
If you are delivering to NUDB, we want you to run our quality suite before sharing the data with us:
|
|
125
|
+
```python
|
|
126
|
+
from nudb_use import run_quality_suite
|
|
127
|
+
run_quality_suite(df, "avslutta")
|
|
128
|
+
```
|
|
129
|
+
Data about your delivery, like "avslutta", must first be entered into, and released in, the ssb-nudb-config package before it is available in this function. Contact the NUDB-team to define a new delivery.
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
## Contributing
|
|
135
|
+
|
|
136
|
+
Contributions are very welcome.
|
|
137
|
+
To learn more, see the [Contributor Guide].
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
Distributed under the terms of the [MIT license][license],
|
|
142
|
+
_SSB Nudb Use_ is free and open source software.
|
|
143
|
+
|
|
144
|
+
## Issues
|
|
145
|
+
|
|
146
|
+
If you encounter any problems,
|
|
147
|
+
please [file an issue] along with a detailed description.
|
|
148
|
+
|
|
149
|
+
## Credits
|
|
150
|
+
|
|
151
|
+
This project was generated from [Statistics Norway]'s [SSB PyPI Template].
|
|
152
|
+
|
|
153
|
+
[statistics norway]: https://www.ssb.no/en
|
|
154
|
+
[pypi]: https://pypi.org/
|
|
155
|
+
[ssb pypi template]: https://github.com/statisticsnorway/ssb-pypitemplate
|
|
156
|
+
[file an issue]: https://github.com/statisticsnorway/ssb-nudb-use/issues
|
|
157
|
+
[pip]: https://pip.pypa.io/
|
|
158
|
+
|
|
159
|
+
<!-- github-only -->
|
|
160
|
+
|
|
161
|
+
[license]: https://github.com/statisticsnorway/ssb-nudb-use/blob/main/LICENSE
|
|
162
|
+
[contributor guide]: https://github.com/statisticsnorway/ssb-nudb-use/blob/main/CONTRIBUTING.md
|
|
163
|
+
[reference guide]: https://statisticsnorway.github.io/ssb-nudb-use/reference.html
|
|
164
|
+
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
# SSB-NUDB-USE
|
|
2
|
+
|
|
3
|
+
[][pypi status]
|
|
4
|
+
[][pypi status]
|
|
5
|
+
[][pypi status]
|
|
6
|
+
[][license]
|
|
7
|
+
|
|
8
|
+
[][documentation]
|
|
9
|
+
[][tests]
|
|
10
|
+
[][sonarcov]
|
|
11
|
+
[][sonarquality]
|
|
12
|
+
|
|
13
|
+
[][pre-commit]
|
|
14
|
+
[][black]
|
|
15
|
+
[](https://github.com/astral-sh/ruff)
|
|
16
|
+
[][poetry]
|
|
17
|
+
|
|
18
|
+
[pypi status]: https://pypi.org/project/ssb-nudb-use/
|
|
19
|
+
[documentation]: https://statisticsnorway.github.io/ssb-nudb-use
|
|
20
|
+
[tests]: https://github.com/statisticsnorway/ssb-nudb-use/actions?workflow=Tests
|
|
21
|
+
[sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-nudb-use
|
|
22
|
+
[sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-nudb-use
|
|
23
|
+
[pre-commit]: https://github.com/pre-commit/pre-commit
|
|
24
|
+
[black]: https://github.com/psf/black
|
|
25
|
+
[poetry]: https://python-poetry.org/
|
|
26
|
+
|
|
27
|
+
# Description
|
|
28
|
+
|
|
29
|
+
NUDB is the National Education Database of Norway. It is operated by Statistics Norway - section 360.
|
|
30
|
+
This package is the main "usage-package" for those seeking to use NUDB-data, or deliver data to NUDB.
|
|
31
|
+
|
|
32
|
+
NUDB's data is kept as parquet files in GCP, and you will need separate access to this data to utilize this package.
|
|
33
|
+
Some features in this package might require access to other data, like BRREG (Brønnøysundregisteret), BOF (befolkningsregisteret), VOF (virksomhetsregisteret) etc.
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
You can install _SSB Nudb Use_ via [poetry] from [PyPI]:
|
|
39
|
+
|
|
40
|
+
```console
|
|
41
|
+
poetry add ssb-nudb-use
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Dependencies
|
|
45
|
+
|
|
46
|
+
This package depends on the package "ssb-nudb-config", which contains metadata, but also points to content in other metadatasystems like Vardef, Klass and Datadoc.
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
## Usage
|
|
50
|
+
|
|
51
|
+
Please see the [Reference Guide] for details.
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
### Usage for extraction (data from NUDB)
|
|
55
|
+
|
|
56
|
+
Find the latest of each file shared.
|
|
57
|
+
```python
|
|
58
|
+
from nudb_use import latest_shared_paths
|
|
59
|
+
latest_shared_paths()
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Get the periods out of any paths following the SSB-naming standard.
|
|
63
|
+
```python
|
|
64
|
+
from nudb_use import get_periods_from_path
|
|
65
|
+
get_periods_from_path(path)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Deriving variables not stored in data, is done by the derive module:
|
|
69
|
+
```python
|
|
70
|
+
from nudb_use import derive
|
|
71
|
+
df = derive.utd_skoleaar_slutt(df)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
### Usage for delivery (data to NUDB)
|
|
76
|
+
|
|
77
|
+
We have renamed a lot of our variables transitioning from the old on-prem systems. If you are looking for the new or old names of variables, you can use the find_var or find_vars functions:
|
|
78
|
+
```python
|
|
79
|
+
from nudb_use import find_vars
|
|
80
|
+
find_vars(["snr", "sosbak"])
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
If you want to update the column names you have in a pandas dataframe, to the new column names - there's a function for that:
|
|
84
|
+
```python
|
|
85
|
+
from nudb_use import update_colnames
|
|
86
|
+
df = update_colnames(df)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
After renaming, you can get the pandas dtypes the columns should have with get_dtypes:
|
|
90
|
+
```python
|
|
91
|
+
from nudb_use import get_dtypes
|
|
92
|
+
dtypes = get_dtypes(df)
|
|
93
|
+
df = df.astype(dtypes)
|
|
94
|
+
```
|
|
95
|
+
If you are delivering to NUDB, we want you to run our quality suite before sharing the data with us:
|
|
96
|
+
```python
|
|
97
|
+
from nudb_use import run_quality_suite
|
|
98
|
+
run_quality_suite(df, "avslutta")
|
|
99
|
+
```
|
|
100
|
+
Data about your delivery, like "avslutta", must first be entered into, and released in, the ssb-nudb-config package before it is available in this function. Contact the NUDB-team to define a new delivery.
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
## Contributing
|
|
106
|
+
|
|
107
|
+
Contributions are very welcome.
|
|
108
|
+
To learn more, see the [Contributor Guide].
|
|
109
|
+
|
|
110
|
+
## License
|
|
111
|
+
|
|
112
|
+
Distributed under the terms of the [MIT license][license],
|
|
113
|
+
_SSB Nudb Use_ is free and open source software.
|
|
114
|
+
|
|
115
|
+
## Issues
|
|
116
|
+
|
|
117
|
+
If you encounter any problems,
|
|
118
|
+
please [file an issue] along with a detailed description.
|
|
119
|
+
|
|
120
|
+
## Credits
|
|
121
|
+
|
|
122
|
+
This project was generated from [Statistics Norway]'s [SSB PyPI Template].
|
|
123
|
+
|
|
124
|
+
[statistics norway]: https://www.ssb.no/en
|
|
125
|
+
[pypi]: https://pypi.org/
|
|
126
|
+
[ssb pypi template]: https://github.com/statisticsnorway/ssb-pypitemplate
|
|
127
|
+
[file an issue]: https://github.com/statisticsnorway/ssb-nudb-use/issues
|
|
128
|
+
[pip]: https://pip.pypa.io/
|
|
129
|
+
|
|
130
|
+
<!-- github-only -->
|
|
131
|
+
|
|
132
|
+
[license]: https://github.com/statisticsnorway/ssb-nudb-use/blob/main/LICENSE
|
|
133
|
+
[contributor guide]: https://github.com/statisticsnorway/ssb-nudb-use/blob/main/CONTRIBUTING.md
|
|
134
|
+
[reference guide]: https://statisticsnorway.github.io/ssb-nudb-use/reference.html
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "ssb-nudb-use"
|
|
3
|
+
version = "2025.12.4" # Year.Month.Patch - So the last number is not day of month, but patch number within month
|
|
4
|
+
description = "NUDB Use - is a usage-package for the Norwegian National Education Database cloud-data. Both for data-consumers and data-deliverers"
|
|
5
|
+
authors = [{ name = "Carl F. Corneil", email = "cfc@ssb.no" }, { name = "Kjell Slupphaug", email = "pph@ssb.no" }, { name = "Markus Storeide", email = "rku@ssb.no" }]
|
|
6
|
+
maintainers = [{ name = "Statistics Norway, Education statistics Department (360)" }]
|
|
7
|
+
license = "MIT"
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
dynamic = ["classifiers"]
|
|
11
|
+
dependencies = [
|
|
12
|
+
"ssb-klass-python >=1.0.4",
|
|
13
|
+
"pandas >=2.3.3",
|
|
14
|
+
"ssb-fagfunksjoner >=1.1.2; python_version >= '3.11' and python_version < '4.0'",
|
|
15
|
+
"pydantic >=2.12.4",
|
|
16
|
+
"brreg >=1.3.0",
|
|
17
|
+
"requests >=2.32.5",
|
|
18
|
+
"ssb-nudb-config >=2025.12.6"
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[project.urls]
|
|
22
|
+
homepage = "https://github.com/statisticsnorway/ssb-nudb-use"
|
|
23
|
+
repository = "https://github.com/statisticsnorway/ssb-nudb-use"
|
|
24
|
+
documentation = "https://statisticsnorway.github.io/ssb-nudb-use"
|
|
25
|
+
Changelog = "https://github.com/statisticsnorway/ssb-nudb-use/releases"
|
|
26
|
+
|
|
27
|
+
[tool.poetry]
|
|
28
|
+
classifiers = ["Development Status :: 3 - Alpha"]
|
|
29
|
+
requires-poetry = ">=2.0"
|
|
30
|
+
packages = [{ include = "nudb_use", from = "src" }]
|
|
31
|
+
|
|
32
|
+
[tool.poetry.group.dev.dependencies]
|
|
33
|
+
pygments = ">=2.10.0"
|
|
34
|
+
black = { extras = ["jupyter"], version = ">=23.1.0" }
|
|
35
|
+
coverage = { extras = ["toml"], version = ">=6.2" }
|
|
36
|
+
furo = ">=2021.11.12"
|
|
37
|
+
mypy = ">=0.930"
|
|
38
|
+
pre-commit = ">=2.16.0"
|
|
39
|
+
pre-commit-hooks = ">=4.1.0"
|
|
40
|
+
pydoclint = { version = ">=0.8.3" }
|
|
41
|
+
ruff = ">=0.0.284"
|
|
42
|
+
pytest = ">=6.2.5"
|
|
43
|
+
sphinx = ">=6.2.1"
|
|
44
|
+
sphinx-autobuild = ">=2021.3.14"
|
|
45
|
+
sphinx-autodoc-typehints = ">=1.24.0"
|
|
46
|
+
sphinx-click = ">=3.0.2"
|
|
47
|
+
typeguard = ">=2.13.3"
|
|
48
|
+
xdoctest = { extras = ["colors"], version = ">=0.15.10" }
|
|
49
|
+
myst-parser = { version = ">=0.16.1" }
|
|
50
|
+
deptry = {version=">=0.23.0", python = ">=3.10, <4.0"}
|
|
51
|
+
colorama = ">=0.4.6"
|
|
52
|
+
|
|
53
|
+
[tool.poetry.requires-plugins]
|
|
54
|
+
poetry-plugin-export = ">=1.9"
|
|
55
|
+
|
|
56
|
+
[tool.pytest.ini_options]
|
|
57
|
+
pythonpath = ["src"]
|
|
58
|
+
|
|
59
|
+
[tool.coverage.paths]
|
|
60
|
+
source = ["src", "*/site-packages"]
|
|
61
|
+
tests = ["tests", "*/tests"]
|
|
62
|
+
|
|
63
|
+
[tool.coverage.run]
|
|
64
|
+
branch = true
|
|
65
|
+
source = ["nudb_use", "tests"]
|
|
66
|
+
relative_files = true
|
|
67
|
+
|
|
68
|
+
[tool.coverage.report]
|
|
69
|
+
show_missing = true
|
|
70
|
+
fail_under = 50
|
|
71
|
+
|
|
72
|
+
[tool.deptry.per_rule_ignores]
|
|
73
|
+
DEP001 = ["nox", "nox_poetry"] # packages available by default
|
|
74
|
+
|
|
75
|
+
[tool.mypy]
|
|
76
|
+
strict = true
|
|
77
|
+
warn_unreachable = true
|
|
78
|
+
pretty = true
|
|
79
|
+
show_column_numbers = true
|
|
80
|
+
show_error_context = true
|
|
81
|
+
|
|
82
|
+
[tool.ruff]
|
|
83
|
+
force-exclude = true # Apply excludes to pre-commit
|
|
84
|
+
show-fixes = true
|
|
85
|
+
src = ["src", "tests"]
|
|
86
|
+
target-version = "py311" # Minimum Python version supported
|
|
87
|
+
include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"]
|
|
88
|
+
extend-exclude = [
|
|
89
|
+
"__pycache__",
|
|
90
|
+
"old",
|
|
91
|
+
".ipynb_checkpoints",
|
|
92
|
+
"noxfile.py",
|
|
93
|
+
"docs/conf.py"
|
|
94
|
+
]
|
|
95
|
+
|
|
96
|
+
# Ruff rules may be customized as desired: https://docs.astral.sh/ruff/rules/
|
|
97
|
+
[tool.ruff.lint]
|
|
98
|
+
select = [
|
|
99
|
+
"A", # prevent using keywords that clobber python builtins
|
|
100
|
+
"ANN", # check type annotations
|
|
101
|
+
"B", # bugbear: likely bugs and design problems
|
|
102
|
+
"D", # documentation
|
|
103
|
+
"E", # pycodestyle
|
|
104
|
+
"F", # pyflakes
|
|
105
|
+
"ISC", # implicit string concatenation
|
|
106
|
+
"I", # sort imports
|
|
107
|
+
"UP", # alert you when better syntax is available in your python version
|
|
108
|
+
"RUF", # the ruff developer's own rules
|
|
109
|
+
]
|
|
110
|
+
ignore = [
|
|
111
|
+
"ANN202", # Don't require return type annotation for private functions.
|
|
112
|
+
"ANN401", # Allow type annotation with type Any.
|
|
113
|
+
"D100", # Suppress undocumented-public-module. Only doc of public api required.
|
|
114
|
+
"D107", # We are of the opinion that inits dont need a docstring, when the class has one...
|
|
115
|
+
"FBT001", # Allow boolean positional arguments in a function.
|
|
116
|
+
"FBT002", # Allow boolean default positional arguments in a function.
|
|
117
|
+
"E402", # Suppress module-import-not-at-top-of-file, needed in jupyter notebooks.
|
|
118
|
+
"E501", # Suppress line-too-long warnings: trust black's judgement on this one.
|
|
119
|
+
"PLR2004", # Allow to compare with unnamed numerical constants.
|
|
120
|
+
]
|
|
121
|
+
|
|
122
|
+
[tool.ruff.lint.isort]
|
|
123
|
+
force-single-line = true
|
|
124
|
+
|
|
125
|
+
[tool.ruff.lint.mccabe]
|
|
126
|
+
max-complexity = 15
|
|
127
|
+
|
|
128
|
+
[tool.ruff.lint.pydocstyle]
|
|
129
|
+
convention = "google" # You can also use "numpy".
|
|
130
|
+
|
|
131
|
+
[tool.ruff.lint.pylint]
|
|
132
|
+
max-args = 8
|
|
133
|
+
|
|
134
|
+
[tool.ruff.lint.pep8-naming]
|
|
135
|
+
classmethod-decorators = ["classmethod", "validator", "root_validator", "pydantic.validator"]
|
|
136
|
+
|
|
137
|
+
[tool.ruff.lint.per-file-ignores]
|
|
138
|
+
"*/__init__.py" = ["F401"]
|
|
139
|
+
"**/tests/*" = [
|
|
140
|
+
"ANN001", # type annotations don't add value for test functions
|
|
141
|
+
"ANN002", # type annotations don't add value for test functions
|
|
142
|
+
"ANN003", # type annotations don't add value for test functions
|
|
143
|
+
"ANN201", # type annotations don't add value for test functions
|
|
144
|
+
"ANN204", # type annotations don't add value for test functions
|
|
145
|
+
"ANN205", # type annotations don't add value for test functions
|
|
146
|
+
"ANN206", # type annotations don't add value for test functions
|
|
147
|
+
"D100", # docstrings are overkill for test functions
|
|
148
|
+
"D101",
|
|
149
|
+
"D102",
|
|
150
|
+
"D103",
|
|
151
|
+
"S101", # asserts are encouraged in pytest
|
|
152
|
+
]
|
|
153
|
+
"src/nudb_use/variables/derive/*.py" = ["RUF100"] # Without this, Ruff strips our inline noqa comments meant for pydoclint
|
|
154
|
+
|
|
155
|
+
[tool.pydoclint]
|
|
156
|
+
style = "google"
|
|
157
|
+
arg-type-hints-in-docstring = false
|
|
158
|
+
native-mode-noqa-location = "definition"
|
|
159
|
+
|
|
160
|
+
[build-system]
|
|
161
|
+
requires = ["poetry-core>=1.0.0"]
|
|
162
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
1. Klass "check against codelist" / getting klass data
|
|
2
|
+
1. metadata/nudb_klass/codes
|
|
3
|
+
2. variables/checks
|
|
4
|
+
3. variables/var_utils/find_var
|
|
5
|
+
2. Column renames from config:
|
|
6
|
+
1. variables/var_utils/renames
|
|
7
|
+
2. metadata/nudb_config/variable_names
|
|
8
|
+
3. nudb_build/variables/var_utils/renames
|
|
9
|
+
3. Nudb config around:
|
|
10
|
+
1. config.py
|
|
11
|
+
2. config_tomls/
|
|
12
|
+
3. metadata/nudb_config
|
|
13
|
+
4. Checking column "presence" / in config / drop / keep
|
|
14
|
+
1. variables/var_utils/find_var
|
|
15
|
+
2. variables/checks.py
|
|
16
|
+
3. metadata/nudb_config/variable_names
|
|
17
|
+
|
|
18
|
+
# Build vs. Use
|
|
19
|
+
1. Moves from Build to Use?
|
|
20
|
+
1. nudb_build/variables/cleanup.py -> nudb_use/variables/
|
|
21
|
+
2. metadata/datadoc.py
|
|
22
|
+
3. metadata/klass_nudb.py
|
|
23
|
+
4. metadata/merge.py (inn i datadoc.py)?
|
|
24
|
+
5. quality?
|
|
25
|
+
6. "columns in pyarrow metadata" better place in use: nudb_build/variables/checks.py
|
|
26
|
+
1. Paths in build, what is in use?
|
|
27
|
+
1. Remove hardcoding in functions, depend on config?
|
|
28
|
+
2. Moves in build package
|
|
29
|
+
1. Make "exceptions" folder, move temporality-exception there
|
|
30
|
+
2.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""NUDB Use - is a usage-package for the Norwegian National Education Database cloud-data. Both for data-consumers and data-deliverers. Requires access to NUDBs shared data in most instances."""
|
|
2
|
+
|
|
3
|
+
import importlib.metadata
|
|
4
|
+
|
|
5
|
+
from nudb_config import settings
|
|
6
|
+
|
|
7
|
+
from nudb_use.metadata import find_var
|
|
8
|
+
from nudb_use.metadata import find_vars
|
|
9
|
+
from nudb_use.metadata import get_dtypes
|
|
10
|
+
from nudb_use.metadata import update_colnames
|
|
11
|
+
from nudb_use.nudb_logger import LoggerStack
|
|
12
|
+
from nudb_use.nudb_logger import logger
|
|
13
|
+
from nudb_use.paths import get_periods_from_path
|
|
14
|
+
from nudb_use.paths import latest_shared_paths
|
|
15
|
+
from nudb_use.quality import run_quality_suite
|
|
16
|
+
from nudb_use.utils.packages import _check_ssb_nudb_config_version
|
|
17
|
+
from nudb_use.variables import derive
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"LoggerStack",
|
|
21
|
+
"derive",
|
|
22
|
+
"find_var",
|
|
23
|
+
"find_vars",
|
|
24
|
+
"get_dtypes",
|
|
25
|
+
"get_periods_from_path",
|
|
26
|
+
"latest_shared_paths",
|
|
27
|
+
"logger",
|
|
28
|
+
"run_quality_suite",
|
|
29
|
+
"settings",
|
|
30
|
+
"update_colnames",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Import-time sanity check of the installed `ssb-nudb-config` version.
# Any failure is downgraded to a warning so that importing the package
# itself never fails because of this check.
try:
    _check_ssb_nudb_config_version()
except Exception as err:
    logger.warning(
        "Unable to validate `ssb-nudb-config` version!\n"
        f"Message: {err}"
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Resolve the installed package version at import time.
# The module name is tried first, then the distribution name
# "ssb-nudb-use"; if both lookups fail, fall back to "0.0.0" and log a
# warning that carries the error from the second lookup.
try:
    __version__ = importlib.metadata.version(__name__)
except Exception:
    try:
        __version__ = importlib.metadata.version("ssb-nudb-use")
    except Exception as err:
        __version__ = "0.0.0"
        logger.warning(f"Unable to determine package version!\nMessage: {err}")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Custom exception types and helpers for nudb_use."""
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Exception types used across nudb_use."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class NudbQualityError(Exception):
    """Domain-specific error for NUDB quality validations.

    A plain ``Exception`` subclass: it defines no attributes or methods of
    its own and only provides a dedicated type for this kind of failure.
    """
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class NudbDerivedFromNotFoundError(Exception):
    """Domain-specific error for derive issues when needed columns are not found.

    A plain ``Exception`` subclass: it defines no attributes or methods of
    its own and only provides a dedicated type for this kind of failure.
    """
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Utilities for raising or logging grouped exceptions."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
|
|
5
|
+
from nudb_use.nudb_logger import logger
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def raise_exception_group(errors: Sequence[Exception]) -> None:
|
|
9
|
+
"""Raise grouped exceptions as ExceptionGroup (Py3.11+) or ValueError.
|
|
10
|
+
|
|
11
|
+
Args:
|
|
12
|
+
errors: Exceptions that should be raised together.
|
|
13
|
+
|
|
14
|
+
Raises:
|
|
15
|
+
ExceptionGroup: On Python 3.11+, raised with every collected error.
|
|
16
|
+
"""
|
|
17
|
+
if not errors:
|
|
18
|
+
return # No errors to raise
|
|
19
|
+
raise ExceptionGroup("Multiple errors occurred", list(errors))
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def warn_exception_group(errors: Sequence[Exception]) -> None:
    """Emit one logger warning per exception, in the order given.

    Nothing is logged when ``errors`` is empty.

    Args:
        errors: Exceptions whose string form should be logged as warnings.
    """
    # `errors or ()` skips the loop entirely for empty input.
    for exc in errors or ():
        logger.warning(str(exc))
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Expose NUDB metadata helpers for convenient imports."""
|
|
2
|
+
|
|
3
|
+
from nudb_use.metadata.nudb_config import find_var
|
|
4
|
+
from nudb_use.metadata.nudb_config import find_vars
|
|
5
|
+
from nudb_use.metadata.nudb_config import get_cols2drop
|
|
6
|
+
from nudb_use.metadata.nudb_config import get_cols2keep
|
|
7
|
+
from nudb_use.metadata.nudb_config import get_dtypes
|
|
8
|
+
from nudb_use.metadata.nudb_config import get_var_metadata
|
|
9
|
+
from nudb_use.metadata.nudb_config import sort_cols_by_unit
|
|
10
|
+
from nudb_use.metadata.nudb_config import update_colnames
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"find_var",
|
|
14
|
+
"find_vars",
|
|
15
|
+
"get_cols2drop",
|
|
16
|
+
"get_cols2keep",
|
|
17
|
+
"get_dtypes",
|
|
18
|
+
"get_var_metadata",
|
|
19
|
+
"sort_cols_by_unit",
|
|
20
|
+
"update_colnames",
|
|
21
|
+
]
|