ssb-timeseries 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright © 2024 Statistics Norway
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,202 @@
1
+ Metadata-Version: 2.1
2
+ Name: ssb-timeseries
3
+ Version: 0.0.0
4
+ Summary: SSB Timeseries
5
+ Home-page: https://github.com/statisticsnorway/ssb-timeseries
6
+ License: MIT
7
+ Author: Bernhard Ryeng
8
+ Author-email: bernhard.ryeng@ssb.no
9
+ Requires-Python: >=3.10,<3.13
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Requires-Dist: bigtree (>=0.17.0,<0.18.0)
17
+ Requires-Dist: click (>=8.1.7,<9.0.0)
18
+ Requires-Dist: dapla-toolbelt (>=1.3.2)
19
+ Requires-Dist: duckdb (>=0.10.0,<0.11.0)
20
+ Requires-Dist: google-cloud-logging (>=3.8.0,<4.0.0)
21
+ Requires-Dist: pandas (>=2.1.1,<3.0.0)
22
+ Requires-Dist: polars (>=0.19.18,<0.20.0)
23
+ Requires-Dist: pyarrow (>=14.0.0,<15.0.0)
24
+ Requires-Dist: pytest (>=7.4.3,<8.0.0)
25
+ Requires-Dist: pytz (>=2023.3.post1,<2024.0)
26
+ Requires-Dist: ssb-klass-python (>=0.0.7,<0.0.8)
27
+ Requires-Dist: typing-extensions (>=4.11.0,<5.0.0)
28
+ Project-URL: Changelog, https://github.com/statisticsnorway/ssb-timeseries/releases
29
+ Project-URL: Documentation, https://statisticsnorway.github.io/ssb-timeseries
30
+ Project-URL: Repository, https://github.com/statisticsnorway/ssb-timeseries
31
+ Description-Content-Type: text/markdown
32
+
33
+ # SSB Timeseries
34
+
35
+ [![PyPI](https://img.shields.io/pypi/v/ssb-timeseries.svg)][pypi status]
36
+ [![Status](https://img.shields.io/pypi/status/ssb-timeseries.svg)][pypi status]
37
+ [![Python Version](https://img.shields.io/pypi/pyversions/ssb-timeseries)][pypi status]
38
+ [![License](https://img.shields.io/pypi/l/ssb-timeseries)][license]
39
+
40
+ [![Documentation](https://github.com/statisticsnorway/ssb-timeseries/actions/workflows/docs.yml/badge.svg)][documentation]
41
+ [![Tests](https://github.com/statisticsnorway/ssb-timeseries/actions/workflows/tests.yml/badge.svg)][tests]
42
+ [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_ssb-timeseries&metric=coverage)][sonarcov]
43
+ [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_ssb-timeseries&metric=alert_status)][sonarquality]
44
+
45
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)][pre-commit]
46
+ [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)][black]
47
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
48
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)][poetry]
49
+
50
+ [pypi status]: https://pypi.org/project/ssb-timeseries/
51
+ [documentation]: https://statisticsnorway.github.io/ssb-timeseries
52
+ [tests]: https://github.com/statisticsnorway/ssb-timeseries/actions?workflow=Tests
53
+
54
+ [sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-timeseries
55
+ [sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-timeseries
56
+ [pre-commit]: https://github.com/pre-commit/pre-commit
57
+ [black]: https://github.com/psf/black
58
+ [poetry]: https://python-poetry.org/
59
+
60
+ ## Background
61
+
62
+ Statistics Norway is building a new production system in the cloud.
63
+
64
+ Moving towards modern architecture, development methodology and open source technologies: Python and R are replacing SAS for statistics production code. Oracle databases and ODI for ETL are being replaced by a data lake architecture relying heavily on Parquet files.
65
+
66
+ Another big issue has been time series. Time series are essential to statistics production, so the decision to phase out FAME while not having landed precisely what should replace it has left a huge gap.
67
+
68
+ A complete solution will touch several areas of functionality:
69
+
70
+ * The core is storage with performant read and write, search and filtering
71
+ * Good descriptive metadata is key to findability
72
+ * A wide selection of math and statistics libraries is key for calculations and models
73
+ * Visualisation tools play a role both in ad hoc and routine inspection and quality control
74
+ * Workflow integration with automation and process monitoring helps keep quality consistent
75
+ * Data lineage and process metadata are essential for quality control
76
+
77
+ In Statistics Norway strict requirements for transparency and data quality are mandated by law and commitment to international standards. The data itself has a wide variety, but time resolution and publishing frequencies are typically low. While volumes are sometimes significant, they are far from extreme. This shifts the focus from performance towards process and data control.
78
+
79
+ This project came out of a PoC to demonstrate how the key functionality may be provided with the core technologies Python and Parquet, in alignment with architecture decisions and process model requirements. Constructed to be an abstraction between the storage layer and the statistics production code, it provides a way forward while postponing some of the technical choices.
80
+
81
+ * Basic functionality for read/write, calculations, time aggregation and plotting was demonstrated December 2023.
82
+ * Persisting snapshots in alignment with the process model, simple descriptive tagging and integrations with GCS buckets was added Q1 2024.
83
+
84
+ ## How to get started?
85
+
86
+ See notebook files and tests, `demo.ipynb` and `tests/test_*.py` for examples of usage, and what works and in some cases what does not.
87
+
88
+ Note that
89
+ * The library is constructed to be platform independent, but top priority is making it work in a Linux environment.
90
+ * Install by way of `poetry add ssb_timeseries`.
91
+ * The library should work out of the box with default settings. Note that the defaults are for local testing, i.e. not suitable for a production setting.
92
+ * To apply custom settings: The environment variable TIMESERIES_CONFIG should point to a JSON file with configurations.
93
+    * The command `poetry run timeseries-config <...>` can be run from a terminal in order to shift between defaults.
94
+    * Run `poetry run timeseries-config home` to create the environment variable and a file with default configurations in the home directory, i.e. `/home/jovyan` in the Jupyter environment (or the equivalent when running elsewhere).
95
+ * The similar `poetry run timeseries-config gcs` will put configurations and logs in the home directory and time series data in a shared bucket `gs://ssb-prod-dapla-felles-data-delt/poc-tidsserier`.
96
+ * With the environment variable set and the configuration in place `poetry run pytest` should succeed.
97
+
98
+
99
+ While the library is in a workable state and should work both locally and in JupyterLab, it is still in an exploratory phase. There is a risk that fundamental choices are reversed and breaking changes introduced.
100
+
101
+ With that disclaimer, feel free to explore and experiment, and do not be shy about asking questions or giving feedback. At this stage, feedback is all important.
102
+
103
+ Assuming you have Python working with a standard SSB setup for git and poetry etc, the following should get you going:
104
+
105
+ ``` bash
106
+ # Get the poc package
107
+ git clone https://github.com/statisticsnorway/arkitektur-poc-tidsserier.git
108
+
109
+ # Run inside a poetry controlled venv:
110
+ poetry shell
111
+ ## Create default config
112
+ poetry run timeseries-config home
113
+ # Run the tests to check that everything is OK:
114
+ poetry run pytest
115
+ # A couple of the test cases *are expected* to fail when running for the first time in a new location.
116
+ # They should create the structures they need and should succeed in subsequent runs.
117
+ ```
118
+ ~~ No longer needed:~~
119
+ ~~ Create and set a location for data and log files. This could be anywhere, but separated from the code is preferable.~~
120
+ ~~ mkdir series~~
121
+ ~~ export TIMESERIES_ROOT=${PWD}/series ~~
122
+ ~~ export LOG_LOCATION=${PWD}/series ~~
123
+
124
+
125
+ ## Functionality overview
126
+
127
+ The core of the library is the Dataset class. This is essentially a wrapper around a DataFrame (for now Pandas, later probably Polars) in the .data attribute.
128
+
129
+ The .data attribute should comply with conventions implied by the underlying *information model*. These will start out as pure conventions and are subject to evaluation. At a later stage they are likely to be enforced by Parquet schemas. Failing to obey them will cause some methods to fail.
130
+
131
+ The Dataset.io attribute connects the dataset to a helper class that takes care of reading and writing data. This structure abstracts away the IO mechanics, so that the user does not need to know about the "physical" details, only the *information model meaning* of the choices made.
132
+
133
+ * Read and write for both versioned and unversioned data types.
134
+ * Search for sets by name, regex and (planned for later) metadata.
135
+ * Basic filtering of sets (selecting series within a selected set).
136
+ * Basic linear algebra: Datasets can be added, subtracted, multiplied and divided with each other and dataframes, matrices, vectors (untested) and scalars according to normal rules.
137
+ * Basic plotting: Dataset.plot() as shorthand for Dataset.data.plot(<and sensible defaults>).
138
+ * Basic time aggregation:
139
+ `Dataset.groupby(<frequency>, 'sum'|'mean'|'auto')`
140
+ *
141
+
142
+
143
+ ## The information model
144
+
145
+ ### TLDR
146
+
147
+ * **Types** are defined by
148
+     * **Versioning** defines how updated versions of the truth are represented: NONE overwrites a single version, NAMED or AS_OF maintains new "logical" versions identified by name or date.
149
+ * **Temporality** describes the "real world" valid_at or valid_from - valid_to datetime of the data. It will translate into columns, datetime or period indexes of Dataset.data.
150
+ * Value type (only scalars for now) of Dataset.data "cells".
151
+ * **Datasets** can consist of multiple series. (Later: possible extension with sets of sets.)
152
+ * All series in a set must be of the same type.
153
+ * **Series** are value columns in Dataset.data, rows identified by date(s) or index corresponding to the temporality.
154
+ * The combination `<Dataset.name>.<Series.name>` will serve as a globally unique series identifier.
155
+ * `<Dataset.name>` identifies a "directory", hence must be unique. (Caveat: Directories per type creates room for error.)
156
+ * `<Series.name>` (.data column name) must be unique within the set.
157
+     * Series names *should* be related to (preferably constructed from) codes or meta data in such a way that they can be mapped to "tags" via a format mask (and if needed a translation table).
158
+
159
+ Yes, that *was* the short version. The long version is still pending production.
160
+
161
+ To be continued ...
162
+
163
+ ### How to contribute
164
+
165
+ More information about this will come later, but contributions are welcome. If you want to contribute, just let us know.
166
+
167
+ ### Other sources of documentation:
168
+
169
+ * https://statistics-norway.atlassian.net/wiki/spaces/Arkitektur/pages/3581313026/Statistikkproduksjon
170
+ * https://statistics-norway.atlassian.net/wiki/spaces/Arkitektur/pages/3595665419/Lagring+av+tidsserier
171
+
172
+ ## Contributing
173
+
174
+ Contributions are very welcome.
175
+ To learn more, see the [Contributor Guide].
176
+
177
+ ## License
178
+
179
+ Distributed under the terms of the [MIT license][license],
180
+ _SSB Timeseries_ is free and open source software.
181
+
182
+ ## Issues
183
+
184
+ If you encounter any problems,
185
+ please [file an issue] along with a detailed description.
186
+
187
+ ## Credits
188
+
189
+ This project was generated from [Statistics Norway]'s [SSB PyPI Template].
190
+
191
+ [statistics norway]: https://www.ssb.no/en
192
+ [pypi]: https://pypi.org/
193
+ [ssb pypi template]: https://github.com/statisticsnorway/ssb-pypitemplate
194
+ [file an issue]: https://github.com/statisticsnorway/ssb-timeseries/issues
195
+ [pip]: https://pip.pypa.io/
196
+
197
+ <!-- github-only -->
198
+
199
+ [license]: https://github.com/statisticsnorway/ssb-timeseries/blob/main/LICENSE
200
+ [contributor guide]: https://github.com/statisticsnorway/ssb-timeseries/blob/main/CONTRIBUTING.md
201
+ [reference guide]: https://statisticsnorway.github.io/ssb-timeseries/reference.html
202
+
@@ -0,0 +1,169 @@
1
+ # SSB Timeseries
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/ssb-timeseries.svg)][pypi status]
4
+ [![Status](https://img.shields.io/pypi/status/ssb-timeseries.svg)][pypi status]
5
+ [![Python Version](https://img.shields.io/pypi/pyversions/ssb-timeseries)][pypi status]
6
+ [![License](https://img.shields.io/pypi/l/ssb-timeseries)][license]
7
+
8
+ [![Documentation](https://github.com/statisticsnorway/ssb-timeseries/actions/workflows/docs.yml/badge.svg)][documentation]
9
+ [![Tests](https://github.com/statisticsnorway/ssb-timeseries/actions/workflows/tests.yml/badge.svg)][tests]
10
+ [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_ssb-timeseries&metric=coverage)][sonarcov]
11
+ [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_ssb-timeseries&metric=alert_status)][sonarquality]
12
+
13
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)][pre-commit]
14
+ [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)][black]
15
+ [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
16
+ [![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)][poetry]
17
+
18
+ [pypi status]: https://pypi.org/project/ssb-timeseries/
19
+ [documentation]: https://statisticsnorway.github.io/ssb-timeseries
20
+ [tests]: https://github.com/statisticsnorway/ssb-timeseries/actions?workflow=Tests
21
+
22
+ [sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-timeseries
23
+ [sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-timeseries
24
+ [pre-commit]: https://github.com/pre-commit/pre-commit
25
+ [black]: https://github.com/psf/black
26
+ [poetry]: https://python-poetry.org/
27
+
28
+ ## Background
29
+
30
+ Statistics Norway is building a new production system in the cloud.
31
+
32
+ Moving towards modern architecture, development methodology and open source technologies: Python and R are replacing SAS for statistics production code. Oracle databases and ODI for ETL are being replaced by a data lake architecture relying heavily on Parquet files.
33
+
34
+ Another big issue has been time series. Time series are essential to statistics production, so the decision to phase out FAME while not having landed precisely what should replace it has left a huge gap.
35
+
36
+ A complete solution will touch several areas of functionality:
37
+
38
+ * The core is storage with performant read and write, search and filtering
39
+ * Good descriptive metadata is key to findability
40
+ * A wide selection of math and statistics libraries is key for calculations and models
41
+ * Visualisation tools play a role both in ad hoc and routine inspection and quality control
42
+ * Workflow integration with automation and process monitoring helps keep quality consistent
43
+ * Data lineage and process metadata are essential for quality control
44
+
45
+ In Statistics Norway strict requirements for transparency and data quality are mandated by law and commitment to international standards. The data itself has a wide variety, but time resolution and publishing frequencies are typically low. While volumes are sometimes significant, they are far from extreme. This shifts the focus from performance towards process and data control.
46
+
47
+ This project came out of a PoC to demonstrate how the key functionality may be provided with the core technologies Python and Parquet, in alignment with architecture decisions and process model requirements. Constructed to be an abstraction between the storage layer and the statistics production code, it provides a way forward while postponing some of the technical choices.
48
+
49
+ * Basic functionality for read/write, calculations, time aggregation and plotting was demonstrated December 2023.
50
+ * Persisting snapshots in alignment with the process model, simple descriptive tagging and integrations with GCS buckets was added Q1 2024.
51
+
52
+ ## How to get started?
53
+
54
+ See notebook files and tests, `demo.ipynb` and `tests/test_*.py` for examples of usage, and what works and in some cases what does not.
55
+
56
+ Note that
57
+ * The library is constructed to be platform independent, but top priority is making it work in a Linux environment.
58
+ * Install by way of `poetry add ssb_timeseries`.
59
+ * The library should work out of the box with default settings. Note that the defaults are for local testing, i.e. not suitable for a production setting.
60
+ * To apply custom settings: The environment variable TIMESERIES_CONFIG should point to a JSON file with configurations.
61
+    * The command `poetry run timeseries-config <...>` can be run from a terminal in order to shift between defaults.
62
+    * Run `poetry run timeseries-config home` to create the environment variable and a file with default configurations in the home directory, i.e. `/home/jovyan` in the Jupyter environment (or the equivalent when running elsewhere).
63
+ * The similar `poetry run timeseries-config gcs` will put configurations and logs in the home directory and time series data in a shared bucket `gs://ssb-prod-dapla-felles-data-delt/poc-tidsserier`.
64
+ * With the environment variable set and the configuration in place `poetry run pytest` should succeed.
65
+
66
+
67
+ While the library is in a workable state and should work both locally and in JupyterLab, it is still in an exploratory phase. There is a risk that fundamental choices are reversed and breaking changes introduced.
68
+
69
+ With that disclaimer, feel free to explore and experiment, and do not be shy about asking questions or giving feedback. At this stage, feedback is all important.
70
+
71
+ Assuming you have Python working with a standard SSB setup for git and poetry etc, the following should get you going:
72
+
73
+ ``` bash
74
+ # Get the poc package
75
+ git clone https://github.com/statisticsnorway/arkitektur-poc-tidsserier.git
76
+
77
+ # Run inside a poetry controlled venv:
78
+ poetry shell
79
+ ## Create default config
80
+ poetry run timeseries-config home
81
+ # Run the tests to check that everything is OK:
82
+ poetry run pytest
83
+ # A couple of the test cases *are expected* to fail when running for the first time in a new location.
84
+ # They should create the structures they need and should succeed in subsequent runs.
85
+ ```
86
+ ~~ No longer needed:~~
87
+ ~~ Create and set a location for data and log files. This could be anywhere, but separated from the code is preferable.~~
88
+ ~~ mkdir series~~
89
+ ~~ export TIMESERIES_ROOT=${PWD}/series ~~
90
+ ~~ export LOG_LOCATION=${PWD}/series ~~
91
+
92
+
93
+ ## Functionality overview
94
+
95
+ The core of the library is the Dataset class. This is essentially a wrapper around a DataFrame (for now Pandas, later probably Polars) in the .data attribute.
96
+
97
+ The .data attribute should comply with conventions implied by the underlying *information model*. These will start out as pure conventions and are subject to evaluation. At a later stage they are likely to be enforced by Parquet schemas. Failing to obey them will cause some methods to fail.
98
+
99
+ The Dataset.io attribute connects the dataset to a helper class that takes care of reading and writing data. This structure abstracts away the IO mechanics, so that the user does not need to know about the "physical" details, only the *information model meaning* of the choices made.
100
+
101
+ * Read and write for both versioned and unversioned data types.
102
+ * Search for sets by name, regex and (planned for later) metadata.
103
+ * Basic filtering of sets (selecting series within a selected set).
104
+ * Basic linear algebra: Datasets can be added, subtracted, multiplied and divided with each other and dataframes, matrices, vectors (untested) and scalars according to normal rules.
105
+ * Basic plotting: Dataset.plot() as shorthand for Dataset.data.plot(<and sensible defaults>).
106
+ * Basic time aggregation:
107
+ `Dataset.groupby(<frequency>, 'sum'|'mean'|'auto')`
108
+ *
109
+
110
+
111
+ ## The information model
112
+
113
+ ### TLDR
114
+
115
+ * **Types** are defined by
116
+     * **Versioning** defines how updated versions of the truth are represented: NONE overwrites a single version, NAMED or AS_OF maintains new "logical" versions identified by name or date.
117
+ * **Temporality** describes the "real world" valid_at or valid_from - valid_to datetime of the data. It will translate into columns, datetime or period indexes of Dataset.data.
118
+ * Value type (only scalars for now) of Dataset.data "cells".
119
+ * **Datasets** can consist of multiple series. (Later: possible extension with sets of sets.)
120
+ * All series in a set must be of the same type.
121
+ * **Series** are value columns in Dataset.data, rows identified by date(s) or index corresponding to the temporality.
122
+ * The combination `<Dataset.name>.<Series.name>` will serve as a globally unique series identifier.
123
+ * `<Dataset.name>` identifies a "directory", hence must be unique. (Caveat: Directories per type creates room for error.)
124
+ * `<Series.name>` (.data column name) must be unique within the set.
125
+     * Series names *should* be related to (preferably constructed from) codes or meta data in such a way that they can be mapped to "tags" via a format mask (and if needed a translation table).
126
+
127
+ Yes, that *was* the short version. The long version is still pending production.
128
+
129
+ To be continued ...
130
+
131
+ ### How to contribute
132
+
133
+ More information about this will come later, but contributions are welcome. If you want to contribute, just let us know.
134
+
135
+ ### Other sources of documentation:
136
+
137
+ * https://statistics-norway.atlassian.net/wiki/spaces/Arkitektur/pages/3581313026/Statistikkproduksjon
138
+ * https://statistics-norway.atlassian.net/wiki/spaces/Arkitektur/pages/3595665419/Lagring+av+tidsserier
139
+
140
+ ## Contributing
141
+
142
+ Contributions are very welcome.
143
+ To learn more, see the [Contributor Guide].
144
+
145
+ ## License
146
+
147
+ Distributed under the terms of the [MIT license][license],
148
+ _SSB Timeseries_ is free and open source software.
149
+
150
+ ## Issues
151
+
152
+ If you encounter any problems,
153
+ please [file an issue] along with a detailed description.
154
+
155
+ ## Credits
156
+
157
+ This project was generated from [Statistics Norway]'s [SSB PyPI Template].
158
+
159
+ [statistics norway]: https://www.ssb.no/en
160
+ [pypi]: https://pypi.org/
161
+ [ssb pypi template]: https://github.com/statisticsnorway/ssb-pypitemplate
162
+ [file an issue]: https://github.com/statisticsnorway/ssb-timeseries/issues
163
+ [pip]: https://pip.pypa.io/
164
+
165
+ <!-- github-only -->
166
+
167
+ [license]: https://github.com/statisticsnorway/ssb-timeseries/blob/main/LICENSE
168
+ [contributor guide]: https://github.com/statisticsnorway/ssb-timeseries/blob/main/CONTRIBUTING.md
169
+ [reference guide]: https://statisticsnorway.github.io/ssb-timeseries/reference.html
@@ -0,0 +1,146 @@
1
+ [tool.poetry]
2
+ name = "ssb-timeseries"
3
+ version = "0.0.0"
4
+ description = "SSB Timeseries"
5
+ authors = ["Bernhard Ryeng <bernhard.ryeng@ssb.no>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+ homepage = "https://github.com/statisticsnorway/ssb-timeseries"
9
+ repository = "https://github.com/statisticsnorway/ssb-timeseries"
10
+ documentation = "https://statisticsnorway.github.io/ssb-timeseries"
11
+ classifiers = ["Development Status :: 4 - Beta"]
12
+
13
+ [tool.poetry.urls]
14
+ Changelog = "https://github.com/statisticsnorway/ssb-timeseries/releases"
15
+
16
+ [tool.poetry.dependencies]
17
+ python = ">=3.10,<3.13"
18
+ dapla-toolbelt = ">=1.3.2"
19
+ pandas = "^2.1.1"
20
+ pytest = "^7.4.3"
21
+ ssb-klass-python = "^0.0.7"
22
+ pyarrow = "^14.0.0"
23
+ google-cloud-logging = "^3.8.0"
24
+ pytz = "^2023.3.post1"
25
+ polars = "^0.19.18"
26
+ duckdb = "^0.10.0"
27
+ bigtree = "^0.17.0"
28
+ click = "^8.1.7"
29
+ typing-extensions = "^4.11.0"
30
+
31
+ [tool.poetry.group.dev.dependencies]
32
+ pygments = ">=2.10.0"
33
+ black = { extras = ["jupyter"], version = ">=23.1.0" }
34
+ coverage = { extras = ["toml"], version = ">=6.2" }
35
+ darglint = ">=1.8.1"
36
+ furo = ">=2021.11.12"
37
+ mypy = ">=0.930"
38
+ pre-commit = ">=2.16.0"
39
+ pre-commit-hooks = ">=4.1.0"
40
+ ruff = ">=0.0.284"
41
+ pytest = ">=6.2.5"
42
+ sphinx = ">=6.2.1"
43
+ sphinx-autobuild = ">=2021.3.14"
44
+ sphinx-autodoc-typehints = ">=1.24.0"
45
+ sphinx-click = ">=3.0.2"
46
+ typeguard = ">=2.13.3"
47
+ xdoctest = { extras = ["colors"], version = ">=0.15.10" }
48
+ myst-parser = { version = ">=0.16.1" }
49
+
50
+ [tool.pytest.ini_options]
51
+ pythonpath = ["src"]
52
+
53
+ [tool.poetry.scripts]
54
+ ssb-timeseries = "ssb_timeseries.__main__:main"
55
+ timeseries-config = "ssb_timeseries.config:main"
56
+
57
+ [tool.coverage.paths]
58
+ source = ["src", "*/site-packages"]
59
+ tests = ["tests", "*/tests"]
60
+
61
+ [tool.coverage.run]
62
+ branch = true
63
+ source = ["ssb_timeseries", "tests"]
64
+ relative_files = true
65
+
66
+ [tool.coverage.report]
67
+ show_missing = true
68
+ fail_under = 50
69
+
70
+ [tool.mypy]
71
+ strict = true
72
+ warn_unreachable = true
73
+ pretty = true
74
+ show_column_numbers = true
75
+ show_error_context = true
76
+
77
+ [tool.ruff]
78
+ force-exclude = true # Apply excludes to pre-commit
79
+ show-fixes = true
80
+ src = ["src", "tests"]
81
+ target-version = "py310" # Minimum Python version supported
82
+ include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"]
83
+ extend-exclude = [
84
+ "__pycache__",
85
+ "old",
86
+ ".ipynb_checkpoints",
87
+ "noxfile.py",
88
+ "docs/conf.py",
89
+ ]
90
+
91
+ # Ruff rules may be customized as desired: https://docs.astral.sh/ruff/rules/
92
+ [tool.ruff.lint]
93
+ select = [
94
+ "A", # prevent using keywords that clobber python builtins
95
+ "ANN", # check type annotations
96
+     "B", # bugbear: likely bugs and design problems
97
+ "D", # documentation
98
+ "E", # pycodestyle
99
+ "F", # pyflakes
100
+ "ISC", # implicit string concatenation
101
+ "I", # sort imports
102
+ "UP", # alert you when better syntax is available in your python version
103
+ "RUF", # the ruff developer's own rules
104
+ ]
105
+ ignore = [
106
+     "ANN101", # Suppress missing-type-self.
107
+     "ANN102", # Suppress missing-type-cls.
108
+     "ANN202", # Don't require return type annotation for private functions.
109
+ "ANN401", # Allow type annotation with type Any.
110
+     "D100", # Suppress undocumented-public-module. Only doc of public api required.
111
+     "E402", # Suppress module-import-not-at-top-of-file, needed in jupyter notebooks.
112
+     "E501", # Suppress line-too-long warnings: trust black's judgement on this one.
113
+ ]
114
+
115
+ [tool.ruff.lint.isort]
116
+ force-single-line = true
117
+
118
+ [tool.ruff.lint.mccabe]
119
+ max-complexity = 15
120
+
121
+ [tool.ruff.lint.pydocstyle]
122
+ convention = "google" # You can also use "numpy".
123
+
124
+ [tool.ruff.lint.pep8-naming]
125
+ classmethod-decorators = ["classmethod", "validator", "root_validator", "pydantic.validator"]
126
+
127
+ [tool.ruff.lint.per-file-ignores]
128
+ "*/__init__.py" = ["F401"]
129
+ "**/tests/*" = [
130
+ "ANN001", # type annotations don't add value for test functions
131
+ "ANN002", # type annotations don't add value for test functions
132
+ "ANN003", # type annotations don't add value for test functions
133
+ "ANN201", # type annotations don't add value for test functions
134
+ "ANN204", # type annotations don't add value for test functions
135
+ "ANN205", # type annotations don't add value for test functions
136
+ "ANN206", # type annotations don't add value for test functions
137
+ "D100", # docstrings are overkill for test functions
138
+ "D101",
139
+ "D102",
140
+ "D103",
141
+ "S101", # asserts are encouraged in pytest
142
+ ]
143
+
144
+ [build-system]
145
+ requires = ["poetry-core>=1.0.0"]
146
+ build-backend = "poetry.core.masonry.api"
@@ -0,0 +1,17 @@
1
+ """SSB timeseries is a helper library for statistics production and analytics.
2
+
3
+ It provides storage and search functionality with meta data integrations.
4
+
5
+ The approach is dataset centric: the DATASET is the basis for storage and workflow integration, and basic linear algebra operations can be performed on datasets.
6
+ """
7
+
8
+ __all__ = [
9
+ "dataset",
10
+ "dates",
11
+ "io",
12
+ "fs",
13
+ "config",
14
+ "logging",
15
+ "properties",
16
+ "sample_data",
17
+ ]
@@ -0,0 +1,24 @@
1
+ import os
2
+
3
+ # from ssb_timeseries import config
4
+
5
+ """Use this module for running / validating setups?
6
+ ISSUE: the code is not found when trying to run `python timeseries`
7
+ ... but is working fine for `python -m timeseries`
8
+ --> have a closer look at pyproject.toml?
9
+ """
10
+
11
+
12
+ def main() -> None:
13
+     """Print a greeting naming the configuration file; raise KeyError if TIMESERIES_CONFIG is unset."""
14
+     config_file = os.environ["TIMESERIES_CONFIG"]
15
+     greeting = f"Hello Timeseries!\n... the configuration file is: {config_file}. \nAdditional set up steps may be added later!"
16
+
17
+     print(greeting)
18
+     # perform set up steps:
19
+     # os.environ["TIMESERIES_CONFIG"] = DEFAULT_CONFIG_LOCATION
20
+
21
+
22
+ if __name__ == "__main__":
23
+ """Running `python timeseries` or `python -m timeseries` should run or validate setup."""
24
+ main()