PyPI - ssb-timeseries - Versions diffs - 0.0.0__tar.gz - Mend

ssb-timeseries 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

ssb_timeseries-0.0.0/LICENSE +21 -0
ssb_timeseries-0.0.0/PKG-INFO +202 -0
ssb_timeseries-0.0.0/README.md +169 -0
ssb_timeseries-0.0.0/pyproject.toml +146 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/__init__.py +17 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/__main__.py +24 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/config.py +209 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/dataset.py +796 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/dates.py +196 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/fs.py +327 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/functions.py +35 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/io.py +436 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/logging.py +150 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/meta.py +277 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/properties.py +160 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/py.typed +0 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/sample_data.py +149 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/sample_metadata.py +75 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/setup.py +59 -0
ssb_timeseries-0.0.0/src/ssb_timeseries/types.py +12 -0

ssb_timeseries-0.0.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright © 2024 Statistics Norway
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

ssb_timeseries-0.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,202 @@
+Metadata-Version: 2.1
+Name: ssb-timeseries
+Version: 0.0.0
+Summary: SSB Timeseries
+Home-page: https://github.com/statisticsnorway/ssb-timeseries
+License: MIT
+Author: Bernhard Ryeng
+Author-email: bernhard.ryeng@ssb.no
+Requires-Python: >=3.10,<3.13
+Classifier: Development Status :: 4 - Beta
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: bigtree (>=0.17.0,<0.18.0)
+Requires-Dist: click (>=8.1.7,<9.0.0)
+Requires-Dist: dapla-toolbelt (>=1.3.2)
+Requires-Dist: duckdb (>=0.10.0,<0.11.0)
+Requires-Dist: google-cloud-logging (>=3.8.0,<4.0.0)
+Requires-Dist: pandas (>=2.1.1,<3.0.0)
+Requires-Dist: polars (>=0.19.18,<0.20.0)
+Requires-Dist: pyarrow (>=14.0.0,<15.0.0)
+Requires-Dist: pytest (>=7.4.3,<8.0.0)
+Requires-Dist: pytz (>=2023.3.post1,<2024.0)
+Requires-Dist: ssb-klass-python (>=0.0.7,<0.0.8)
+Requires-Dist: typing-extensions (>=4.11.0,<5.0.0)
+Project-URL: Changelog, https://github.com/statisticsnorway/ssb-timeseries/releases
+Project-URL: Documentation, https://statisticsnorway.github.io/ssb-timeseries
+Project-URL: Repository, https://github.com/statisticsnorway/ssb-timeseries
+Description-Content-Type: text/markdown
+# SSB Timeseries
+[![PyPI](https://img.shields.io/pypi/v/ssb-timeseries.svg)][pypi status]
+[![Status](https://img.shields.io/pypi/status/ssb-timeseries.svg)][pypi status]
+[![Python Version](https://img.shields.io/pypi/pyversions/ssb-timeseries)][pypi status]
+[![License](https://img.shields.io/pypi/l/ssb-timeseries)][license]
+[![Documentation](https://github.com/statisticsnorway/ssb-timeseries/actions/workflows/docs.yml/badge.svg)][documentation]
+[![Tests](https://github.com/statisticsnorway/ssb-timeseries/actions/workflows/tests.yml/badge.svg)][tests]
+[![Coverage](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_ssb-timeseries&metric=coverage)][sonarcov]
+[![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_ssb-timeseries&metric=alert_status)][sonarquality]
+[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)][pre-commit]
+[![Black](https://img.shields.io/badge/code%20style-black-000000.svg)][black]
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
+[![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)][poetry]
+[pypi status]: https://pypi.org/project/ssb-timeseries/
+[documentation]: https://statisticsnorway.github.io/ssb-timeseries
+[tests]: https://github.com/statisticsnorway/ssb-timeseries/actions?workflow=Tests
+[sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-timeseries
+[sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-timeseries
+[pre-commit]: https://github.com/pre-commit/pre-commit
+[black]: https://github.com/psf/black
+[poetry]: https://python-poetry.org/
+## Background
+Statistics Norway is building a new production system in the cloud.
+Moving towards modern architecture, development methodology and open source technologies: Python and R are replacing SAS for statistics production code. Oracle databases and ODI for ETL are being replaced by a data lake architecture relying heavily on Parquet files.
+Another big issue has been time series. Time series are essential to statistics production, so the decision to phase out FAME while not having landed precisely what should replace it has left a huge gap.
+A complete solution will touch several areas of functionality:
+ * The core is storage with performant read and write, search and filtering
+ * Good descriptive metadata is key to findability
+ * A wide selection of math and statistics libraries is key for calculations and models
+ * Visualisation tools play a role both in ad hoc and routine inspection and quality control
+ * Workflow integration with automation and process monitoring help keeping consistent quality
+ * Data lineage and process metadata is essential for quality control
+ In Statistics Norway strict requirements for transparency and data quality are mandated by law and  commitment to international standards. The data itself has a wide variety, but time resolution and publishing frequencies are typically low. While volumes are some times significant, they are far from extreme. This shifts the focus from performance towards process and data control.
+This project came out of a PoC to demonstrate how the key functionality may be provided with the core technologies Python and Parquet, in alignment with architecture decisions and process model requirements. Constructed to be an abstraction between the storage layer and the statistics production code, it provides a way forward while postponing some of the technical choices.
+ * Basic functionality for read/write, calculations, time aggregation and plotting was demonstrated December 2023.
+ * Persisting snapshots in alignment with the process model, simple descriptive tagging and integrations with GCS buckets was added Q1 2024.
+## How to get started?
+See notebook files and tests, `demo.ipynb` and `tests/test_*.py` for examples of usage, and what works and in some cases what does not.
+Note that
+ * The library is constructed to be platform independent, but top priority is making it work in  a Linux environment.
+* Install by way of `poetry add ssb_timeseries`.
+* The library should work out of the box with default settings. Note that the defaults are for local testing, i.e. not suitable for the production setting.
+* To apply custom settings: The environment variable TIMESERIES_CONFIG should point to a JSON file with configurations.
+* The command `poetry run timeseries-config <...>` can be run from a terminal in order to shift between defaults.
+ * Run `poetry run timeseries-config home` to create the environment variable and a file with default configurations in the home directory, i.e. `/home/jovyan` in the Jupyter environment (or the equivalent when running elsewhere).
+* The similar `poetry run timeseries-config gcs` will put configurations and logs in the home directory and time series data in a shared bucket `gs://ssb-prod-dapla-felles-data-delt/poc-tidsserier`.
+* With the environment variable set and the configuration in place `poetry run pytest` should succeed.
+While the library is in a workable state and should work both locally and in JupyterLab, it is still in an exploratory phase. There is a risk that fundamental choices are reversed and breaking changes introduced.
+With that disclaimer, feel free to explore and experiment, and do not be shy about asking questions or giving feedback. At this stage, feedback is all important.
+Assuming you have Python working with a standard SSB setup for git and poetry etc, the following should get you going:
+``` bash
+# Get the poc package
+git clone https://github.com/statisticsnorway/arkitektur-poc-tidsserier.git
+# Run inside a poetry controlled venv:
+poetry shell
+## Create default config
+poetry run timeseries-config home
+# Run the tests to check that everything is OK:
+poetry run pytest
+# A couple of the test cases *are expected* to fail when running for the first time in a new location.
+# They should create the structures they need and should succeed in subsequent runs.
+```
+~~ No longer needed:~~
+~~ Create and set a location for data and log files. This could be anywhere, but separated from the code is preferrable.~~
+~~ mkdir series~~
+~~ export TIMESERIES_ROOT=${PWD}/series ~~
+~~ export LOG_LOCATION=${PWD}/series ~~
+## Functionality overview
+The core of the library is the Dataset class. This is essentially a wrapper around a DataFrame (for now Pandas, later probably Polars) in the .data attribute.
+The .data attribute should comply with conventions implied by the underlying *information model*. These will start out as pure conventions and are subject to evaluation. At a later stage they are likely to be enforced by Parquet schemas. Failing to obey them will cause some methods to fail.
+The Dataset.io attribute connects the dataset to a helper class that takes care of reading and writing data. This structure abstracts away the IO mechanics, so that the user does not need to know about the "physical" details, only the *information model meaning* of the choices made.
+ * Read and write for both versioned and unversioned data types.
+ * Search for sets by name, regex and (planned for later) metadata.
+ * Basic filtering of sets (selecting series within a selected set).
+ * Basic linear algebra: Datasets can be added, subtracted, multiplied and divided with each other and dataframes, matrices, vectors (untested) and scalars according to normal rules.
+ * Basic plotting: Dataset.plot() as shorthand for Dataset.data.plot(<and sensible defaults>).
+ * Basic time aggregation:
+ `Dataset.groupby(<frequency>, 'sum'|'mean'|'auto')`
+ *
+ ## The information model
+ ### TLDR
+ * **Types** are defined by
+  * **Versioning** defines how updated versions of the truth are represented: NONE overwrites a single version, NAMED or AS_OF maintains new "logical" versions identified by name or date.
+  * **Temporality** describes the "real world" valid_at or valid_from - valid_to datetime of the data. It will translate into columns, datetime or period indexes of Dataset.data.
+  * Value type (only scalars for now) of Dataset.data "cells".
+* **Datasets** can consist of multiple series. (Later: possible extension with sets of sets.)
+* All series in a set must be of the same type.
+* **Series** are value columns in Dataset.data, rows identified by date(s) or index corresponding to the temporality.
+* The combination `<Dataset.name>.<Series.name>` will serve as a globally unique series identifier.
+* `<Dataset.name>` identifies a "directory", hence must be unique. (Caveat: Directories per type creates room for error.)
+* `<Series.name>` (.data column name) must be unique within the set.
+* Series names *should* be related to (preferably constructed from) codes or meta data in such a way that they can be mapped to "tags" via a format mask (and if needed a translation table).
+Yes, that *was* the short version. The long version is still pending production.
+To be continued ...
+### How to contribute
+More information about this will come later, but contributions are welcome. If you want to contribute, just let us know.
+### Other sources of documentation:
+* https://statistics-norway.atlassian.net/wiki/spaces/Arkitektur/pages/3581313026/Statistikkproduksjon
+* https://statistics-norway.atlassian.net/wiki/spaces/Arkitektur/pages/3595665419/Lagring+av+tidsserier
+## Contributing
+Contributions are very welcome.
+To learn more, see the [Contributor Guide].
+## License
+Distributed under the terms of the [MIT license][license],
+_SSB Timeseries_ is free and open source software.
+## Issues
+If you encounter any problems,
+please [file an issue] along with a detailed description.
+## Credits
+This project was generated from [Statistics Norway]'s [SSB PyPI Template].
+[statistics norway]: https://www.ssb.no/en
+[pypi]: https://pypi.org/
+[ssb pypi template]: https://github.com/statisticsnorway/ssb-pypitemplate
+[file an issue]: https://github.com/statisticsnorway/ssb-timeseries/issues
+[pip]: https://pip.pypa.io/
+<!-- github-only -->
+[license]: https://github.com/statisticsnorway/ssb-timeseries/blob/main/LICENSE
+[contributor guide]: https://github.com/statisticsnorway/ssb-timeseries/blob/main/CONTRIBUTING.md
+[reference guide]: https://statisticsnorway.github.io/ssb-timeseries/reference.html

ssb_timeseries-0.0.0/README.md ADDED Viewed

@@ -0,0 +1,169 @@
+# SSB Timeseries
+[![PyPI](https://img.shields.io/pypi/v/ssb-timeseries.svg)][pypi status]
+[![Status](https://img.shields.io/pypi/status/ssb-timeseries.svg)][pypi status]
+[![Python Version](https://img.shields.io/pypi/pyversions/ssb-timeseries)][pypi status]
+[![License](https://img.shields.io/pypi/l/ssb-timeseries)][license]
+[![Documentation](https://github.com/statisticsnorway/ssb-timeseries/actions/workflows/docs.yml/badge.svg)][documentation]
+[![Tests](https://github.com/statisticsnorway/ssb-timeseries/actions/workflows/tests.yml/badge.svg)][tests]
+[![Coverage](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_ssb-timeseries&metric=coverage)][sonarcov]
+[![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=statisticsnorway_ssb-timeseries&metric=alert_status)][sonarquality]
+[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)][pre-commit]
+[![Black](https://img.shields.io/badge/code%20style-black-000000.svg)][black]
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
+[![Poetry](https://img.shields.io/endpoint?url=https://python-poetry.org/badge/v0.json)][poetry]
+[pypi status]: https://pypi.org/project/ssb-timeseries/
+[documentation]: https://statisticsnorway.github.io/ssb-timeseries
+[tests]: https://github.com/statisticsnorway/ssb-timeseries/actions?workflow=Tests
+[sonarcov]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-timeseries
+[sonarquality]: https://sonarcloud.io/summary/overall?id=statisticsnorway_ssb-timeseries
+[pre-commit]: https://github.com/pre-commit/pre-commit
+[black]: https://github.com/psf/black
+[poetry]: https://python-poetry.org/
+## Background
+Statistics Norway is building a new production system in the cloud.
+Moving towards modern architecture, development methodology and open source technologies: Python and R are replacing SAS for statistics production code. Oracle databases and ODI for ETL are being replaced by a data lake architecture relying heavily on Parquet files.
+Another big issue has been time series. Time series are essential to statistics production, so the decision to phase out FAME while not having landed precisely what should replace it has left a huge gap.
+A complete solution will touch several areas of functionality:
+ * The core is storage with performant read and write, search and filtering
+ * Good descriptive metadata is key to findability
+ * A wide selection of math and statistics libraries is key for calculations and models
+ * Visualisation tools play a role both in ad hoc and routine inspection and quality control
+ * Workflow integration with automation and process monitoring help keeping consistent quality
+ * Data lineage and process metadata is essential for quality control
+ In Statistics Norway strict requirements for transparency and data quality are mandated by law and  commitment to international standards. The data itself has a wide variety, but time resolution and publishing frequencies are typically low. While volumes are some times significant, they are far from extreme. This shifts the focus from performance towards process and data control.
+This project came out of a PoC to demonstrate how the key functionality may be provided with the core technologies Python and Parquet, in alignment with architecture decisions and process model requirements. Constructed to be an abstraction between the storage layer and the statistics production code, it provides a way forward while postponing some of the technical choices.
+ * Basic functionality for read/write, calculations, time aggregation and plotting was demonstrated December 2023.
+ * Persisting snapshots in alignment with the process model, simple descriptive tagging and integrations with GCS buckets was added Q1 2024.
+## How to get started?
+See notebook files and tests, `demo.ipynb` and `tests/test_*.py` for examples of usage, and what works and in some cases what does not.
+Note that
+ * The library is constructed to be platform independent, but top priority is making it work in  a Linux environment.
+* Install by way of `poetry add ssb_timeseries`.
+* The library should work out of the box with default settings. Note that the defaults are for local testing, i.e. not suitable for the production setting.
+* To apply custom settings: The environment variable TIMESERIES_CONFIG should point to a JSON file with configurations.
+* The command `poetry run timeseries-config <...>` can be run from a terminal in order to shift between defaults.
+ * Run `poetry run timeseries-config home` to create the environment variable and a file with default configurations in the home directory, i.e. `/home/jovyan` in the Jupyter environment (or the equivalent when running elsewhere).
+* The similar `poetry run timeseries-config gcs` will put configurations and logs in the home directory and time series data in a shared bucket `gs://ssb-prod-dapla-felles-data-delt/poc-tidsserier`.
+* With the environment variable set and the configuration in place `poetry run pytest` should succeed.
+While the library is in a workable state and should work both locally and in JupyterLab, it is still in an exploratory phase. There is a risk that fundamental choices are reversed and breaking changes introduced.
+With that disclaimer, feel free to explore and experiment, and do not be shy about asking questions or giving feedback. At this stage, feedback is all important.
+Assuming you have Python working with a standard SSB setup for git and poetry etc, the following should get you going:
+``` bash
+# Get the poc package
+git clone https://github.com/statisticsnorway/arkitektur-poc-tidsserier.git
+# Run inside a poetry controlled venv:
+poetry shell
+## Create default config
+poetry run timeseries-config home
+# Run the tests to check that everything is OK:
+poetry run pytest
+# A couple of the test cases *are expected* to fail when running for the first time in a new location.
+# They should create the structures they need and should succeed in subsequent runs.
+```
+~~ No longer needed:~~
+~~ Create and set a location for data and log files. This could be anywhere, but separated from the code is preferrable.~~
+~~ mkdir series~~
+~~ export TIMESERIES_ROOT=${PWD}/series ~~
+~~ export LOG_LOCATION=${PWD}/series ~~
+## Functionality overview
+The core of the library is the Dataset class. This is essentially a wrapper around a DataFrame (for now Pandas, later probably Polars) in the .data attribute.
+The .data attribute should comply with conventions implied by the underlying *information model*. These will start out as pure conventions and are subject to evaluation. At a later stage they are likely to be enforced by Parquet schemas. Failing to obey them will cause some methods to fail.
+The Dataset.io attribute connects the dataset to a helper class that takes care of reading and writing data. This structure abstracts away the IO mechanics, so that the user does not need to know about the "physical" details, only the *information model meaning* of the choices made.
+ * Read and write for both versioned and unversioned data types.
+ * Search for sets by name, regex and (planned for later) metadata.
+ * Basic filtering of sets (selecting series within a selected set).
+ * Basic linear algebra: Datasets can be added, subtracted, multiplied and divided with each other and dataframes, matrices, vectors (untested) and scalars according to normal rules.
+ * Basic plotting: Dataset.plot() as shorthand for Dataset.data.plot(<and sensible defaults>).
+ * Basic time aggregation:
+ `Dataset.groupby(<frequency>, 'sum'|'mean'|'auto')`
+ *
+ ## The information model
+ ### TLDR
+ * **Types** are defined by
+  * **Versioning** defines how updated versions of the truth are represented: NONE overwrites a single version, NAMED or AS_OF maintains new "logical" versions identified by name or date.
+  * **Temporality** describes the "real world" valid_at or valid_from - valid_to datetime of the data. It will translate into columns, datetime or period indexes of Dataset.data.
+  * Value type (only scalars for now) of Dataset.data "cells".
+* **Datasets** can consist of multiple series. (Later: possible extension with sets of sets.)
+* All series in a set must be of the same type.
+* **Series** are value columns in Dataset.data, rows identified by date(s) or index corresponding to the temporality.
+* The combination `<Dataset.name>.<Series.name>` will serve as a globally unique series identifier.
+* `<Dataset.name>` identifies a "directory", hence must be unique. (Caveat: Directories per type creates room for error.)
+* `<Series.name>` (.data column name) must be unique within the set.
+* Series names *should* be related to (preferably constructed from) codes or meta data in such a way that they can be mapped to "tags" via a format mask (and if needed a translation table).
+Yes, that *was* the short version. The long version is still pending production.
+To be continued ...
+### How to contribute
+More information about this will come later, but contributions are welcome. If you want to contribute, just let us know.
+### Other sources of documentation:
+* https://statistics-norway.atlassian.net/wiki/spaces/Arkitektur/pages/3581313026/Statistikkproduksjon
+* https://statistics-norway.atlassian.net/wiki/spaces/Arkitektur/pages/3595665419/Lagring+av+tidsserier
+## Contributing
+Contributions are very welcome.
+To learn more, see the [Contributor Guide].
+## License
+Distributed under the terms of the [MIT license][license],
+_SSB Timeseries_ is free and open source software.
+## Issues
+If you encounter any problems,
+please [file an issue] along with a detailed description.
+## Credits
+This project was generated from [Statistics Norway]'s [SSB PyPI Template].
+[statistics norway]: https://www.ssb.no/en
+[pypi]: https://pypi.org/
+[ssb pypi template]: https://github.com/statisticsnorway/ssb-pypitemplate
+[file an issue]: https://github.com/statisticsnorway/ssb-timeseries/issues
+[pip]: https://pip.pypa.io/
+<!-- github-only -->
+[license]: https://github.com/statisticsnorway/ssb-timeseries/blob/main/LICENSE
+[contributor guide]: https://github.com/statisticsnorway/ssb-timeseries/blob/main/CONTRIBUTING.md
+[reference guide]: https://statisticsnorway.github.io/ssb-timeseries/reference.html

ssb_timeseries-0.0.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,146 @@
+[tool.poetry]
+name = "ssb-timeseries"
+version = "0.0.0"
+description = "SSB Timeseries"
+authors = ["Bernhard Ryeng <bernhard.ryeng@ssb.no>"]
+license = "MIT"
+readme = "README.md"
+homepage = "https://github.com/statisticsnorway/ssb-timeseries"
+repository = "https://github.com/statisticsnorway/ssb-timeseries"
+documentation = "https://statisticsnorway.github.io/ssb-timeseries"
+classifiers = ["Development Status :: 4 - Beta"]
+[tool.poetry.urls]
+Changelog = "https://github.com/statisticsnorway/ssb-timeseries/releases"
+[tool.poetry.dependencies]
+python = ">=3.10,<3.13"
+dapla-toolbelt = ">=1.3.2"
+pandas = "^2.1.1"
+pytest = "^7.4.3"
+ssb-klass-python = "^0.0.7"
+pyarrow = "^14.0.0"
+google-cloud-logging = "^3.8.0"
+pytz = "^2023.3.post1"
+polars = "^0.19.18"
+duckdb = "^0.10.0"
+bigtree = "^0.17.0"
+click = "^8.1.7"
+typing-extensions = "^4.11.0"
+[tool.poetry.group.dev.dependencies]
+pygments = ">=2.10.0"
+black = { extras = ["jupyter"], version = ">=23.1.0" }
+coverage = { extras = ["toml"], version = ">=6.2" }
+darglint = ">=1.8.1"
+furo = ">=2021.11.12"
+mypy = ">=0.930"
+pre-commit = ">=2.16.0"
+pre-commit-hooks = ">=4.1.0"
+ruff = ">=0.0.284"
+pytest = ">=6.2.5"
+sphinx = ">=6.2.1"
+sphinx-autobuild = ">=2021.3.14"
+sphinx-autodoc-typehints = ">=1.24.0"
+sphinx-click = ">=3.0.2"
+typeguard = ">=2.13.3"
+xdoctest = { extras = ["colors"], version = ">=0.15.10" }
+myst-parser = { version = ">=0.16.1" }
+[tool.pytest.ini_options]
+pythonpath = ["src"]
+[tool.poetry.scripts]
+ssb-timeseries = "ssb_timeseries.__main__:main"
+timeseries-config = "ssb_timeseries.config:main"
+[tool.coverage.paths]
+source = ["src", "*/site-packages"]
+tests = ["tests", "*/tests"]
+[tool.coverage.run]
+branch = true
+source = ["ssb_timeseries", "tests"]
+relative_files = true
+[tool.coverage.report]
+show_missing = true
+fail_under = 50
+[tool.mypy]
+strict = true
+warn_unreachable = true
+pretty = true
+show_column_numbers = true
+show_error_context = true
+[tool.ruff]
+force-exclude = true  # Apply excludes to pre-commit
+show-fixes = true
+src = ["src", "tests"]
+target-version = "py310"  # Minimum Python version supported
+include = ["*.py", "*.pyi", "**/pyproject.toml", "*.ipynb"]
+extend-exclude = [
+    "__pycache__",
+    "old",
+    ".ipynb_checkpoints",
+    "noxfile.py",
+    "docs/conf.py",
+]
+# Ruff rules may be customized as desired: https://docs.astral.sh/ruff/rules/
+[tool.ruff.lint]
+select = [
+    "A",    # prevent using keywords that clobber python builtins
+    "ANN",  # check type annotations
+    "B",    # bugbear: security warnings
+    "D",    # documentation
+    "E",    # pycodestyle
+    "F",    # pyflakes
+    "ISC",  # implicit string concatenation
+    "I",    # sort imports
+    "UP",   # alert you when better syntax is available in your python version
+    "RUF",  # the ruff developer's own rules
+]
+ignore = [
+    "ANN101", # Suppress missing-type-self.
+    "ANN102", # Suppress missing-type-cls.
+    "ANN202", # Don't require return type annotation for private functions.
+    "ANN401", # Allow type annotation with type Any.
+    "D100",   # Suppress undocumented-public-module. Only doc of public api required.
+    "E402",   # Suppress module-import-not-at-top-of-file, needed in jupyter notebooks.
+    "E501",   # Suppress line-too-long warnings: trust black's judgement on this one.
+]
+[tool.ruff.lint.isort]
+force-single-line = true
+[tool.ruff.lint.mccabe]
+max-complexity = 15
+[tool.ruff.lint.pydocstyle]
+convention = "google"  # You can also use "numpy".
+[tool.ruff.lint.pep8-naming]
+classmethod-decorators = ["classmethod", "validator", "root_validator", "pydantic.validator"]
+[tool.ruff.lint.per-file-ignores]
+"*/__init__.py" = ["F401"]
+"**/tests/*" = [
+    "ANN001",  # type annotations don't add value for test functions
+    "ANN002",  # type annotations don't add value for test functions
+    "ANN003",  # type annotations don't add value for test functions
+    "ANN201",  # type annotations don't add value for test functions
+    "ANN204",  # type annotations don't add value for test functions
+    "ANN205",  # type annotations don't add value for test functions
+    "ANN206",  # type annotations don't add value for test functions
+    "D100",    # docstrings are overkill for test functions
+    "D101",
+    "D102",
+    "D103",
+    "S101",    # asserts are encouraged in pytest
+]
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"

ssb_timeseries-0.0.0/src/ssb_timeseries/__init__.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""SSB timeseries is a helper library for statistics production and analytics.
+It provides storage and search functionality with meta data integrations.
+The approach is dataset centric: the DATASET is the basis for storage and workflow integration, and basic linear algebra operations can be performed on datasets.
+"""
+__all__ = [
+    "dataset",
+    "dates",
+    "io",
+    "fs",
+    "config",
+    "logging",
+    "properties",
+    "sample_data",
+]

ssb_timeseries-0.0.0/src/ssb_timeseries/__main__.py ADDED Viewed

@@ -0,0 +1,24 @@
+import os
+# from ssb_timeseries import config
+"""Use this module for running / validating setups?
+ISSUE: the code is not found when trying to run `python timeseries`
+... but is working fine for `python -m timeseries`
+--> have a closer look at pyproject.toml?
+"""
+def main() -> None:
+    """Validate setup."""
+    config_file = os.environ["TIMESERIES_CONFIG"]
+    greeting = f"Hello Timeseries!\n... the configuration file is: {config_file}. \nAdditional set up steps may be added later!"
+    print(greeting)
+    # perform set up steps:
+    # os.environ["TIMESERIES_CONFIG"] = DEFAULT_CONFIG_LOCATION
+if __name__ == "__main__":
+    """Running `python timeseries` or `python -m timeseries` should run or validate setup."""
+    main()