toolsos 0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
toolsos-0.1/PKG-INFO ADDED
@@ -0,0 +1,45 @@
1
+ Metadata-Version: 2.1
2
+ Name: toolsos
3
+ Version: 0.1
4
+ Summary: OS tools
5
+ Author-email: OS <d.schmitz@amsterdam.nl>
6
+ Keywords: feed,reader,tutorial
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Classifier: Programming Language :: Python
9
+ Classifier: Programming Language :: Python :: 3
10
+ Requires-Python: >=3.11
11
+ Description-Content-Type: text/markdown
12
+ Provides-Extra: dev
13
+ Requires-Dist: black; extra == "dev"
14
+ Requires-Dist: bumpver; extra == "dev"
15
+ Requires-Dist: isort; extra == "dev"
16
+ Requires-Dist: pip-tools; extra == "dev"
17
+ Requires-Dist: pytest; extra == "dev"
18
+ Provides-Extra: all
19
+ Requires-Dist: keyring; extra == "all"
20
+ Requires-Dist: plotly; extra == "all"
21
+ Requires-Dist: openpyxl; extra == "all"
22
+ Requires-Dist: sqlalchemy; extra == "all"
23
+ Requires-Dist: pyyaml; extra == "all"
24
+ Requires-Dist: requests; extra == "all"
25
+
26
+ # Tools Onderzoek & Statistiek
27
+
28
+ This repository contains the tools used by the data scientists and researchers working at Onderzoek & Statistiek.
29
+
30
+ ## Installation instructions
31
+
32
+ The package can be installed using:
33
+ - pip
34
+ - Use pip install toolsos[all]
35
+ - conda.
36
+ - Use `pip install toolsos`. No dependencies are installed this way; install them yourself (for example with conda).
37
+
38
+ ## Building the package
39
+
40
+ Instructions on building a package can be found [here](https://packaging.python.org/en/latest/tutorials/packaging-projects/)
41
+
42
+ - py -m pip install --upgrade build
43
+ - py -m build
44
+
45
+ ## Uploading the package to PyPi
toolsos-0.1/README.md ADDED
@@ -0,0 +1,20 @@
1
+ # Tools Onderzoek & Statistiek
2
+
3
+ This repository contains the tools used by the data scientists and researchers working at Onderzoek & Statistiek.
4
+
5
+ ## Installation instructions
6
+
7
+ The package can be installed using:
8
+ - pip
9
+ - Use pip install toolsos[all]
10
+ - conda.
11
+ - Use `pip install toolsos`. No dependencies are installed this way; install them yourself (for example with conda).
12
+
13
+ ## Building the package
14
+
15
+ Instructions on building a package can be found [here](https://packaging.python.org/en/latest/tutorials/packaging-projects/)
16
+
17
+ - py -m pip install --upgrade build
18
+ - py -m build
19
+
20
+ ## Uploading the package to PyPi
@@ -0,0 +1,37 @@
1
+ # pyproject.toml
2
+
3
+ [build-system]
4
+ requires = ["setuptools>=61.0.0", "wheel"]
5
+ build-backend = "setuptools.build_meta"
6
+
7
+ [project]
8
+ name = "toolsos"
9
+ version = "0.1"
10
+ description = "OS tools"
11
+ readme = "README.md"
12
+ authors = [{ name = "OS", email = "d.schmitz@amsterdam.nl" }]
13
+ # license = { file = "LICENSE" }
14
+ classifiers = [
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python",
17
+ "Programming Language :: Python :: 3",
18
+ ]
19
+ keywords = ["feed", "reader", "tutorial"]
20
+ # dependencies are kept empty so the package can be installed in a conda environment
21
+ # use pip install toolsos[all] to pip install with all dependencies
22
+ dependencies = []
23
+ requires-python = ">=3.11"
24
+
25
+ [project.optional-dependencies]
26
+ dev = ["black", "bumpver", "isort", "pip-tools", "pytest"]
27
+ all = [
28
+ "keyring",
29
+ "plotly",
30
+ "openpyxl",
31
+ "sqlalchemy",
32
+ "pyyaml",
33
+ "requests"
34
+ ]
35
+
36
+ #[project.urls]
37
+ #Homepage = "https://github.com/realpython/reader"
toolsos-0.1/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import pickle
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, Iterator, Optional, Any
7
+
8
+ import pandas as pd
9
+ import pyarrow as pa
10
+ import pyarrow.parquet as pq
11
+ from pyreadstat import pyreadstat as prs
12
+
13
+ if TYPE_CHECKING:
14
+ import pyreadstat
15
+
16
+
17
class SavToParquet:
    """Convert an SPSS ``.sav`` file to Parquet plus metadata sidecar files.

    The data is read in chunks through pyreadstat and appended to a single
    Parquet file. The pyreadstat metadata is stored next to it as
    ``<name>_meta.json`` and ``<name>_meta.pickle``. All output files are
    written next to the input file.
    """

    def __init__(
        self,
        file: str,
        folder_out: str,
        chunksize: Optional[int] = None,
        verbose: bool = False,
    ) -> None:
        """Store the conversion settings; no I/O happens here.

        Args:
            file (str): path of the ``.sav`` file to convert.
            folder_out (str): stored on the instance only.
                NOTE(review): no method reads this value, output is written
                next to ``file`` -- confirm whether it should control the
                output location.
            chunksize (Optional[int]): rows per chunk. ``None`` or ``0``
                selects the default of 5,000,000 rows.
            verbose (bool): print a line for every chunk that is written.
        """
        self.file = file
        self.folder_out = folder_out
        self.verbose = verbose
        self.chunksize = 5_000_000 if not chunksize else chunksize

    @property
    def path_out(self) -> str:
        """Path of the Parquet file: the input path with a ``.parquet`` suffix."""
        # with_suffix only touches the real extension. A plain str.replace
        # would also rewrite ".sav" inside directory names and would leave
        # "x.SAV" unchanged, making the writer overwrite the input file.
        return str(Path(self.file).with_suffix(".parquet"))

    @property
    def chunks(self) -> Iterator[tuple[pd.DataFrame, "pyreadstat.metadata_container"]]:
        """Fresh iterator of ``(dataframe, metadata)`` chunks of the input file."""
        return prs.read_file_in_chunks(
            prs.read_sav, self.file, chunksize=self.chunksize
        )

    def get_meta(self) -> tuple[pd.DataFrame, "pyreadstat.metadata_container"]:
        """Read the first 10 rows and return them together with the metadata."""
        return prs.read_sav(self.file, row_limit=10)

    def _ensure_meta(self) -> None:
        """Load ``self.meta`` from the file when it has not been read yet."""
        if getattr(self, "meta", None) is None:
            _, self.meta = self.get_meta()

    def _sidecar_path(self, suffix: str) -> str:
        """Return the output path with ``.parquet`` replaced by ``suffix``."""
        parquet_path = Path(self.path_out)
        return str(parquet_path.with_name(f"{parquet_path.stem}{suffix}"))

    def write_meta_to_json(self) -> None:
        """Dump every non-dunder attribute of the metadata to ``<name>_meta.json``."""
        self._ensure_meta()
        json_path = self._sidecar_path("_meta.json")

        meta_dict = {
            attr: getattr(self.meta, attr)
            for attr in dir(self.meta)
            if not attr.startswith("__")
        }

        with open(json_path, "w") as file:
            # The metadata can hold values json cannot encode (for example
            # datetime timestamps). They are deliberately written as strings;
            # the pickle sidecar keeps the lossless copy.
            json.dump(meta_dict, file, default=str)

    def write_meta_to_pickle(self) -> None:
        """Pickle the metadata object to ``<name>_meta.pickle``."""
        self._ensure_meta()
        pickle_path = self._sidecar_path("_meta.pickle")

        with open(pickle_path, "wb") as file:
            pickle.dump(self.meta, file)

    def write_to_parquet(self) -> None:
        """Stream the input file to Parquet, then write both metadata sidecars."""
        # The Parquet schema is derived from the 10-row sample returned by
        # get_meta(); every chunk written below has to match it.
        meta_df, self.meta = self.get_meta()
        schema = pa.Table.from_pandas(meta_df).schema

        print("Writing table")
        with pq.ParquetWriter(self.path_out, schema) as writer:
            for idx, (df, _) in enumerate(self.chunks):
                if self.verbose:
                    print(f"Writing chunk: {idx: >4}")

                writer.write_table(pa.Table.from_pandas(df))

        print("Writing metadata")
        self.write_meta_to_json()
        self.write_meta_to_pickle()
        print("Done")
77
+
78
+
79
def read_parquet_in_chunks(
    path: str, columns: Optional[list[str]] = None
) -> Iterator[pd.DataFrame]:
    """Lazily yield the Parquet file at ``path`` as pandas DataFrames.

    Args:
        path (str): location of the Parquet file.
        columns (Optional[list[str]]): passed through to ``iter_batches`` to
            select columns. Defaults to None.

    Yields:
        pd.DataFrame: one frame per batch returned by ``iter_batches``.
    """
    # Keep the ParquetFile bound to a local name for as long as we iterate.
    source = pq.ParquetFile(path)
    yield from (batch.to_pandas() for batch in source.iter_batches(columns=columns))
86
+
87
+
88
def read_metadata_container(path: str) -> dict[str, Any]:
    """Unpickle and return the object stored in the file at ``path``.

    SECURITY: unpickling can execute arbitrary code. Only use this on files
    you created yourself or otherwise trust.

    Args:
        path (str): location of the pickle file.

    Returns:
        Whatever object was pickled into the file.
    """
    with open(path, "rb") as handle:
        restored = pickle.load(handle)
    return restored
91
+
92
+
93
def read_meta_from_json(path: str) -> dict[str, Any]:
    """Parse the JSON file at ``path`` and return its content.

    Args:
        path (str): location of the JSON file.

    Returns:
        dict[str, Any]: the decoded JSON document.
    """
    with open(path) as handle:
        parsed = json.load(handle)
    return parsed
@@ -0,0 +1,114 @@
1
+ from __future__ import annotations
2
+
3
+ import getpass
4
+ import json
5
+ import subprocess
6
+ from json import JSONDecodeError
7
+ from typing import Optional
8
+
9
+ import keyring
10
+ import yaml
11
+
12
+
13
def get_db_connection_strings(
    path: str, reset_pw: Optional[list[str]] = None
) -> DbStringCollection:
    """Build a connection string for every database listed in a YAML file.

    Each top-level key of the YAML file names a connection and maps to the
    settings passed to ``build_conn_string`` (``user``, ``host``, ``port``,
    ``dbname``) plus a ``pw`` entry. When ``pw`` is ``"acces_token"`` the
    password is an Azure access token; otherwise it is read from the keyring
    of the device, prompting the user when it is missing.

    Args:
        path (str): path to the YAML file with the connection settings.
        reset_pw (Optional[list[str]], optional): names of connections (as
            used in the config file) whose stored password must be prompted
            for again. Defaults to None.

    Returns:
        DbStringCollection: Simple class with an attribute for each connection string
    """
    with open(path) as f:
        db_info = yaml.safe_load(f)

    dsc = DbStringCollection()
    to_reset = set(reset_pw or ())

    # An empty YAML file parses to None; treat it as "no connections".
    for dbname, params in (db_info or {}).items():
        # "acces_token" (sic) is the spelling read from existing config
        # files; the correctly spelled variant is accepted as well.
        if params.get("pw") in ("acces_token", "access_token"):
            pw = get_azure_access_token()
        else:
            pw = get_pw_from_keyring(
                dbname=dbname, user=params["user"], reset_pw=dbname in to_reset
            )

        # Drop the placeholder "pw" from the config before expanding it:
        # passing pw twice raises "got multiple values for keyword argument".
        conn_params = {key: value for key, value in params.items() if key != "pw"}
        dsc.add_conn_string(dbname, build_conn_string(pw=pw, **conn_params))

    return dsc
45
+
46
+
47
def build_conn_string(user: str, pw: str, host: str, port: str, dbname: str) -> str:
    """Builds the PostgreSQL connection string for the database

    The user name and password are percent-encoded so that characters with a
    meaning inside a URL (``@``, ``:``, ``/``, ...) cannot corrupt it.

    TODO: add possibility to use different database types

    Args:
        user (str): username
        pw (str): database password
        host (str): database host
        port (str): database port
        dbname (str): database name

    Returns:
        str: engine string
    """
    from urllib.parse import quote

    # safe="" also encodes "/", which quote() leaves untouched by default.
    safe_user = quote(str(user), safe="")
    safe_pw = quote(str(pw), safe="")
    return f"postgresql://{safe_user}:{safe_pw}@{host}:{port}/{dbname}"
64
+
65
+
66
def get_pw_from_keyring(dbname: str, user: str, reset_pw: Optional[bool] = None) -> str:
    """Return the keyring password for ``user`` on ``dbname``.

    When no password is stored, or when ``reset_pw`` is truthy, the user is
    prompted for one and the answer is saved in the keyring.

    Args:
        dbname (str): database name, used as the keyring service name
        user (str): username
        reset_pw (Optional[bool], optional): prompt for a new password even
            when one is already stored. Defaults to None.

    Returns:
        str: password
    """
    stored = keyring.get_password(dbname, user)
    if stored and not reset_pw:
        return stored

    fresh = getpass.getpass(f"Input password for {dbname}: ")
    keyring.set_password(dbname, user, fresh)
    return fresh
83
+
84
+
85
class DbStringCollection:
    """Namespace object holding one attribute per database connection string."""

    def add_conn_string(self, db_name: str, connection_str: str) -> None:
        """Store ``connection_str`` as the attribute ``db_name`` of this object.

        Args:
            db_name (str): attribute name under which the string is stored
            connection_str (str): the connection string
        """
        setattr(self, db_name, connection_str)

    def __repr__(self) -> str:
        # Only the names are shown: the connection strings embed passwords
        # and must not end up in logs or notebook output by accident.
        names = ", ".join(sorted(vars(self)))
        return f"{type(self).__name__}({names})"
90
+
91
+
92
def get_azure_access_token() -> str:
    """Return an Azure access token obtained through the ``az`` command line tool.

    When the first request yields no token, ``az login`` is started and the
    request is repeated once.

    Returns:
        str: the ``accessToken`` value reported by ``az``

    Raises:
        RuntimeError: when no token could be obtained, even after ``az login``
    """
    # Both commands are fixed strings without user input, so running them
    # through the shell is kept as in the original implementation.
    command = "az account get-access-token --resource-type oss-rdbms"

    def _request_token() -> Optional[str]:
        """Run the token command; return the token or None when it failed."""
        result = subprocess.run(command, capture_output=True, shell=True, text=True)
        try:
            return json.loads(result.stdout)["accessToken"]
        except (JSONDecodeError, KeyError, TypeError):
            return None

    token = _request_token()
    if token is None:
        subprocess.run("az login", shell=True)
        token = _request_token()

    if token is None:
        raise RuntimeError("Could not obtain an Azure access token using the az CLI")

    return token
100
+
101
+
102
+ if __name__ == "__main__":
103
+ ...
104
+ # Examples
105
+
106
+ # Get database connection settings from yaml
107
+ # engine_strings = get_db_connection_strings("python/database_config.yml")
108
+ # print(engine_strings.ruimte_analyse222)
109
+
110
+ # Get database connection settings from yaml and reset password
111
+ # engine_strings = get_db_connection_strings(
112
+ # "python/database_config.yml", reset_pw=["ruimte_analyse222"]
113
+ # )
114
+ # print(engine_strings.ruimte_analyse222)
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from sqlalchemy import MetaData, create_engine
6
+ from sqlalchemy.exc import ProgrammingError
7
+ from sqlalchemy.ext.automap import automap_base
8
+ from sqlalchemy.orm import Session
9
+
10
+
11
def query_as_dict(rs):
    """Convert every row of a result set to a dictionary.

    Args:
        rs: iterable of rows that offer ``_asdict()`` (named-tuple style rows)

    Returns:
        list[dict]: one dictionary per row. Rows without ``_asdict`` are
        skipped and their index is printed.
    """
    result = []
    for idx, row in enumerate(rs):
        try:
            as_dict = row._asdict
        except AttributeError:
            # Best effort: report the position of the unusable row and go on.
            print(idx)
            continue
        result.append(as_dict())
    return result
18
+
19
+
20
def table_from_db_to_db(
    conn_string_db_from: str,
    conn_string_db_to: str,
    table: str,
    schema_from: Optional[str] = None,
    schema_to: Optional[str] = None,
    rename_table: Optional[str] = None,
    if_exist: Optional[str] = None,
):
    """Copy one table, structure and rows, from one database to another.

    The table is reflected from the source database, all rows are read into
    memory, and the table is then created in the target database and filled.

    Args:
        conn_string_db_from (str): connection string of the source database
        conn_string_db_to (str): connection string of the target database
        table (str): name of the table in the source database
        schema_from (Optional[str]): schema of the table in the source database
        schema_to (Optional[str]): schema to create the table in
        rename_table (Optional[str]): name for the table in the target
            database; the source name is used when omitted
        if_exist (Optional[str]): pass ``"drop"`` to drop an existing target
            table first
    """
    engine_from = create_engine(conn_string_db_from)
    engine_to = create_engine(conn_string_db_to)

    print("Reflecting table")
    metadata_from = MetaData()
    metadata_from.reflect(engine_from, schema=schema_from, only=[table])
    # Reflected tables are keyed as "<schema>.<table>" when a schema is given.
    table_key = f"{schema_from}.{table}" if schema_from else table
    table_meta = metadata_from.tables[table_key]

    print("Querying table")
    with Session(engine_from) as s:
        rows = [row._asdict() for row in s.query(table_meta).all()]

    print("Setting schema")
    # From here on the reflected Table object describes the *target* table.
    if rename_table:
        table_meta.name = rename_table
    table_meta.schema = schema_to

    if if_exist == "drop":
        print("Dropping table")
        try:
            # checkfirst skips the DROP when the table does not exist yet.
            table_meta.drop(engine_to, checkfirst=True)
        except ProgrammingError as pe:
            print(f"Exception Caught: {pe}")

    print("Creating table")
    metadata_from.create_all(engine_to)

    print("Writing table")
    # An empty parameter list is not a bulk insert of zero rows, so skip it.
    if rows:
        with Session(engine_to) as s:
            s.execute(table_meta.insert(), rows)
            s.commit()
@@ -0,0 +1,98 @@
1
+ import pathlib
2
+ import shutil
3
+ import zipfile
4
+
5
+ import requests
6
+
7
+
8
def download(url: str, dest_path: pathlib.Path):
    """Download ``url`` and store the response body in ``dest_path``.

    Args:
        url (str): address to download
        dest_path (pathlib.Path): file the content is written to

    Raises:
        ValueError: when the server does not answer with a success status
    """
    # The context manager releases the streamed connection in every case.
    with requests.get(url, stream=True, timeout=30) as r:
        if not r.ok:
            raise ValueError("Download failed: check repo and language")

        with open(dest_path, "wb") as f:
            # iter_content() defaults to 1-byte chunks, which makes large
            # downloads extremely slow; use a sensible block size instead.
            for chunk in r.iter_content(chunk_size=64 * 1024):
                if chunk:
                    f.write(chunk)
17
+
18
+
19
def unzip(zip_store: pathlib.Path, dest_folder: pathlib.Path):
    """Extract every member of the archive ``zip_store`` into ``dest_folder``.

    Args:
        zip_store (pathlib.Path): zip archive to read
        dest_folder (pathlib.Path): folder the members are extracted into
    """
    with zipfile.ZipFile(zip_store) as archive:
        archive.extractall(path=dest_folder)
23
+
24
+
25
class FileLocation:
    """Derives the URL and local paths used to copy part of a GitLab repository."""

    GIT_PROVIDER = "https://gitlab.com/os-amsterdam"

    def __init__(
        self,
        dest_folder: str,
        dest_folder_name: str,
        repo: str,
        branch: str,
        subfolder: str,
    ):
        """
        Args:
            dest_folder (str): local folder everything is downloaded into
            dest_folder_name (str): name of the final folder inside ``dest_folder``
            repo (str): repository name below ``GIT_PROVIDER``
            branch (str): branch to download
            subfolder (str): folder inside the repository that is kept
        """
        self.dest_folder = pathlib.Path(dest_folder)
        self.dest_folder_name = dest_folder_name
        self.repo = repo
        self.branch = branch
        self.subfolder = subfolder

    @property
    def url(self):
        """Download URL of the zip archive of ``branch``."""
        # The ref appears twice: as the path segment selecting what is
        # archived and in the file name. The path segment used to be
        # hard-coded to "main".
        return (
            f"{self.GIT_PROVIDER}/{self.repo}/-/archive/{self.branch}/"
            f"{self.repo}-{self.branch}.zip"
        )

    @property
    def zipfile(self):
        """Temporary location of the downloaded archive."""
        return self.dest_folder / "_temp.zip"

    @property
    def unzipped_folder(self):
        """Folder ``<repo>-<branch>`` inside ``dest_folder``."""
        return self.dest_folder / f"{self.repo}-{self.branch}"

    @property
    def move_folder(self):
        """The ``subfolder`` inside the unzipped folder."""
        return self.unzipped_folder / self.subfolder

    @property
    def os_tools_folder(self):
        """Final destination: ``dest_folder / dest_folder_name``."""
        return self.dest_folder / self.dest_folder_name
61
+
62
+
63
def copy_repo(
    repo: str,
    dest_folder: str,
    dest_folder_name,
    branch,
    subfolder,
):
    """Download a repository archive and keep one subfolder of it.

    The archive is downloaded and unzipped into ``dest_folder``; ``subfolder``
    is then moved to ``dest_folder / dest_folder_name``, replacing an existing
    folder of that name. The archive and the unzipped folder are removed
    afterwards, also when a step fails.

    Args:
        repo (str): repository name
        dest_folder (str): local folder to work in
        dest_folder_name: name of the resulting folder inside ``dest_folder``
        branch: branch to download
        subfolder: folder inside the repository to keep

    Raises:
        FileNotFoundError: when ``subfolder`` is not part of the archive
    """
    fl = FileLocation(
        dest_folder=dest_folder,
        dest_folder_name=dest_folder_name,
        repo=repo,
        branch=branch,
        subfolder=subfolder,
    )
    try:
        download(url=fl.url, dest_path=fl.zipfile)
        unzip(zip_store=fl.zipfile, dest_folder=fl.dest_folder)

        # Check before deleting anything: a wrong subfolder must not wipe
        # the copy that is already installed.
        if not fl.move_folder.exists():
            raise FileNotFoundError(f"{fl.move_folder} not found in downloaded archive")

        if fl.os_tools_folder.exists():
            shutil.rmtree(fl.os_tools_folder)
        shutil.move(fl.move_folder, fl.os_tools_folder)
    finally:
        # Remove downloaded zip file and unzipped folder
        fl.zipfile.unlink(missing_ok=True)
        shutil.rmtree(fl.unzipped_folder, ignore_errors=True)
87
+
88
+
89
def copy_os_tools(dest_folder: str, branch="main", subfolder="python"):
    """Copy the tools repository into ``dest_folder / "ostools"``.

    Args:
        dest_folder (str): local folder the tools are copied into
        branch: branch to download; a falsy value selects ``"main"``
        subfolder: folder of the repository to copy. Defaults to ``"python"``.
    """
    copy_repo(
        "tools-onderzoek-en-statistiek",
        dest_folder,
        "ostools",
        branch or "main",
        subfolder,
    )
95
+
96
+
97
+ if __name__ == "__main__":
98
+ copy_os_tools("C:/python_projects/_uitproberen/test_project/src")
@@ -0,0 +1,85 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Union
4
+
5
+ import requests
6
+
7
+
8
def get_geo_json(
    level: str, year: Union[int, Any], with_water: bool = False, mra: bool = False
) -> dict[str, Any]:
    """Download the GeoJSON for an area level and year.

    Args:
        level (str): 'stadsdelen'/'gebieden'/'wijken'/'buurten'
        year (int): year of the area division. Without ``mra``, years up to
            and including 2020 are mapped to the combined "2015-2020" files.
        with_water (bool, optional): when False the "-zw" variant of the file
            is requested. Defaults to False.
        mra (bool, optional): use the files in the ``mra`` folder instead of
            the ``amsterdam`` folder. Defaults to False.

    Returns:
        dict[str, Any]: geo json of the desired level and year

    Raises:
        requests.HTTPError: when the file does not exist or the request fails
    """
    base_url = "https://gitlab.com/os-amsterdam/datavisualisatie-onderzoek-en-statistiek/-/raw/main/geo"

    if mra:
        level = f"{level}-mra"
        region = "mra"
    else:
        region = "amsterdam"
        # A year that is already a string (e.g. "2015-2020") is used as is.
        if not isinstance(year, str) and year <= 2020:
            year = "2015-2020"

    suffix = "geo" if with_water else "zw-geo"
    url = f"{base_url}/{region}/{year}/{level}-{year}-{suffix}.json"

    print(url)
    response = requests.get(url, timeout=30)
    # Fail with the HTTP status rather than a JSON decode error on a 404 page.
    response.raise_for_status()
    return response.json()
39
+
40
+
41
def extract_name_code_table(geo_json: dict[str, str]) -> dict[str, str]:
    """Build a name-to-code lookup from the features of a GeoJSON document.

    Args:
        geo_json (dict[str, str]): geo_json of a specific level and year

    Returns:
        dict[str, str]: mapping of each feature's 'naam' to its 'code'
    """
    all_properties = (feature.get("properties") for feature in geo_json["features"])
    return {props["naam"]: props["code"] for props in all_properties}
56
+
57
+
58
def get_geo_name_code(level: str, year: int, mra: bool = False) -> dict[str, str]:
    """Download the GeoJSON for a level and year and return its name-to-code table.

    Args:
        level (str): 'stadsdelen'/'gebieden'/'wijken'/'buurten'
        year (int): year of the area division
        mra (bool, optional): passed on to ``get_geo_json`` to select the MRA
            files. Defaults to False.

    Returns:
        dict[str, str]: mapping of area name ('naam') to area code ('code')
    """
    # mra used to be accepted here but was never forwarded.
    geo_json = get_geo_json(level=level, year=year, mra=mra)
    return extract_name_code_table(geo_json)
71
+
72
+
73
+ if __name__ == "__main__":
74
+ ...
75
+ # print(get_geo_json("buurten", 2021, mra=False))
76
+ # print(get_geo_json("buurten", 2018, mra=False))
77
+
78
+ # print(get_geo_json("buurten", 2021, mra=True))
79
+ # print(get_geo_json("buurten", 2018, mra=True))
80
+
81
+ print(get_geo_name_code("wijken", 2020, mra=False))
82
+ print(get_geo_name_code("wijken", 2020, mra=True))
83
+
84
+
85
+ # https://gitlab.com/os-amsterdam/datavisualisatie-onderzoek-en-statistiek/-/raw/main/geo/mra//2015-2020/buurten-mra-2015-2020-zw-geo.json
@@ -0,0 +1,39 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
4
+ import time
5
+ from typing import Optional, Union
6
+
7
+ import pandas as pd
8
+
9
+
10
def time_it(func):
    """Decorator that prints how many seconds each call of ``func`` took.

    The wrapped function's return value is passed through unchanged.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        started_at = time.perf_counter()
        outcome = func(*args, **kwargs)
        elapsed = time.perf_counter() - started_at
        print(elapsed)
        return outcome

    return wrapper
19
+
20
+
21
def os_cut(
    x: Union[list[Union[int, float]], pd.Series],
    bins: list,
    start_label: str = "lager dan",
    end_label: str = "en hoger",
    add_edge: Optional[int] = None,
    sep: str = " - ",
) -> pd.Series:
    """Bin ``x`` with ``pd.cut`` using readable labels for every interval.

    The first interval is labelled "<start_label> <bins[1]>", the last one
    "<bins[-2]> <end_label>" and each interval in between
    "<left edge + add_edge><sep><right edge>".

    Args:
        x: values to bin
        bins (list): bin edges handed to ``pd.cut``
        start_label (str): text in front of the first label
        end_label (str): text after the last label
        add_edge (Optional[int]): amount added to the left edge shown in the
            in-between labels; a falsy value means 0
        sep (str): separator between the edges of the in-between labels

    Returns:
        The result of ``pd.cut`` with the generated labels.
    """
    # Offset for the displayed left edge
    offset = add_edge or 0

    first = f"{start_label} {bins[1]}"
    middle = [
        f"{left + offset}{sep}{right}"
        for left, right in zip(bins[1:-2], bins[2:-1])
    ]
    last = f"{bins[-2]} {end_label}"

    return pd.cut(x, bins=bins, labels=[first, *middle, last])  # type: ignore
File without changes
@@ -0,0 +1,48 @@
1
+ import requests
2
+
3
+
4
def get_os_colors(
    type: str, kleur: str, aantal: str | int, invert: bool = False
) -> list[str]:
    """Download the OS color reference and return one palette from it.

    Args:
        type (str): palette type: 'oplopend', 'uiteenlopend' or 'discreet'
        kleur (str): palette name within the type
            oplopend:
                'blauw' |
                'paars' |
                'groen' |
                'roze' |
                'lichtblauw' |
                'oranje' |
                'lichtgroen' |
                'grijs'
            uiteenlopend:
                'stoplicht (1-7)' |
                'blauw - grijs - groen (1-9)' |
                'paars - grijs - lichtblauw (1-9)' |
                'blauw - geel - groen (1-9)' |
                'rood - geel - lichtblauw (1-9)'
            discreet:
                'discreet (1-9)' |
                'fruitig (1-9)' |
                'fruitig (1-9, anders gesorteerd)' |
                'waterkant (1-9)' |
                'waterkant (1-9, anders gesorteerd)' |
                'zonsondergang (1-9)'
        aantal (str | int): number of colors returned
        invert (bool, optional): invert colors. Defaults to False.

    Returns:
        list[str]: list with colors

    Raises:
        requests.HTTPError: when the color reference cannot be downloaded
        KeyError: when ``type``, ``kleur`` or ``aantal`` is not in the reference
    """
    url = "https://gitlab.com/os-amsterdam/tools-onderzoek-en-statistiek/-/raw/main/references/OS_colors.json"
    response = requests.get(url, timeout=30)
    # Fail with the HTTP status rather than a JSON decode error on an error page.
    response.raise_for_status()

    # The reference is nested as type -> palette name -> number of colors.
    colors = response.json()[type][kleur][str(aantal)]

    if invert:
        colors = colors[::-1]

    return colors
File without changes