toolsos 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toolsos/cbs_tools.py +36 -17
- toolsos/database/database_connection.py +0 -20
- toolsos/geo.py +1 -1
- toolsos/huisstijl/graphs/piegraph.py +1 -2
- toolsos/huisstijl/tables/tables.py +2 -0
- {toolsos-0.2.4.dist-info → toolsos-0.2.5.dist-info}/METADATA +24 -17
- {toolsos-0.2.4.dist-info → toolsos-0.2.5.dist-info}/RECORD +9 -9
- {toolsos-0.2.4.dist-info → toolsos-0.2.5.dist-info}/WHEEL +1 -1
- {toolsos-0.2.4.dist-info → toolsos-0.2.5.dist-info}/top_level.txt +0 -0
toolsos/cbs_tools.py
CHANGED
|
@@ -2,8 +2,9 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import pickle
|
|
5
|
+
from datetime import datetime
|
|
5
6
|
from pathlib import Path
|
|
6
|
-
from typing import TYPE_CHECKING, Iterator, Optional
|
|
7
|
+
from typing import TYPE_CHECKING, Any, Iterator, Optional
|
|
7
8
|
|
|
8
9
|
import pandas as pd
|
|
9
10
|
import pyarrow as pa
|
|
@@ -14,18 +15,27 @@ if TYPE_CHECKING:
|
|
|
14
15
|
import pyreadstat
|
|
15
16
|
|
|
16
17
|
|
|
18
|
+
def get_batch_size(path, memory_limit):
|
|
19
|
+
df, _ = prs.read_sav(path, row_limit=1000)
|
|
20
|
+
|
|
21
|
+
# memory in megabytes
|
|
22
|
+
mem_size = df.memory_usage().sum() / 1_000_000
|
|
23
|
+
|
|
24
|
+
# The number of blocks (of a thousand rows each) that fit in the memory_limit
|
|
25
|
+
n_blocks = memory_limit / mem_size
|
|
26
|
+
|
|
27
|
+
# Calculate the number of rows that fit within the memory limit
|
|
28
|
+
return round(n_blocks * 1000)
|
|
29
|
+
|
|
30
|
+
|
|
17
31
|
class SavToParquet:
|
|
18
32
|
def __init__(
|
|
19
|
-
self,
|
|
20
|
-
file: str,
|
|
21
|
-
folder_out: str,
|
|
22
|
-
chunksize: Optional[int] = None,
|
|
23
|
-
verbose: bool = False,
|
|
33
|
+
self, file: str, folder_out: str, verbose: bool = False, memory_limit=10_000
|
|
24
34
|
) -> None:
|
|
25
35
|
self.file = file
|
|
26
36
|
self.folder_out = folder_out
|
|
27
37
|
self.verbose = verbose
|
|
28
|
-
self.
|
|
38
|
+
self.memory_limit = memory_limit
|
|
29
39
|
|
|
30
40
|
@property
|
|
31
41
|
def path_out(self) -> str:
|
|
@@ -33,20 +43,27 @@ class SavToParquet:
|
|
|
33
43
|
|
|
34
44
|
@property
|
|
35
45
|
def chunks(self) -> Iterator[tuple["pyreadstat.metadata_container", pd.DataFrame]]:
|
|
36
|
-
return prs.read_file_in_chunks(
|
|
37
|
-
prs.read_sav, self.file, chunksize=self.chunksize
|
|
38
|
-
)
|
|
39
46
|
|
|
40
|
-
|
|
41
|
-
|
|
47
|
+
chunksize = get_batch_size(self.file, self.memory_limit)
|
|
48
|
+
|
|
49
|
+
if self.verbose:
|
|
50
|
+
print(f"Reading file in blocks of {chunksize} rows")
|
|
51
|
+
print("One such block should fit within the memory limit")
|
|
52
|
+
|
|
53
|
+
return prs.read_file_in_chunks(prs.read_sav, self.file, chunksize=chunksize)
|
|
42
54
|
|
|
43
55
|
def write_meta_to_json(self) -> None:
|
|
44
56
|
json_path = self.path_out.replace(".parquet", "_meta.json")
|
|
45
57
|
|
|
46
58
|
meta_dict = {}
|
|
47
|
-
for
|
|
48
|
-
if not
|
|
49
|
-
|
|
59
|
+
for attr_name in dir(self.meta):
|
|
60
|
+
if not attr_name.startswith("__"):
|
|
61
|
+
attr = getattr(self.meta, attr_name)
|
|
62
|
+
|
|
63
|
+
if isinstance(attr, datetime):
|
|
64
|
+
attr = attr.strftime("%Y-%m-%d %H:%M:%S")
|
|
65
|
+
|
|
66
|
+
meta_dict[attr_name] = attr
|
|
50
67
|
|
|
51
68
|
with open(json_path, "w") as file:
|
|
52
69
|
json.dump(meta_dict, file)
|
|
@@ -58,10 +75,12 @@ class SavToParquet:
|
|
|
58
75
|
pickle.dump(self.meta, file)
|
|
59
76
|
|
|
60
77
|
def write_to_parquet(self) -> None:
|
|
61
|
-
meta_df, self.meta = self.get_meta()
|
|
62
|
-
schema = table = pa.Table.from_pandas(meta_df).schema
|
|
63
78
|
|
|
64
79
|
print("Writing table")
|
|
80
|
+
|
|
81
|
+
line1, self.meta = prs.read_sav(self.file, row_limit=1)
|
|
82
|
+
schema = pa.Table.from_pandas(line1).schema
|
|
83
|
+
|
|
65
84
|
with pq.ParquetWriter(self.path_out, schema) as writer:
|
|
66
85
|
for idx, (df, _) in enumerate(self.chunks):
|
|
67
86
|
if self.verbose:
|
|
@@ -153,23 +153,3 @@ def write_multiple_pgpass(conn_details, path: str | None = None):
|
|
|
153
153
|
|
|
154
154
|
if os.name != "nt":
|
|
155
155
|
path.chmod("0600")
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
# Writing connection settings to pgpass.conf
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
if __name__ == "__main__":
|
|
162
|
-
...
|
|
163
|
-
# Examples
|
|
164
|
-
|
|
165
|
-
# Get database connection settings from yaml
|
|
166
|
-
engine_strings = get_db_connection_strings(
|
|
167
|
-
"src/toolsos/database/database_config.yml"
|
|
168
|
-
)
|
|
169
|
-
print(engine_strings.ruimte_analyse222)
|
|
170
|
-
|
|
171
|
-
# Get database connection settings from yaml and reset password
|
|
172
|
-
engine_strings = get_db_connection_strings(
|
|
173
|
-
"src/toolsos/database/database_config.yml", reset_pw=["ruimte_analyse222"]
|
|
174
|
-
)
|
|
175
|
-
print(engine_strings.ruimte_analyse222)
|
toolsos/geo.py
CHANGED
|
@@ -17,7 +17,7 @@ def get_geo_json(
|
|
|
17
17
|
Returns:
|
|
18
18
|
dict[str, str]: geo json of the desired level and year
|
|
19
19
|
"""
|
|
20
|
-
base_url = "https://gitlab.com/os-amsterdam/datavisualisatie-onderzoek-en-statistiek/-/raw/main/geo/"
|
|
20
|
+
base_url = "https://gitlab.com/os-amsterdam/datavisualisatie-onderzoek-en-statistiek/-/raw/main/public/geo/"
|
|
21
21
|
|
|
22
22
|
if mra:
|
|
23
23
|
level = f"{level}-mra"
|
|
@@ -56,6 +56,8 @@ def cols_to_str(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
56
56
|
Returns:
|
|
57
57
|
pd.DataFrame: Dataframe with column names as strings
|
|
58
58
|
"""
|
|
59
|
+
|
|
60
|
+
# Multiindex columns are always strings and therefore can't be cast to string
|
|
59
61
|
if df.columns.nlevels == 1:
|
|
60
62
|
df.columns = df.columns.astype(str)
|
|
61
63
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: toolsos
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Summary: OS tools
|
|
5
5
|
Author-email: OS <d.schmitz@amsterdam.nl>
|
|
6
6
|
Keywords: tools,Onderzoek & Statistiek
|
|
@@ -10,22 +10,22 @@ Classifier: Programming Language :: Python :: 3
|
|
|
10
10
|
Requires-Python: >=3.10
|
|
11
11
|
Description-Content-Type: text/markdown
|
|
12
12
|
Provides-Extra: all
|
|
13
|
-
Requires-Dist: keyring
|
|
14
|
-
Requires-Dist: openpyxl
|
|
15
|
-
Requires-Dist: pandas
|
|
16
|
-
Requires-Dist: plotly
|
|
17
|
-
Requires-Dist: polars
|
|
18
|
-
Requires-Dist: pyarrow
|
|
19
|
-
Requires-Dist: pyreadstat
|
|
20
|
-
Requires-Dist: pyyaml
|
|
21
|
-
Requires-Dist: requests
|
|
22
|
-
Requires-Dist: sqlalchemy
|
|
13
|
+
Requires-Dist: keyring; extra == "all"
|
|
14
|
+
Requires-Dist: openpyxl; extra == "all"
|
|
15
|
+
Requires-Dist: pandas; extra == "all"
|
|
16
|
+
Requires-Dist: plotly; extra == "all"
|
|
17
|
+
Requires-Dist: polars; extra == "all"
|
|
18
|
+
Requires-Dist: pyarrow; extra == "all"
|
|
19
|
+
Requires-Dist: pyreadstat; extra == "all"
|
|
20
|
+
Requires-Dist: pyyaml; extra == "all"
|
|
21
|
+
Requires-Dist: requests; extra == "all"
|
|
22
|
+
Requires-Dist: sqlalchemy; extra == "all"
|
|
23
23
|
Provides-Extra: dev
|
|
24
|
-
Requires-Dist: black
|
|
25
|
-
Requires-Dist: bumpver
|
|
26
|
-
Requires-Dist: isort
|
|
27
|
-
Requires-Dist: pip-tools
|
|
28
|
-
Requires-Dist: pytest
|
|
24
|
+
Requires-Dist: black; extra == "dev"
|
|
25
|
+
Requires-Dist: bumpver; extra == "dev"
|
|
26
|
+
Requires-Dist: isort; extra == "dev"
|
|
27
|
+
Requires-Dist: pip-tools; extra == "dev"
|
|
28
|
+
Requires-Dist: pytest; extra == "dev"
|
|
29
29
|
|
|
30
30
|
# Tools Onderzoek & Statistiek
|
|
31
31
|
|
|
@@ -63,5 +63,12 @@ Instructions on building a package can be found [here](https://packaging.python.
|
|
|
63
63
|
|
|
64
64
|
- make a pypi account
|
|
65
65
|
- ask to be added as collaborator to toolsos
|
|
66
|
-
- first update twine: py -m pip install --upgrade
|
|
66
|
+
- first update twine: py -m pip install --upgrade twine
|
|
67
67
|
- upload to pypi: twine upload dist/* --skip-existing
|
|
68
|
+
|
|
69
|
+
## Install to local environment for testing
|
|
70
|
+
|
|
71
|
+
- python -m venv local (maak een lokale venv aan)
|
|
72
|
+
- local\Scripts\activate (activeer de venv)
|
|
73
|
+
- pip install -e . (installeer toolsos)
|
|
74
|
+
- pip install -r local_requirements.txt (installeer de benodigde dependencies)
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
toolsos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
toolsos/cbs_tools.py,sha256=
|
|
2
|
+
toolsos/cbs_tools.py,sha256=361cogk0aIU4D4BKHaa7YSOBh64t5C3zrHlqtWx0iIc,3465
|
|
3
3
|
toolsos/create_tables.py,sha256=43FHK3EERjumBtnGhngIdtthZzcc_Qi37lJ1MgATzBg,908
|
|
4
4
|
toolsos/download.py,sha256=88hehmPL5m5d1nrcJjltuh4xrCItF5EYHaZdHOcSt-g,2652
|
|
5
|
-
toolsos/geo.py,sha256=
|
|
5
|
+
toolsos/geo.py,sha256=arapy_ol6_so8KZ5gJk9ywXysSz4W8ah-cjrJ3DuxAo,2419
|
|
6
6
|
toolsos/helpers.py,sha256=VeOl-fLgePCbjEmAQdVmYe7z8OE1pISeDDuP1t5QSxM,997
|
|
7
7
|
toolsos/polars_helpers.py,sha256=P3RHLQFeDL7-9U_Q1n4ma_NSkdYAiker4pnc57uluHw,770
|
|
8
|
-
toolsos/database/database_connection.py,sha256=
|
|
8
|
+
toolsos/database/database_connection.py,sha256=_CW84UMElCI4ix0LqDWRCL6igMjuJilJYXxxWdMUcbA,4352
|
|
9
9
|
toolsos/database/database_transfer.py,sha256=1ghq5VEtKyOdCKdM45uOyrZSoXMuWsdC35R3WNuFvdU,1827
|
|
10
10
|
toolsos/huisstijl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
11
|
toolsos/huisstijl/colors.py,sha256=lSCHCdSjge5cGfLfAObd6mV6TaXq3QGImLOmoGJpGkw,1484
|
|
@@ -13,13 +13,13 @@ toolsos/huisstijl/graphs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
|
|
|
13
13
|
toolsos/huisstijl/graphs/bargraph.py,sha256=HYl01_euh23iDYSUhnAzYAXS0DhDpg9eLRjJEpeR6iU,2815
|
|
14
14
|
toolsos/huisstijl/graphs/graph_styles.py,sha256=Z9LLH7j8ODTsYMYK0rslacphuiRDcq5_IpSjEEiK2VY,975
|
|
15
15
|
toolsos/huisstijl/graphs/linegraph.py,sha256=dMUarRe31SXaY78OCXLy-PgnU8LlVJ9KkzKaHhDtuuI,698
|
|
16
|
-
toolsos/huisstijl/graphs/piegraph.py,sha256=
|
|
16
|
+
toolsos/huisstijl/graphs/piegraph.py,sha256=aEFiEM-9QuhBOjKHSXVuE5bTh-8uucq4FP6O8Vk1vZI,703
|
|
17
17
|
toolsos/huisstijl/graphs/styler.py,sha256=-uZ7pjY1G39XvmaGHQd31gPRxjxmJGhYZk8xhy2JUWc,6623
|
|
18
18
|
toolsos/huisstijl/tables/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
19
|
toolsos/huisstijl/tables/table_helpers.py,sha256=jsQ6lw93sxtGJGrUn8X2_LyA2vYYnytngpUI5A_wpWQ,2037
|
|
20
20
|
toolsos/huisstijl/tables/table_styles.py,sha256=oYU6GJcfqlKpZof5PUjPsA7woJ3Tew78CHPyT0_jY6w,1343
|
|
21
|
-
toolsos/huisstijl/tables/tables.py,sha256=
|
|
22
|
-
toolsos-0.2.
|
|
23
|
-
toolsos-0.2.
|
|
24
|
-
toolsos-0.2.
|
|
25
|
-
toolsos-0.2.
|
|
21
|
+
toolsos/huisstijl/tables/tables.py,sha256=2FO-ByLjgs-DbNgem3cDfYJbLbIDzRDqXtjL75WN7kY,24054
|
|
22
|
+
toolsos-0.2.5.dist-info/METADATA,sha256=rTUT5FhyCYenHMdaFTeU6v5LQymQDfMjdJOM1lTsTdM,2683
|
|
23
|
+
toolsos-0.2.5.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
|
|
24
|
+
toolsos-0.2.5.dist-info/top_level.txt,sha256=2ClEjUBbtfDQ8oPwvWRy1Sz2nrkLCXlg0mHaMdCWia0,8
|
|
25
|
+
toolsos-0.2.5.dist-info/RECORD,,
|
|
File without changes
|