toolsos 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
toolsos/cbs_tools.py CHANGED
@@ -2,8 +2,9 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import pickle
5
+ from datetime import datetime
5
6
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Iterator, Optional, Any
7
+ from typing import TYPE_CHECKING, Any, Iterator, Optional
7
8
 
8
9
  import pandas as pd
9
10
  import pyarrow as pa
@@ -14,18 +15,27 @@ if TYPE_CHECKING:
14
15
  import pyreadstat
15
16
 
16
17
 
18
+ def get_batch_size(path, memory_limit):
19
+ df, _ = prs.read_sav(path, row_limit=1000)
20
+
21
+ # memory in megabytes
22
+ mem_size = df.memory_usage().sum() / 1_000_000
23
+
24
+ # The number of blocks (of a thousand rows each) that fit in the memory_limit
25
+ n_blocks = memory_limit / mem_size
26
+
27
+ # Calculate the number of rows that fit within the memory limit
28
+ return round(n_blocks * 1000)
29
+
30
+
17
31
  class SavToParquet:
18
32
  def __init__(
19
- self,
20
- file: str,
21
- folder_out: str,
22
- chunksize: Optional[int] = None,
23
- verbose: bool = False,
33
+ self, file: str, folder_out: str, verbose: bool = False, memory_limit=10_000
24
34
  ) -> None:
25
35
  self.file = file
26
36
  self.folder_out = folder_out
27
37
  self.verbose = verbose
28
- self.chunksize = 5_000_000 if not chunksize else chunksize
38
+ self.memory_limit = memory_limit
29
39
 
30
40
  @property
31
41
  def path_out(self) -> str:
@@ -33,20 +43,27 @@ class SavToParquet:
33
43
 
34
44
  @property
35
45
  def chunks(self) -> Iterator[tuple["pyreadstat.metadata_container", pd.DataFrame]]:
36
- return prs.read_file_in_chunks(
37
- prs.read_sav, self.file, chunksize=self.chunksize
38
- )
39
46
 
40
- def get_meta(self) -> Iterator:
41
- return prs.read_sav(self.file, row_limit=10)
47
+ chunksize = get_batch_size(self.file, self.memory_limit)
48
+
49
+ if self.verbose:
50
+ print(f"Reading file in blocks of {chunksize} rows")
51
+ print("One such block should fit within the memory limit")
52
+
53
+ return prs.read_file_in_chunks(prs.read_sav, self.file, chunksize=chunksize)
42
54
 
43
55
  def write_meta_to_json(self) -> None:
44
56
  json_path = self.path_out.replace(".parquet", "_meta.json")
45
57
 
46
58
  meta_dict = {}
47
- for attr in dir(self.meta):
48
- if not attr.startswith("__"):
49
- meta_dict[attr] = getattr(self.meta, attr)
59
+ for attr_name in dir(self.meta):
60
+ if not attr_name.startswith("__"):
61
+ attr = getattr(self.meta, attr_name)
62
+
63
+ if isinstance(attr, datetime):
64
+ attr = attr.strftime("%Y-%m-%d %H:%M:%S")
65
+
66
+ meta_dict[attr_name] = attr
50
67
 
51
68
  with open(json_path, "w") as file:
52
69
  json.dump(meta_dict, file)
@@ -58,10 +75,12 @@ class SavToParquet:
58
75
  pickle.dump(self.meta, file)
59
76
 
60
77
  def write_to_parquet(self) -> None:
61
- meta_df, self.meta = self.get_meta()
62
- schema = table = pa.Table.from_pandas(meta_df).schema
63
78
 
64
79
  print("Writing table")
80
+
81
+ line1, self.meta = prs.read_sav(self.file, row_limit=1)
82
+ schema = pa.Table.from_pandas(line1).schema
83
+
65
84
  with pq.ParquetWriter(self.path_out, schema) as writer:
66
85
  for idx, (df, _) in enumerate(self.chunks):
67
86
  if self.verbose:
@@ -153,23 +153,3 @@ def write_multiple_pgpass(conn_details, path: str | None = None):
153
153
 
154
154
  if os.name != "nt":
155
155
  path.chmod("0600")
156
-
157
-
158
- # Writing connection settings to pgpass.conf
159
-
160
-
161
- if __name__ == "__main__":
162
- ...
163
- # Examples
164
-
165
- # Get database connection settings from yaml
166
- engine_strings = get_db_connection_strings(
167
- "src/toolsos/database/database_config.yml"
168
- )
169
- print(engine_strings.ruimte_analyse222)
170
-
171
- # Get database connection settings from yaml and reset password
172
- engine_strings = get_db_connection_strings(
173
- "src/toolsos/database/database_config.yml", reset_pw=["ruimte_analyse222"]
174
- )
175
- print(engine_strings.ruimte_analyse222)
toolsos/geo.py CHANGED
@@ -17,7 +17,7 @@ def get_geo_json(
17
17
  Returns:
18
18
  dict[str, str]: geo json containing the desired level and year
19
19
  """
20
- base_url = "https://gitlab.com/os-amsterdam/datavisualisatie-onderzoek-en-statistiek/-/raw/main/geo/"
20
+ base_url = "https://gitlab.com/os-amsterdam/datavisualisatie-onderzoek-en-statistiek/-/raw/main/public/geo/"
21
21
 
22
22
  if mra:
23
23
  level = f"{level}-mra"
@@ -24,9 +24,8 @@ def pie(
24
24
  width=width,
25
25
  height=height,
26
26
  hole=hole,
27
- template=BaseStyle().get_base_template(),
27
+ template=BaseStyle().get_base_template(font=font),
28
28
  color_discrete_sequence=color_discrete_sequence,
29
- font=font,
30
29
  **kwargs,
31
30
  )
32
31
 
@@ -56,6 +56,8 @@ def cols_to_str(df: pd.DataFrame) -> pd.DataFrame:
56
56
  Returns:
57
57
  pd.DataFrame: Dataframe with column names as strings
58
58
  """
59
+
60
+ # Multiindex columns are always strings and therefore can't be cast to string
59
61
  if df.columns.nlevels == 1:
60
62
  df.columns = df.columns.astype(str)
61
63
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toolsos
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: OS tools
5
5
  Author-email: OS <d.schmitz@amsterdam.nl>
6
6
  Keywords: tools,Onderzoek & Statistiek
@@ -10,22 +10,22 @@ Classifier: Programming Language :: Python :: 3
10
10
  Requires-Python: >=3.10
11
11
  Description-Content-Type: text/markdown
12
12
  Provides-Extra: all
13
- Requires-Dist: keyring ; extra == 'all'
14
- Requires-Dist: openpyxl ; extra == 'all'
15
- Requires-Dist: pandas ; extra == 'all'
16
- Requires-Dist: plotly ; extra == 'all'
17
- Requires-Dist: polars ; extra == 'all'
18
- Requires-Dist: pyarrow ; extra == 'all'
19
- Requires-Dist: pyreadstat ; extra == 'all'
20
- Requires-Dist: pyyaml ; extra == 'all'
21
- Requires-Dist: requests ; extra == 'all'
22
- Requires-Dist: sqlalchemy ; extra == 'all'
13
+ Requires-Dist: keyring; extra == "all"
14
+ Requires-Dist: openpyxl; extra == "all"
15
+ Requires-Dist: pandas; extra == "all"
16
+ Requires-Dist: plotly; extra == "all"
17
+ Requires-Dist: polars; extra == "all"
18
+ Requires-Dist: pyarrow; extra == "all"
19
+ Requires-Dist: pyreadstat; extra == "all"
20
+ Requires-Dist: pyyaml; extra == "all"
21
+ Requires-Dist: requests; extra == "all"
22
+ Requires-Dist: sqlalchemy; extra == "all"
23
23
  Provides-Extra: dev
24
- Requires-Dist: black ; extra == 'dev'
25
- Requires-Dist: bumpver ; extra == 'dev'
26
- Requires-Dist: isort ; extra == 'dev'
27
- Requires-Dist: pip-tools ; extra == 'dev'
28
- Requires-Dist: pytest ; extra == 'dev'
24
+ Requires-Dist: black; extra == "dev"
25
+ Requires-Dist: bumpver; extra == "dev"
26
+ Requires-Dist: isort; extra == "dev"
27
+ Requires-Dist: pip-tools; extra == "dev"
28
+ Requires-Dist: pytest; extra == "dev"
29
29
 
30
30
  # Tools Onderzoek & Statistiek
31
31
 
@@ -63,5 +63,12 @@ Instructions on building a package can be found [here](https://packaging.python.
63
63
 
64
64
  - make a pypi account
65
65
  - ask to be added as collaborator to toolsos
66
- - first update twine: py -m pip install --upgrade twin
66
+ - first update twine: py -m pip install --upgrade twine
67
67
  - upload to pypi: twine upload dist/* --skip-existing
68
+
69
+ ## Install to local environment for testing
70
+
71
+ - python -m venv local (maak een lokale venv aan)
72
+ - local\Scripts\activate (activeer de venv)
73
+ - pip install -e . (installeer toolsos)
74
+ - pip install -r local_requirements.txt (installeer de benodigde dependencies)
@@ -1,11 +1,11 @@
1
1
  toolsos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- toolsos/cbs_tools.py,sha256=VLhptzy7m5EaET07s6VeAfDz79g1Hs38qeWPyA14wkw,2823
2
+ toolsos/cbs_tools.py,sha256=361cogk0aIU4D4BKHaa7YSOBh64t5C3zrHlqtWx0iIc,3465
3
3
  toolsos/create_tables.py,sha256=43FHK3EERjumBtnGhngIdtthZzcc_Qi37lJ1MgATzBg,908
4
4
  toolsos/download.py,sha256=88hehmPL5m5d1nrcJjltuh4xrCItF5EYHaZdHOcSt-g,2652
5
- toolsos/geo.py,sha256=_OexkeUgXcnPW1mw27VN6fMcX2PMUSljLwIg48Xkv3M,2412
5
+ toolsos/geo.py,sha256=arapy_ol6_so8KZ5gJk9ywXysSz4W8ah-cjrJ3DuxAo,2419
6
6
  toolsos/helpers.py,sha256=VeOl-fLgePCbjEmAQdVmYe7z8OE1pISeDDuP1t5QSxM,997
7
7
  toolsos/polars_helpers.py,sha256=P3RHLQFeDL7-9U_Q1n4ma_NSkdYAiker4pnc57uluHw,770
8
- toolsos/database/database_connection.py,sha256=NTgwXLJ7LFrTlMr74W9Yge8hLyQG0DCW4t5UuwtoP18,4920
8
+ toolsos/database/database_connection.py,sha256=_CW84UMElCI4ix0LqDWRCL6igMjuJilJYXxxWdMUcbA,4352
9
9
  toolsos/database/database_transfer.py,sha256=1ghq5VEtKyOdCKdM45uOyrZSoXMuWsdC35R3WNuFvdU,1827
10
10
  toolsos/huisstijl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  toolsos/huisstijl/colors.py,sha256=lSCHCdSjge5cGfLfAObd6mV6TaXq3QGImLOmoGJpGkw,1484
@@ -13,13 +13,13 @@ toolsos/huisstijl/graphs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
13
13
  toolsos/huisstijl/graphs/bargraph.py,sha256=HYl01_euh23iDYSUhnAzYAXS0DhDpg9eLRjJEpeR6iU,2815
14
14
  toolsos/huisstijl/graphs/graph_styles.py,sha256=Z9LLH7j8ODTsYMYK0rslacphuiRDcq5_IpSjEEiK2VY,975
15
15
  toolsos/huisstijl/graphs/linegraph.py,sha256=dMUarRe31SXaY78OCXLy-PgnU8LlVJ9KkzKaHhDtuuI,698
16
- toolsos/huisstijl/graphs/piegraph.py,sha256=tHWQlM5BP03Pq044-pONoyYlDRNyG-hC1itwsoZ7DDA,714
16
+ toolsos/huisstijl/graphs/piegraph.py,sha256=aEFiEM-9QuhBOjKHSXVuE5bTh-8uucq4FP6O8Vk1vZI,703
17
17
  toolsos/huisstijl/graphs/styler.py,sha256=-uZ7pjY1G39XvmaGHQd31gPRxjxmJGhYZk8xhy2JUWc,6623
18
18
  toolsos/huisstijl/tables/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
19
  toolsos/huisstijl/tables/table_helpers.py,sha256=jsQ6lw93sxtGJGrUn8X2_LyA2vYYnytngpUI5A_wpWQ,2037
20
20
  toolsos/huisstijl/tables/table_styles.py,sha256=oYU6GJcfqlKpZof5PUjPsA7woJ3Tew78CHPyT0_jY6w,1343
21
- toolsos/huisstijl/tables/tables.py,sha256=67vYOe4DAOyZDPCdc4BOEcPJ80IJLCGilHyQNSMrIvo,23967
22
- toolsos-0.2.4.dist-info/METADATA,sha256=TX31wvxxA7reLc6bKCtkzAuq20DELKfSkDQfvFSJAO8,2433
23
- toolsos-0.2.4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
24
- toolsos-0.2.4.dist-info/top_level.txt,sha256=2ClEjUBbtfDQ8oPwvWRy1Sz2nrkLCXlg0mHaMdCWia0,8
25
- toolsos-0.2.4.dist-info/RECORD,,
21
+ toolsos/huisstijl/tables/tables.py,sha256=2FO-ByLjgs-DbNgem3cDfYJbLbIDzRDqXtjL75WN7kY,24054
22
+ toolsos-0.2.5.dist-info/METADATA,sha256=rTUT5FhyCYenHMdaFTeU6v5LQymQDfMjdJOM1lTsTdM,2683
23
+ toolsos-0.2.5.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
24
+ toolsos-0.2.5.dist-info/top_level.txt,sha256=2ClEjUBbtfDQ8oPwvWRy1Sz2nrkLCXlg0mHaMdCWia0,8
25
+ toolsos-0.2.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (72.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5