toolsos 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {toolsos-0.2.4 → toolsos-0.2.5}/PKG-INFO +9 -2
  2. {toolsos-0.2.4 → toolsos-0.2.5}/README.md +8 -1
  3. {toolsos-0.2.4 → toolsos-0.2.5}/pyproject.toml +1 -1
  4. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/cbs_tools.py +36 -17
  5. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/database/database_connection.py +0 -20
  6. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/geo.py +1 -1
  7. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/graphs/piegraph.py +1 -2
  8. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/tables/tables.py +2 -0
  9. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos.egg-info/PKG-INFO +9 -2
  10. {toolsos-0.2.4 → toolsos-0.2.5}/setup.cfg +0 -0
  11. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/__init__.py +0 -0
  12. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/create_tables.py +0 -0
  13. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/database/database_transfer.py +0 -0
  14. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/download.py +0 -0
  15. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/helpers.py +0 -0
  16. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/__init__.py +0 -0
  17. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/colors.py +0 -0
  18. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/graphs/__init__.py +0 -0
  19. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/graphs/bargraph.py +0 -0
  20. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/graphs/graph_styles.py +0 -0
  21. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/graphs/linegraph.py +0 -0
  22. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/graphs/styler.py +0 -0
  23. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/tables/__init__.py +0 -0
  24. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/tables/table_helpers.py +0 -0
  25. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/huisstijl/tables/table_styles.py +0 -0
  26. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos/polars_helpers.py +0 -0
  27. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos.egg-info/SOURCES.txt +0 -0
  28. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos.egg-info/dependency_links.txt +0 -0
  29. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos.egg-info/requires.txt +0 -0
  30. {toolsos-0.2.4 → toolsos-0.2.5}/src/toolsos.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toolsos
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: OS tools
5
5
  Author-email: OS <d.schmitz@amsterdam.nl>
6
6
  Keywords: tools,Onderzoek & Statistiek
@@ -63,5 +63,12 @@ Instructions on building a package can be found [here](https://packaging.python.
63
63
 
64
64
  - make a pypi account
65
65
  - ask to be added as collaborator to toolsos
66
- - first update twine: py -m pip install --upgrade twin
66
+ - first update twine: py -m pip install --upgrade twine
67
67
  - upload to pypi: twine upload dist/* --skip-existing
68
+
69
+ ## Install to local environment for testing
70
+
71
+ - python -m venv local (maak een lokale venv aan)
72
+ - local\Scripts\activate (activeer de venv)
73
+ - pip install -e . (installeer toolsos)
74
+ - pip install -r local_requirements.txt (installeer de benodigde dependencies)
@@ -34,5 +34,12 @@ Instructions on building a package can be found [here](https://packaging.python.
34
34
 
35
35
  - make a pypi account
36
36
  - ask to be added as collaborator to toolsos
37
- - first update twine: py -m pip install --upgrade twin
37
+ - first update twine: py -m pip install --upgrade twine
38
38
  - upload to pypi: twine upload dist/* --skip-existing
39
+
40
+ ## Install to local environment for testing
41
+
42
+ - python -m venv local (maak een lokale venv aan)
43
+ - local\Scripts\activate (activeer de venv)
44
+ - pip install -e . (installeer toolsos)
45
+ - pip install -r local_requirements.txt (installeer de benodigde dependencies)
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "toolsos"
9
- version = "0.2.4"
9
+ version = "0.2.5"
10
10
  description = "OS tools"
11
11
  readme = "README.md"
12
12
  authors = [{ name = "OS", email = "d.schmitz@amsterdam.nl" }]
@@ -2,8 +2,9 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import pickle
5
+ from datetime import datetime
5
6
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Iterator, Optional, Any
7
+ from typing import TYPE_CHECKING, Any, Iterator, Optional
7
8
 
8
9
  import pandas as pd
9
10
  import pyarrow as pa
@@ -14,18 +15,27 @@ if TYPE_CHECKING:
14
15
  import pyreadstat
15
16
 
16
17
 
18
+ def get_batch_size(path, memory_limit):
19
+ df, _ = prs.read_sav(path, row_limit=1000)
20
+
21
+ # memory in megabytes
22
+ mem_size = df.memory_usage().sum() / 1_000_000
23
+
24
+ # The number of blocks (of a thousand rows each) that fit in the memory_limit
25
+ n_blocks = memory_limit / mem_size
26
+
27
+ # Calculate the number of rows that fit within the memory limit
28
+ return round(n_blocks * 1000)
29
+
30
+
17
31
  class SavToParquet:
18
32
  def __init__(
19
- self,
20
- file: str,
21
- folder_out: str,
22
- chunksize: Optional[int] = None,
23
- verbose: bool = False,
33
+ self, file: str, folder_out: str, verbose: bool = False, memory_limit=10_000
24
34
  ) -> None:
25
35
  self.file = file
26
36
  self.folder_out = folder_out
27
37
  self.verbose = verbose
28
- self.chunksize = 5_000_000 if not chunksize else chunksize
38
+ self.memory_limit = memory_limit
29
39
 
30
40
  @property
31
41
  def path_out(self) -> str:
@@ -33,20 +43,27 @@ class SavToParquet:
33
43
 
34
44
  @property
35
45
  def chunks(self) -> Iterator[tuple["pyreadstat.metadata_container", pd.DataFrame]]:
36
- return prs.read_file_in_chunks(
37
- prs.read_sav, self.file, chunksize=self.chunksize
38
- )
39
46
 
40
- def get_meta(self) -> Iterator:
41
- return prs.read_sav(self.file, row_limit=10)
47
+ chunksize = get_batch_size(self.file, self.memory_limit)
48
+
49
+ if self.verbose:
50
+ print(f"Reading file in blocks of {chunksize} rows")
51
+ print("One such block should fit within the memory limit")
52
+
53
+ return prs.read_file_in_chunks(prs.read_sav, self.file, chunksize=chunksize)
42
54
 
43
55
  def write_meta_to_json(self) -> None:
44
56
  json_path = self.path_out.replace(".parquet", "_meta.json")
45
57
 
46
58
  meta_dict = {}
47
- for attr in dir(self.meta):
48
- if not attr.startswith("__"):
49
- meta_dict[attr] = getattr(self.meta, attr)
59
+ for attr_name in dir(self.meta):
60
+ if not attr_name.startswith("__"):
61
+ attr = getattr(self.meta, attr_name)
62
+
63
+ if isinstance(attr, datetime):
64
+ attr = attr.strftime("%Y-%m-%d %H:%M:%S")
65
+
66
+ meta_dict[attr_name] = attr
50
67
 
51
68
  with open(json_path, "w") as file:
52
69
  json.dump(meta_dict, file)
@@ -58,10 +75,12 @@ class SavToParquet:
58
75
  pickle.dump(self.meta, file)
59
76
 
60
77
  def write_to_parquet(self) -> None:
61
- meta_df, self.meta = self.get_meta()
62
- schema = table = pa.Table.from_pandas(meta_df).schema
63
78
 
64
79
  print("Writing table")
80
+
81
+ line1, self.meta = prs.read_sav(self.file, row_limit=1)
82
+ schema = pa.Table.from_pandas(line1).schema
83
+
65
84
  with pq.ParquetWriter(self.path_out, schema) as writer:
66
85
  for idx, (df, _) in enumerate(self.chunks):
67
86
  if self.verbose:
@@ -153,23 +153,3 @@ def write_multiple_pgpass(conn_details, path: str | None = None):
153
153
 
154
154
  if os.name != "nt":
155
155
  path.chmod("0600")
156
-
157
-
158
- # Writing connection settings to pgpass.conf
159
-
160
-
161
- if __name__ == "__main__":
162
- ...
163
- # Examples
164
-
165
- # Get database connection settings from yaml
166
- engine_strings = get_db_connection_strings(
167
- "src/toolsos/database/database_config.yml"
168
- )
169
- print(engine_strings.ruimte_analyse222)
170
-
171
- # Get database connection settings from yaml and reset password
172
- engine_strings = get_db_connection_strings(
173
- "src/toolsos/database/database_config.yml", reset_pw=["ruimte_analyse222"]
174
- )
175
- print(engine_strings.ruimte_analyse222)
@@ -17,7 +17,7 @@ def get_geo_json(
17
17
  Returns:
18
18
  dict[str, str]: geo json containg of the desired level and year
19
19
  """
20
- base_url = "https://gitlab.com/os-amsterdam/datavisualisatie-onderzoek-en-statistiek/-/raw/main/geo/"
20
+ base_url = "https://gitlab.com/os-amsterdam/datavisualisatie-onderzoek-en-statistiek/-/raw/main/public/geo/"
21
21
 
22
22
  if mra:
23
23
  level = f"{level}-mra"
@@ -24,9 +24,8 @@ def pie(
24
24
  width=width,
25
25
  height=height,
26
26
  hole=hole,
27
- template=BaseStyle().get_base_template(),
27
+ template=BaseStyle().get_base_template(font=font),
28
28
  color_discrete_sequence=color_discrete_sequence,
29
- font=font,
30
29
  **kwargs,
31
30
  )
32
31
 
@@ -56,6 +56,8 @@ def cols_to_str(df: pd.DataFrame) -> pd.DataFrame:
56
56
  Returns:
57
57
  pd.DataFrame: Dataframe with column names as strings
58
58
  """
59
+
60
+ # Multiindex columns are always strings and therefore can't be cast to string
59
61
  if df.columns.nlevels == 1:
60
62
  df.columns = df.columns.astype(str)
61
63
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: toolsos
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: OS tools
5
5
  Author-email: OS <d.schmitz@amsterdam.nl>
6
6
  Keywords: tools,Onderzoek & Statistiek
@@ -63,5 +63,12 @@ Instructions on building a package can be found [here](https://packaging.python.
63
63
 
64
64
  - make a pypi account
65
65
  - ask to be added as collaborator to toolsos
66
- - first update twine: py -m pip install --upgrade twin
66
+ - first update twine: py -m pip install --upgrade twine
67
67
  - upload to pypi: twine upload dist/* --skip-existing
68
+
69
+ ## Install to local environment for testing
70
+
71
+ - python -m venv local (maak een lokale venv aan)
72
+ - local\Scripts\activate (activeer de venv)
73
+ - pip install -e . (installeer toolsos)
74
+ - pip install -r local_requirements.txt (installeer de benodigde dependencies)
File without changes
File without changes
File without changes
File without changes