toolsos 0.2.0__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {toolsos-0.2.0 → toolsos-0.2.6}/PKG-INFO +11 -4
  2. {toolsos-0.2.0 → toolsos-0.2.6}/README.md +8 -1
  3. {toolsos-0.2.0 → toolsos-0.2.6}/pyproject.toml +4 -4
  4. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/cbs_tools.py +36 -17
  5. {toolsos-0.2.0/src/toolsos → toolsos-0.2.6/src/toolsos/database}/database_connection.py +54 -13
  6. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/geo.py +1 -1
  7. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/huisstijl/graphs/bargraph.py +11 -1
  8. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/huisstijl/graphs/graph_styles.py +3 -1
  9. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/huisstijl/graphs/linegraph.py +13 -2
  10. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/huisstijl/graphs/piegraph.py +5 -1
  11. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/huisstijl/graphs/styler.py +21 -14
  12. toolsos-0.2.6/src/toolsos/huisstijl/tables/table_helpers.py +76 -0
  13. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/huisstijl/tables/tables.py +248 -107
  14. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos.egg-info/PKG-INFO +11 -4
  15. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos.egg-info/SOURCES.txt +3 -2
  16. {toolsos-0.2.0 → toolsos-0.2.6}/setup.cfg +0 -0
  17. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/__init__.py +0 -0
  18. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/create_tables.py +0 -0
  19. {toolsos-0.2.0/src/toolsos → toolsos-0.2.6/src/toolsos/database}/database_transfer.py +0 -0
  20. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/download.py +0 -0
  21. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/helpers.py +0 -0
  22. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/huisstijl/__init__.py +0 -0
  23. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/huisstijl/colors.py +0 -0
  24. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/huisstijl/graphs/__init__.py +0 -0
  25. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/huisstijl/tables/__init__.py +0 -0
  26. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/huisstijl/tables/table_styles.py +0 -0
  27. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos/polars_helpers.py +0 -0
  28. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos.egg-info/dependency_links.txt +0 -0
  29. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos.egg-info/requires.txt +0 -0
  30. {toolsos-0.2.0 → toolsos-0.2.6}/src/toolsos.egg-info/top_level.txt +0 -0
@@ -1,13 +1,13 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: toolsos
3
- Version: 0.2.0
3
+ Version: 0.2.6
4
4
  Summary: OS tools
5
5
  Author-email: OS <d.schmitz@amsterdam.nl>
6
6
  Keywords: tools,Onderzoek & Statistiek
7
7
  Classifier: License :: OSI Approved :: MIT License
8
8
  Classifier: Programming Language :: Python
9
9
  Classifier: Programming Language :: Python :: 3
10
- Requires-Python: >=3.11
10
+ Requires-Python: >=3.10
11
11
  Description-Content-Type: text/markdown
12
12
  Provides-Extra: dev
13
13
  Requires-Dist: black; extra == "dev"
@@ -63,5 +63,12 @@ Instructions on building a package can be found [here](https://packaging.python.
63
63
 
64
64
  - make a pypi account
65
65
  - ask to be added as collaborator to toolsos
66
- - first update twine: py -m pip install --upgrade twin
66
+ - first update twine: py -m pip install --upgrade twine
67
67
  - upload to pypi: twine upload dist/* --skip-existing
68
+
69
+ ## Install to local environment for testing
70
+
71
+ - python -m venv local (maak een lokale venv aan)
72
+ - local\Scripts\activate (activeer de venv)
73
+ - pip install -e . (installeer toolsos)
74
+ - pip install -r local_requirements.txt (installeer de benodigde dependencies)
@@ -34,5 +34,12 @@ Instructions on building a package can be found [here](https://packaging.python.
34
34
 
35
35
  - make a pypi account
36
36
  - ask to be added as collaborator to toolsos
37
- - first update twine: py -m pip install --upgrade twin
37
+ - first update twine: py -m pip install --upgrade twine
38
38
  - upload to pypi: twine upload dist/* --skip-existing
39
+
40
+ ## Install to local environment for testing
41
+
42
+ - python -m venv local (maak een lokale venv aan)
43
+ - local\Scripts\activate (activeer de venv)
44
+ - pip install -e . (installeer toolsos)
45
+ - pip install -r local_requirements.txt (installeer de benodigde dependencies)
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
 
7
7
  [project]
8
8
  name = "toolsos"
9
- version = "0.2.0"
9
+ version = "0.2.6"
10
10
  description = "OS tools"
11
11
  readme = "README.md"
12
12
  authors = [{ name = "OS", email = "d.schmitz@amsterdam.nl" }]
@@ -25,7 +25,7 @@ keywords = ["tools", "Onderzoek & Statistiek"]
25
25
  # use pip install "toolsos[all]" to pip install with al dependencies
26
26
 
27
27
  dependencies = []
28
- requires-python = ">=3.11"
28
+ requires-python = ">=3.10"
29
29
 
30
30
  [project.optional-dependencies]
31
31
  dev = [
@@ -49,5 +49,5 @@ all = [
49
49
  "sqlalchemy",
50
50
  ]
51
51
 
52
- #[project.urls]
53
- #Homepage = "https://github.com/realpython/reader"
52
+ # [project.urls]
53
+ #Homepage = ""
@@ -2,8 +2,9 @@ from __future__ import annotations
2
2
 
3
3
  import json
4
4
  import pickle
5
+ from datetime import datetime
5
6
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Iterator, Optional, Any
7
+ from typing import TYPE_CHECKING, Any, Iterator, Optional
7
8
 
8
9
  import pandas as pd
9
10
  import pyarrow as pa
@@ -14,18 +15,27 @@ if TYPE_CHECKING:
14
15
  import pyreadstat
15
16
 
16
17
 
18
def get_batch_size(path, memory_limit):
    """Estimate how many rows of the SPSS file at *path* fit in *memory_limit* MB.

    Reads a 1000-row sample with pyreadstat, measures its pandas memory
    footprint, and scales that up to the number of rows whose estimated
    size stays within the limit.

    Args:
        path: Path to the .sav file.
        memory_limit: Memory budget in megabytes.

    Returns:
        int: Estimated rows per chunk, always at least 1 so callers never
        receive an unusable chunk size of 0.
    """
    sample, _ = prs.read_sav(path, row_limit=1000)

    # Size of the sample in megabytes (includes the index overhead).
    sample_mb = sample.memory_usage().sum() / 1_000_000

    # Number of 1000-row blocks that fit in the budget.
    n_blocks = memory_limit / sample_mb

    # Convert blocks back to rows; clamp so a tiny memory_limit can never
    # round down to a chunk size of 0.
    return max(1, round(n_blocks * 1000))
29
+
30
+
17
31
  class SavToParquet:
18
32
  def __init__(
19
- self,
20
- file: str,
21
- folder_out: str,
22
- chunksize: Optional[int] = None,
23
- verbose: bool = False,
33
+ self, file: str, folder_out: str, verbose: bool = False, memory_limit=10_000
24
34
  ) -> None:
25
35
  self.file = file
26
36
  self.folder_out = folder_out
27
37
  self.verbose = verbose
28
- self.chunksize = 5_000_000 if not chunksize else chunksize
38
+ self.memory_limit = memory_limit
29
39
 
30
40
  @property
31
41
  def path_out(self) -> str:
@@ -33,20 +43,27 @@ class SavToParquet:
33
43
 
34
44
  @property
35
45
  def chunks(self) -> Iterator[tuple["pyreadstat.metadata_container", pd.DataFrame]]:
36
- return prs.read_file_in_chunks(
37
- prs.read_sav, self.file, chunksize=self.chunksize
38
- )
39
46
 
40
- def get_meta(self) -> Iterator:
41
- return prs.read_sav(self.file, row_limit=10)
47
+ chunksize = get_batch_size(self.file, self.memory_limit)
48
+
49
+ if self.verbose:
50
+ print(f"Reading file in blocks of {chunksize} rows")
51
+ print("One such block should fit within the memory limit")
52
+
53
+ return prs.read_file_in_chunks(prs.read_sav, self.file, chunksize=chunksize)
42
54
 
43
55
  def write_meta_to_json(self) -> None:
44
56
  json_path = self.path_out.replace(".parquet", "_meta.json")
45
57
 
46
58
  meta_dict = {}
47
- for attr in dir(self.meta):
48
- if not attr.startswith("__"):
49
- meta_dict[attr] = getattr(self.meta, attr)
59
+ for attr_name in dir(self.meta):
60
+ if not attr_name.startswith("__"):
61
+ attr = getattr(self.meta, attr_name)
62
+
63
+ if isinstance(attr, datetime):
64
+ attr = attr.strftime("%Y-%m-%d %H:%M:%S")
65
+
66
+ meta_dict[attr_name] = attr
50
67
 
51
68
  with open(json_path, "w") as file:
52
69
  json.dump(meta_dict, file)
@@ -58,10 +75,12 @@ class SavToParquet:
58
75
  pickle.dump(self.meta, file)
59
76
 
60
77
  def write_to_parquet(self) -> None:
61
- meta_df, self.meta = self.get_meta()
62
- schema = table = pa.Table.from_pandas(meta_df).schema
63
78
 
64
79
  print("Writing table")
80
+
81
+ line1, self.meta = prs.read_sav(self.file, row_limit=1)
82
+ schema = pa.Table.from_pandas(line1).schema
83
+
65
84
  with pq.ParquetWriter(self.path_out, schema) as writer:
66
85
  for idx, (df, _) in enumerate(self.chunks):
67
86
  if self.verbose:
@@ -2,8 +2,10 @@ from __future__ import annotations
2
2
 
3
3
  import getpass
4
4
  import json
5
+ import os
5
6
  import subprocess
6
7
  from json import JSONDecodeError
8
+ from pathlib import Path
7
9
  from typing import Optional
8
10
 
9
11
  import keyring
@@ -33,7 +35,7 @@ def get_db_connection_strings(
33
35
  for dbname, params in db_info.items():
34
36
  flush = dbname in reset_pw if reset_pw else False
35
37
 
36
- if params["pw"] == "acces_token":
38
+ if params["password"] == "access_token":
37
39
  pw = get_azure_access_token()
38
40
  else:
39
41
  pw = get_pw_from_keyring(dbname=dbname, user=params["user"], reset_pw=flush)
@@ -94,21 +96,60 @@ def get_azure_access_token():
94
96
  result = subprocess.run(command, capture_output=True, shell=True, text=True)
95
97
 
96
98
  try:
97
- json.loads(result.stdout)["accessToken"]
99
+ return json.loads(result.stdout)["accessToken"]
98
100
  except JSONDecodeError:
99
101
  subprocess.run("az login", shell=True)
100
102
 
101
103
 
102
- if __name__ == "__main__":
103
- ...
104
- # Examples
104
def get_token_from_pgpass() -> str:
    """Read the password/token field from the first line of pgpass.conf.

    Looks in %APPDATA%/postgresql/pgpass.conf (the Windows libpq location)
    and returns the fifth colon-separated field of the first line
    (host:port:database:user:password).

    Returns:
        str: The stored password/access token, without the trailing newline.
    """
    p = Path(os.getenv("APPDATA")) / "postgresql" / "pgpass.conf"
    with open(p) as f:
        # readline() keeps the trailing "\n"; strip it first so the last
        # field can be used verbatim in connection strings.
        token = f.readline().rstrip("\n").split(":")[4]

    return token
109
110
 
110
- # Get database connection settings from yaml and reset password
111
- # engine_strings = get_db_connection_strings(
112
- # "python/database_config.yml", reset_pw=["ruimte_analyse222"]
113
- # )
114
- # print(engine_strings.ruimte_analyse222)
111
+
112
def write_pgpass(
    host: str, port: str, database: str, user: str, path: str | None = None
) -> None:
    """Write a single pgpass entry using a fresh Azure access token as password.

    Args:
        host: Database host name.
        port: Database port.
        database: Database name.
        user: Database user.
        path: Target pgpass file. Defaults to the platform's standard
            location (%APPDATA%/postgresql/pgpass.conf on Windows,
            ~/.pgpass elsewhere).
    """
    password = get_azure_access_token()
    conn_string = f"{host}:{port}:{database}:{user}:{password}"

    if not path:
        if os.name == "nt":
            path = Path(os.getenv("APPDATA")) / "postgresql" / "pgpass.conf"
        else:
            # Path.home() instead of the literal (never-expanded) "$home".
            path = Path.home() / ".pgpass"
    else:
        # Accept both str and Path arguments; .parent below needs a Path.
        path = Path(path)

    path.parent.mkdir(parents=True, exist_ok=True)

    with open(path, "w") as f:
        f.write(conn_string)

    if os.name != "nt":
        # chmod takes an octal int mode, not the string "0600"; libpq
        # refuses pgpass files readable by group/others.
        path.chmod(0o600)
132
+
133
+
134
def write_multiple_pgpass(conn_details, path: str | None = None) -> None:
    """Write one pgpass line per connection, all sharing a fresh Azure token.

    Args:
        conn_details: Iterable of dicts with "host", "port", "database" and
            "user" keys.
        path: Target pgpass file. Defaults to the platform's standard
            location (%APPDATA%/postgresql/pgpass.conf on Windows,
            ~/.pgpass elsewhere).
    """
    password = get_azure_access_token()

    conn_strings = [
        f'{c["host"]}:{c["port"]}:{c["database"]}:{c["user"]}:{password}'
        for c in conn_details
    ]

    if not path:
        if os.name == "nt":
            path = Path(os.getenv("APPDATA")) / "postgresql" / "pgpass.conf"
        else:
            # Path.home() instead of the literal (never-expanded) "$home".
            path = Path.home() / ".pgpass"
    else:
        # Accept both str and Path arguments; .parent below needs a Path.
        path = Path(path)

    path.parent.mkdir(parents=True, exist_ok=True)

    with open(path, "w") as f:
        f.writelines(line + "\n" for line in conn_strings)

    if os.name != "nt":
        # chmod takes an octal int mode, not the string "0600"; libpq
        # refuses pgpass files readable by group/others.
        path.chmod(0o600)
@@ -17,7 +17,7 @@ def get_geo_json(
17
17
  Returns:
18
18
  dict[str, str]: geo json containing the desired level and year
19
19
  """
20
- base_url = "https://gitlab.com/os-amsterdam/datavisualisatie-onderzoek-en-statistiek/-/raw/main/geo/"
20
+ base_url = "https://gitlab.com/os-amsterdam/datavisualisatie-onderzoek-en-statistiek/-/raw/main/public/geo/"
21
21
 
22
22
  if mra:
23
23
  level = f"{level}-mra"
@@ -1,4 +1,5 @@
1
1
  import plotly.express as px
2
+
2
3
  from .styler import BaseStyle
3
4
 
4
5
  basestyle = BaseStyle()
@@ -14,6 +15,7 @@ def bar(
14
15
  barmode=None,
15
16
  width=750,
16
17
  height=490,
18
+ font="Amsterdam Sans",
17
19
  **kwargs,
18
20
  ):
19
21
  fig = px.bar(
@@ -21,7 +23,7 @@ def bar(
21
23
  x=x,
22
24
  y=y,
23
25
  color=color,
24
- template=basestyle.get_base_template("bar", orientation=orientation),
26
+ template=basestyle.get_base_template("bar", orientation=orientation, font=font),
25
27
  width=width,
26
28
  color_discrete_sequence=color_discrete_sequence,
27
29
  height=height,
@@ -43,6 +45,7 @@ def stacked_single(
43
45
  color: str = None,
44
46
  color_discrete_sequence: list = None,
45
47
  orientation="v",
48
+ font="Amsterdam Sans",
46
49
  **kwargs,
47
50
  ):
48
51
  fig = bar(
@@ -53,6 +56,7 @@ def stacked_single(
53
56
  color_discrete_sequence=color_discrete_sequence,
54
57
  barmode="relative",
55
58
  orientation=orientation,
59
+ font=font,
56
60
  **kwargs,
57
61
  )
58
62
 
@@ -71,6 +75,7 @@ def stacked_multiple(
71
75
  color: str = None,
72
76
  color_discrete_sequence: list = None,
73
77
  orientation="v",
78
+ font="Amsterdam Sans",
74
79
  **kwargs,
75
80
  ):
76
81
  fig = bar(
@@ -81,6 +86,7 @@ def stacked_multiple(
81
86
  color_discrete_sequence=color_discrete_sequence,
82
87
  barmode="stack",
83
88
  orientation=orientation,
89
+ font=font,
84
90
  **kwargs,
85
91
  )
86
92
 
@@ -94,6 +100,7 @@ def grouped(
94
100
  color: str = None,
95
101
  color_discrete_sequence: list = None,
96
102
  orientation="v",
103
+ font="Amsterdam Sans",
97
104
  **kwargs,
98
105
  ):
99
106
  fig = bar(
@@ -104,6 +111,7 @@ def grouped(
104
111
  color_discrete_sequence=color_discrete_sequence,
105
112
  barmode="group",
106
113
  orientation=orientation,
114
+ font=font,
107
115
  **kwargs,
108
116
  )
109
117
 
@@ -116,6 +124,7 @@ def single(
116
124
  y: str,
117
125
  color_discrete_sequence: list = None,
118
126
  orientation="v",
127
+ font="Amsterdam Sans",
119
128
  **kwargs,
120
129
  ):
121
130
  fig = bar(
@@ -124,6 +133,7 @@ def single(
124
133
  y=y,
125
134
  color_discrete_sequence=color_discrete_sequence,
126
135
  orientation=orientation,
136
+ font=font,
127
137
  **kwargs,
128
138
  )
129
139
 
@@ -16,8 +16,10 @@ STYLE_OLD = {
16
16
 
17
17
  STYLE_NEW = {
18
18
  "font_bold": {"family": "Amsterdam Sans ExtraBold, Corbel", "size": 15},
19
+ "font_bold_corbel": {"family": "Corbel Bold", "size": 15},
19
20
  "font": {"family": "Amsterdam Sans, Corbel", "size": 15},
20
- "axis_font": {"family": font, "size": 15},
21
+ "font_corbel": {"family": "Corbel", "size": 15},
22
+ "axis_font": {"family": "Amsterdam Sans ExtraBold, Corbel", "size": 15},
21
23
  "plot_bgcolor": "#FFFFFF",
22
24
  "gridline_color": "#dbdbdb",
23
25
  "gridline_width": 0.75,
@@ -5,7 +5,17 @@ from .styler import BaseStyle
5
5
  basestyle = BaseStyle()
6
6
 
7
7
 
8
- def line(data, x, y, color: None, width=750, height=490, **kwargs):
8
+ def line(
9
+ data,
10
+ x,
11
+ y,
12
+ color: None,
13
+ width=750,
14
+ height=490,
15
+ color_discrete_sequence=None,
16
+ font="Amsterdam Sans",
17
+ **kwargs,
18
+ ):
9
19
  fig = px.line(
10
20
  data_frame=data,
11
21
  x=x,
@@ -13,7 +23,8 @@ def line(data, x, y, color: None, width=750, height=490, **kwargs):
13
23
  color=color,
14
24
  width=width,
15
25
  height=height,
16
- template=BaseStyle().get_base_template(graph_type="line"),
26
+ color_discrete_sequence=color_discrete_sequence,
27
+ template=BaseStyle().get_base_template(graph_type="line", font=font),
17
28
  **kwargs,
18
29
  )
19
30
 
@@ -1,4 +1,5 @@
1
1
  import plotly.express as px
2
+
2
3
  from .styler import BaseStyle
3
4
 
4
5
  basestyle = BaseStyle()
@@ -12,6 +13,8 @@ def pie(
12
13
  width=750,
13
14
  height=490,
14
15
  text_format: str = None,
16
+ color_discrete_sequence=None,
17
+ font="Amsterdam Sans",
15
18
  **kwargs,
16
19
  ):
17
20
  fig = px.pie(
@@ -21,7 +24,8 @@ def pie(
21
24
  width=width,
22
25
  height=height,
23
26
  hole=hole,
24
- template=BaseStyle().get_base_template(),
27
+ template=BaseStyle().get_base_template(font=font),
28
+ color_discrete_sequence=color_discrete_sequence,
25
29
  **kwargs,
26
30
  )
27
31
 
@@ -109,7 +109,6 @@ class BaseStyle:
109
109
  self.style = json.load(file)
110
110
 
111
111
  def _get_axis_format(self):
112
- self.gridline_color = "#dbdbdb" # Jorren vragen om deze aan te passen
113
112
 
114
113
  return {
115
114
  "zerolinecolor": self.style["gridline_color"],
@@ -121,29 +120,37 @@ class BaseStyle:
121
120
  "showgrid": self.style["showgrid"],
122
121
  }
123
122
 
124
- def _get_base_template_layout(self):
123
+ def _get_base_template_layout(self, font):
124
+ if font == "Amsterdam Sans":
125
+ font_ = self.style["font"]
126
+ font_bold_ = self.style["font_bold"]
127
+ elif font == "Corbel":
128
+ font_ = self.style["font_corbel"]
129
+ font_bold_ = self.style["font_bold_corbel"]
130
+ else:
131
+ raise ValueError("Font should be 'Amsterdam Sans' or 'Corbel'")
132
+
125
133
  return go.layout.Template(
126
134
  layout={
127
- # "font": self.styles["font_bold"],
128
135
  "xaxis": {
129
- "tickfont": self.style["font_bold"],
136
+ "tickfont": font_bold_,
130
137
  },
131
138
  "yaxis": {
132
- "tickfont": {
133
- "family": self.style["axis_font"]["family"],
134
- "size": self.style["axis_font"]["size"],
135
- },
139
+ "tickfont": font_bold_,
136
140
  },
137
- "legend": {"font": self.style["font"]},
141
+ "legend": {"font": font_},
138
142
  "plot_bgcolor": self.style["plot_bgcolor"],
139
- # "colorway": self.colors["darkblue_lightblue_gradient_5"],
140
- "separators": ",", # Jorren vragen om deze toe te voegen
141
- "font": self.style["font_bold"],
143
+ "separators": ",",
144
+ "font": font_bold_,
142
145
  }
143
146
  )
144
147
 
145
148
  def get_base_template(
146
- self, graph_type: str = None, orientation: str = None, colors: str = None
149
+ self,
150
+ graph_type: str = None,
151
+ orientation: str = None,
152
+ colors: str = None,
153
+ font: str = "Amsterdam Sans",
147
154
  ):
148
155
  """[summary]
149
156
 
@@ -158,7 +165,7 @@ class BaseStyle:
158
165
  Returns:
159
166
  [type]: [description]
160
167
  """
161
- base_template = self._get_base_template_layout()
168
+ base_template = self._get_base_template_layout(font)
162
169
  axis_format = self._get_axis_format()
163
170
 
164
171
  if graph_type == "bar":
@@ -0,0 +1,76 @@
1
+ from pathlib import Path
2
+
3
+ import pandas as pd
4
+ import win32com.client as win32
5
+
6
+
7
def remove_underscores_from_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Replace every underscore in the column labels with a space.

    Mutates *df* in place and returns it for convenient chaining.
    """
    cleaned_labels = df.columns.str.replace("_", " ")
    df.columns = cleaned_labels
    return df
10
+
11
+
12
def get_excel_files_from_folder(folder: str) -> list[str]:
    """Return absolute paths of all .xlsx files directly inside *folder*.

    Fixes the previous version, which ignored *folder* and always scanned
    the hard-coded "to_merge" directory.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        list[str]: Resolved paths of files whose suffix is exactly ".xlsx".
    """
    return [str(f.resolve()) for f in Path(folder).glob("*") if f.suffix == ".xlsx"]
14
+
15
+
16
def combine_excel_files(out_path: str, files: list[str] = None, overwrite: bool = True):
    """Merge the worksheets of several .xlsx files into one workbook via Excel COM.

    Requires Excel on Windows (win32com). Sheet names are preserved; on a
    name collision the sheet is renamed "<name>_<n>".

    Args:
        out_path: Path of the combined workbook to create.
        files: Absolute paths of the workbooks to merge.
            NOTE(review): defaults to None but is iterated unconditionally,
            so calling without *files* raises TypeError — confirm intended
            default (possibly get_excel_files_from_folder output).
        overwrite: Remove an existing *out_path* before writing.
    """
    out_path = Path(out_path)

    if overwrite:
        if out_path.exists():
            out_path.unlink()

    # INITIALIZE EXCEL COM APP
    try:
        xlapp = win32.gencache.EnsureDispatch("Excel.Application")

        # Excel COM constants (xlPasteValues / lPasteFormats are currently unused).
        xlPasteValues = -4163
        lPasteFormats = -4122
        xlWorkbookDefault = 51

        # create new workbook
        new_wb = xlapp.Workbooks.Add()
        new_wb.SaveAs(Filename=str(out_path), FileFormat=xlWorkbookDefault)

        # Counter used to make duplicate sheet names unique.
        dup_count = 1

        for wb in files:
            xlwb = xlapp.Workbooks.Open(wb)

            for xlsh in xlwb.Worksheets:
                new_sh = new_wb.Worksheets.Add()

                try:
                    new_sh.Name = xlsh.Name

                # Deliberately broad: COM raises a generic error on duplicate
                # sheet names. Be aware this will also capture unrelated failures.
                except Exception as e:
                    new_sh.Name = f"{xlsh.Name}_{dup_count}"
                    dup_count += 1

                new_wb.Save()
                new_sh.Move(After=new_wb.Worksheets(new_wb.Worksheets.Count))

                # Cell-by-cell copy keeps both values and formatting.
                xlsh.Cells.Copy(new_sh.Cells)
                new_sh = None

            xlwb.Close(True)
            xlwb = None

        # remove default blad1 ("Blad1" is the Dutch-locale default sheet name)
        new_wb.Worksheets("Blad1").Delete()
        new_wb.Save()

    except Exception as e:
        print(e)

    # RELEASE RESOURCES
    finally:
        xlsh = None
        new_sh = None
        xlwb = None
        new_wb = None
        xlapp.Quit()
        xlapp = None
        xlwb = None
@@ -7,8 +7,7 @@ from typing import Any, Callable, Dict
7
7
  import numpy as np
8
8
  import pandas as pd
9
9
  from openpyxl import Workbook
10
- from openpyxl.styles import (Alignment, Border, Font, PatternFill, Protection,
11
- Side)
10
+ from openpyxl.styles import Alignment, Border, Font, PatternFill, Protection, Side
12
11
  from openpyxl.utils import get_column_letter
13
12
 
14
13
  Fmt = list[list[dict[str, Any]]]
@@ -47,6 +46,38 @@ def set_global_style(style: str) -> None:
47
46
  STYLES = STYLE_NEW
48
47
 
49
48
 
49
def cols_to_str(df: pd.DataFrame) -> pd.DataFrame:
    """Cast single-level column labels to strings, in place.

    Multiindex columns are left untouched because their labels are always
    strings and cannot be cast this way.

    Args:
        df (pd.DataFrame): Dataframe whose column labels may need casting.

    Returns:
        pd.DataFrame: The same dataframe, with string column labels.
    """
    # Guard clause: nothing to do for multiindex columns.
    if df.columns.nlevels > 1:
        return df

    df.columns = df.columns.astype(str)
    return df
65
+
66
+
67
+ def get_max_col_widths(data: pd.DataFrame | np.ndarray) -> list[float]:
68
+ col_widths = []
69
+ if isinstance(data, pd.DataFrame):
70
+ for col in zip(*flatten_multiindex_columns(data)):
71
+ col_widths.append(max(len(e) for e in col))
72
+ else:
73
+ for col in zip(*data):
74
+ col_widths.append(max(len(str(e)) for e in col))
75
+
76
+ col_widths = [col_width * 1.13 for col_width in col_widths]
77
+
78
+ return col_widths
79
+
80
+
50
81
  def flatten_multiindex_columns(df):
51
82
  column_multi = []
52
83
  for level in range(df.columns.nlevels):
@@ -68,7 +99,18 @@ def df_to_array(df: pd.DataFrame) -> np.ndarray:
68
99
  return np.vstack([column_names, df.to_numpy()])
69
100
 
70
101
 
71
- def get_cells_to_merge(df: pd.DataFrame) -> list[list[int]]:
102
+ def get_cells_to_merge(df: pd.DataFrame) -> dict[int : list[int, int]]:
103
+ """Pandas dataframes sometimes have mutliindex columns. For all but the last level
104
+ a dictionary is created to merge the cells. The last level isn't merged because these
105
+ contain unique column names
106
+
107
+ Args:
108
+ df (pd.DataFrame): Pandas dataframe. If the dataframe has multicolumn indices
109
+ a dictionary containing the cells to merge is returned
110
+
111
+ Returns:
112
+ dict[int: list[int, int]]: Dictionary containing the cells to merge
113
+ """
72
114
  levels = flatten_multiindex_columns(df)[:-1]
73
115
 
74
116
  cells_to_merge = {}
@@ -331,14 +373,15 @@ def cell_formatting(
331
373
  return fmt
332
374
 
333
375
 
334
- def write_worksheet(
376
+ def write_to_worksheet(
335
377
  ws: Any,
336
378
  arr: np.ndarray,
337
379
  fmt: Fmt,
338
380
  title: str | None = None,
339
381
  source: str | None = None,
340
382
  col_filter: bool | None = None,
341
- autofit_columns: bool | None = None,
383
+ col_widths: list | None = None,
384
+ min_column_width: int | None = None,
342
385
  cells_to_merge: list[list[int]] | None = None,
343
386
  ) -> None:
344
387
  """Writing data to worksheet. Used for writing values to cells and formatting the cells
@@ -379,36 +422,39 @@ def write_worksheet(
379
422
  filters = ws.auto_filter
380
423
  filters.ref = f"A1:{excel_style(len(fmt), len(fmt[0]))}"
381
424
 
382
- if autofit_columns:
383
- _autofit_columns(ws)
384
-
385
425
  if source:
386
426
  _insert_source(ws, source, arr)
387
427
 
428
+ if col_widths:
429
+ _set_column_widths(ws, col_widths, min_column_width)
430
+
388
431
  if title:
389
432
  _insert_title(ws, title)
390
433
 
391
434
  if cells_to_merge:
392
- _merge_cells(ws, cells_to_merge)
435
+ _merge_cells(ws, cells_to_merge, title)
393
436
 
394
437
 
395
- # def _set_column_width(ws: Any, column_widths: list) -> None:
396
- # for i, column_number in enumerate(range(ws.max_column)):
397
- # column_letter = get_column_letter(column_letter)
398
- # column_width = column_widths[i]
399
- # ws.column_dimensions[column_letter].width = column_width
438
def _set_column_widths(
    ws: Any, col_widths: list[int], min_column_width: int | None
) -> None:
    """Apply the given widths column by column, enforcing an optional minimum."""
    for position, width in enumerate(col_widths, start=1):
        # Never go below the requested minimum (when one is given).
        if min_column_width and width < min_column_width:
            width = min_column_width

        ws.column_dimensions[get_column_letter(position)].width = width
401
449
 
402
- def _autofit_columns(ws: Any) -> None:
403
- column_letters = tuple(
404
- get_column_letter(col_number + 1) for col_number in range(ws.max_column)
405
- )
406
- for column_letter in column_letters:
407
- ws.column_dimensions[column_letter].auto_fit = True
408
450
 
451
+ def _merge_cells(ws, cells_to_merge, title: str | None = None) -> None:
452
+ add = 0
453
+ if title:
454
+ add = 1
409
455
 
410
- def _merge_cells(ws, cells_to_merge):
411
456
  for row_idx, merge in cells_to_merge.items():
457
+ row_idx = row_idx + add
412
458
  for start, stop in merge:
413
459
  cell = ws.cell(row_idx + 1, start)
414
460
  cell.alignment = Alignment(horizontal="center")
@@ -422,14 +468,15 @@ def _merge_cells(ws, cells_to_merge):
422
468
 
423
469
def _insert_source(ws, source, arr):
    """Write the source attribution just below the table's last data row."""
    height, width = arr.shape
    # One row below the data, in the rightmost data column.
    cell = ws.cell(height + 1, width, source)
    cell.alignment = Alignment(horizontal="right")
    cell.font = Font(**STYLES["calibri"]["font"])
428
474
 
429
475
 
430
476
def _insert_title(ws: Any, title: str) -> None:
    """Insert a styled title row above the table."""
    # Push all existing content down by one row.
    ws.insert_rows(0)
    cell = ws.cell(1, 1, title)
    cell.alignment = Alignment(horizontal="left")
    # Apply every styling attribute (font, fill, ...) from the global style.
    for t, kwa in STYLES["title_bold"].items():
        setattr(cell, t, LOOKUP[t](**kwa))
435
482
 
@@ -455,10 +502,99 @@ def write_table(
455
502
  blue_border: bool | None = True,
456
503
  blue_border_row_ids: int | list[int] | None = None,
457
504
  number_format: str = "0.0",
458
- autofit_columns: bool | None = False,
505
+ autofit_columns: str | None = "column_names",
506
+ min_column_width: int | None = None,
459
507
  col_filter: bool | None = False,
460
508
  style: str = "old",
461
509
  combine_multiindex: bool | int = False,
510
+ column_names_to_string: bool = True,
511
+ ):
512
+ wb = Workbook()
513
+ # Empty sheet is created on Workbook creation
514
+ del wb["Sheet"]
515
+
516
+ set_global_style(style)
517
+
518
+ if not isinstance(data, dict):
519
+ data = {"Sheet1": data}
520
+
521
+ for sheet_name, df in data.items():
522
+ format_worksheet(
523
+ wb=wb,
524
+ df=df,
525
+ sheet_name=sheet_name,
526
+ header_row=header_row,
527
+ title=title,
528
+ source=source,
529
+ total_row=total_row,
530
+ light_blue_row_ids=light_blue_row_ids,
531
+ total_col=total_col,
532
+ right_align_ids=right_align_ids,
533
+ right_align_pattern=right_align_pattern,
534
+ right_align_numeric=right_align_numeric,
535
+ left_align_ids=left_align_ids,
536
+ left_align_pattern=left_align_pattern,
537
+ left_align_string=left_align_string,
538
+ perc_ids=perc_ids,
539
+ perc_pattern=perc_pattern,
540
+ perc_col_format=perc_col_format,
541
+ blue_border=blue_border,
542
+ blue_border_row_ids=blue_border_row_ids,
543
+ number_format=number_format,
544
+ autofit_columns=autofit_columns,
545
+ min_column_width=min_column_width,
546
+ col_filter=col_filter,
547
+ combine_multiindex=combine_multiindex,
548
+ column_names_to_string=column_names_to_string,
549
+ )
550
+
551
+ wb.save(file)
552
+
553
+
554
def write_table_from_dict(
    file,
    write_info,
    style: str = "old",
):
    """Write one formatted worksheet per entry of *write_info* to *file*.

    Args:
        file: Target .xlsx path.
        write_info: Iterable of dicts; each dict is passed as keyword
            arguments to format_worksheet (so it must at least supply the
            required df and sheet_name arguments).
        style: Global table style name passed to set_global_style.
    """
    wb = Workbook()
    # Empty sheet is created on Workbook creation
    del wb["Sheet"]

    set_global_style(style)

    for sheet in write_info:
        format_worksheet(wb=wb, **sheet)

    wb.save(file)
569
+
570
+
571
+ def format_worksheet(
572
+ wb: Any,
573
+ df: pd.DataFrame,
574
+ sheet_name: str,
575
+ header_row: int = 0,
576
+ title: str | dict[str, str] | None = None,
577
+ source: str | None = None,
578
+ total_row: bool | None = None,
579
+ light_blue_row_ids: int | list[int] | None = None,
580
+ total_col: bool | None = None,
581
+ right_align_ids: list | None = None,
582
+ right_align_pattern: str | None = None,
583
+ right_align_numeric: bool | None = True,
584
+ left_align_ids: list | None = None,
585
+ left_align_pattern: str | None = None,
586
+ left_align_string: bool | None = True,
587
+ perc_ids: list | None = None,
588
+ perc_pattern: str | None = None,
589
+ perc_col_format: str | None = None,
590
+ blue_border: bool | None = True,
591
+ blue_border_row_ids: int | list[int] | None = None,
592
+ number_format: str = "0.0",
593
+ autofit_columns: str | None = "column_names",
594
+ min_column_width: int | None = None,
595
+ col_filter: bool | None = False,
596
+ combine_multiindex: bool | int = False,
597
+ column_names_to_string: bool = True,
462
598
  ):
463
599
  """_summary_
464
600
 
@@ -481,107 +617,112 @@ def write_table(
481
617
  perc_col_format (str, optional): The formatting string of percentage columns. Defaults to None.
482
618
  col_filter (bool, optional): Set filter on columns. Defaults to False.
483
619
  """
620
+ if column_names_to_string == True:
621
+ df = cols_to_str(df)
484
622
 
485
- wb = Workbook()
486
- # Empty sheet is created on Workbook creation
487
- del wb["Sheet"]
623
+ arr = df_to_array(df)
488
624
 
489
- set_global_style(style)
625
+ blue_rows = []
626
+ light_blue_rows = []
627
+ light_blue_cols = []
628
+ blue_border_ids = []
629
+ r_align_ids = []
630
+ l_align_ids = []
631
+ p_ids = []
632
+ cells_to_merge = []
633
+ title_tbl = None
634
+ title_src = None
490
635
 
491
- if not isinstance(data, dict):
492
- data = {"Sheet1": data}
493
-
494
- for sheet_name, df in data.items():
495
- arr = df_to_array(df)
636
+ if isinstance(header_row, int):
637
+ blue_rows.extend(list(range(0, header_row + 1)))
496
638
 
497
- blue_rows = []
498
- light_blue_rows = []
499
- light_blue_cols = []
500
- blue_border_ids = []
501
- r_align_ids = []
502
- l_align_ids = []
503
- p_ids = []
504
- cells_to_merge = []
505
- title_tbl = None
506
-
507
- if isinstance(header_row, int):
508
- blue_rows.extend(list(range(0, header_row + 1)))
509
-
510
- if title:
511
- if isinstance(title, str):
512
- title_tbl = title
513
- elif isinstance(title, dict):
514
- title_tbl = title.get(sheet_name)
515
-
516
- if right_align_ids:
517
- r_align_ids.extend(right_align_ids)
639
+ if title:
640
+ if isinstance(title, str):
641
+ title_tbl = title
642
+ elif isinstance(title, dict):
643
+ title_tbl = title.get(sheet_name)
518
644
 
519
- if right_align_pattern:
520
- r_align_ids.extend(get_cols_id_with_pattern(df, right_align_pattern))
645
+ if source:
646
+ if isinstance(source, str):
647
+ title_src = source
648
+ elif isinstance(title, dict):
649
+ title_src = source.get(sheet_name)
521
650
 
522
- if right_align_numeric:
523
- r_align_ids.extend(get_numeric_col_ids(df))
651
+ if right_align_ids:
652
+ r_align_ids.extend(right_align_ids)
524
653
 
525
- if left_align_ids:
526
- r_align_ids.extend(left_align_ids)
654
+ if right_align_pattern:
655
+ r_align_ids.extend(get_cols_id_with_pattern(df, right_align_pattern))
527
656
 
528
- if left_align_pattern:
529
- l_align_ids.extend(get_cols_id_with_pattern(df, left_align_pattern))
657
+ if right_align_numeric:
658
+ r_align_ids.extend(get_numeric_col_ids(df))
530
659
 
531
- if left_align_string:
532
- l_align_ids.extend(get_string_cols_ids(df))
660
+ if left_align_ids:
661
+ r_align_ids.extend(left_align_ids)
533
662
 
534
- if perc_ids:
535
- p_ids.extend(perc_ids)
663
+ if left_align_pattern:
664
+ l_align_ids.extend(get_cols_id_with_pattern(df, left_align_pattern))
536
665
 
537
- if perc_pattern:
538
- r_id = get_cols_id_with_pattern(df, perc_pattern)
539
- p_ids.extend(r_id)
540
- r_align_ids.extend(r_id)
666
+ if left_align_string:
667
+ l_align_ids.extend(get_string_cols_ids(df))
541
668
 
542
- if total_row:
543
- light_blue_rows.append(arr.shape[0] - 1)
669
+ if perc_ids:
670
+ p_ids.extend(perc_ids)
544
671
 
545
- if light_blue_row_ids:
546
- light_blue_rows.extend(light_blue_row_ids)
672
+ if perc_pattern:
673
+ r_id = get_cols_id_with_pattern(df, perc_pattern)
674
+ p_ids.extend(r_id)
675
+ r_align_ids.extend(r_id)
547
676
 
548
- if total_col:
549
- light_blue_cols.append(arr.shape[1] - 1)
677
+ if total_row:
678
+ light_blue_rows.append(arr.shape[0] - 1)
550
679
 
551
- if blue_border:
552
- blue_border_ids.append(arr.shape[0] - 1)
680
+ if light_blue_row_ids:
681
+ light_blue_rows.extend(light_blue_row_ids)
553
682
 
554
- if blue_border_row_ids:
555
- blue_border_ids.extend(blue_border_row_ids)
683
+ if total_col:
684
+ light_blue_cols.append(arr.shape[1] - 1)
556
685
 
557
- if combine_multiindex:
558
- cells_to_merge = get_cells_to_merge(df)
686
+ if blue_border:
687
+ blue_border_ids.append(arr.shape[0] - 1)
559
688
 
560
- ws = wb.create_sheet(sheet_name)
689
+ if blue_border_row_ids:
690
+ blue_border_ids.extend(blue_border_row_ids)
561
691
 
562
- fmt = cell_formatting(
563
- arr=arr,
564
- default_format=STYLES["calibri"],
565
- blue_row_ids=blue_rows,
566
- light_blue_row_ids=light_blue_rows,
567
- light_blue_col_ids=light_blue_cols,
568
- left_align_ids=l_align_ids,
569
- right_align_ids=r_align_ids,
570
- perc_col_ids=p_ids,
571
- perc_col_format=perc_col_format,
572
- number_format=number_format,
573
- blue_border_ids=blue_border_ids,
574
- )
692
+ if combine_multiindex:
693
+ cells_to_merge = get_cells_to_merge(df)
575
694
 
576
- write_worksheet(
577
- ws=ws,
578
- arr=arr,
579
- fmt=fmt,
580
- title=title_tbl,
581
- source=source,
582
- col_filter=col_filter,
583
- autofit_columns=autofit_columns,
584
- cells_to_merge=cells_to_merge,
585
- )
695
+ if autofit_columns == "column_names":
696
+ col_widths = get_max_col_widths(df)
697
+ elif autofit_columns == "all_data":
698
+ col_widths = get_max_col_widths(arr)
699
+ else:
700
+ col_widths = None
701
+
702
+ ws = wb.create_sheet(sheet_name)
703
+
704
+ fmt = cell_formatting(
705
+ arr=arr,
706
+ default_format=STYLES["calibri"],
707
+ blue_row_ids=blue_rows,
708
+ light_blue_row_ids=light_blue_rows,
709
+ light_blue_col_ids=light_blue_cols,
710
+ left_align_ids=l_align_ids,
711
+ right_align_ids=r_align_ids,
712
+ perc_col_ids=p_ids,
713
+ perc_col_format=perc_col_format,
714
+ number_format=number_format,
715
+ blue_border_ids=blue_border_ids,
716
+ )
586
717
 
587
- wb.save(file)
718
+ write_to_worksheet(
719
+ ws=ws,
720
+ arr=arr,
721
+ fmt=fmt,
722
+ title=title_tbl,
723
+ source=title_src,
724
+ col_filter=col_filter,
725
+ col_widths=col_widths,
726
+ cells_to_merge=cells_to_merge,
727
+ min_column_width=min_column_width,
728
+ )
@@ -1,13 +1,13 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: toolsos
3
- Version: 0.2.0
3
+ Version: 0.2.6
4
4
  Summary: OS tools
5
5
  Author-email: OS <d.schmitz@amsterdam.nl>
6
6
  Keywords: tools,Onderzoek & Statistiek
7
7
  Classifier: License :: OSI Approved :: MIT License
8
8
  Classifier: Programming Language :: Python
9
9
  Classifier: Programming Language :: Python :: 3
10
- Requires-Python: >=3.11
10
+ Requires-Python: >=3.10
11
11
  Description-Content-Type: text/markdown
12
12
  Provides-Extra: dev
13
13
  Requires-Dist: black; extra == "dev"
@@ -63,5 +63,12 @@ Instructions on building a package can be found [here](https://packaging.python.
63
63
 
64
64
  - make a pypi account
65
65
  - ask to be added as collaborator to toolsos
66
- - first update twine: py -m pip install --upgrade twin
66
+ - first update twine: py -m pip install --upgrade twine
67
67
  - upload to pypi: twine upload dist/* --skip-existing
68
+
69
+ ## Install to local environment for testing
70
+
71
+ - python -m venv local (maak een lokale venv aan)
72
+ - local\Scripts\activate (activeer de venv)
73
+ - pip install -e . (installeer toolsos)
74
+ - pip install -r local_requirements.txt (installeer de benodigde dependencies)
@@ -3,8 +3,6 @@ pyproject.toml
3
3
  src/toolsos/__init__.py
4
4
  src/toolsos/cbs_tools.py
5
5
  src/toolsos/create_tables.py
6
- src/toolsos/database_connection.py
7
- src/toolsos/database_transfer.py
8
6
  src/toolsos/download.py
9
7
  src/toolsos/geo.py
10
8
  src/toolsos/helpers.py
@@ -14,6 +12,8 @@ src/toolsos.egg-info/SOURCES.txt
14
12
  src/toolsos.egg-info/dependency_links.txt
15
13
  src/toolsos.egg-info/requires.txt
16
14
  src/toolsos.egg-info/top_level.txt
15
+ src/toolsos/database/database_connection.py
16
+ src/toolsos/database/database_transfer.py
17
17
  src/toolsos/huisstijl/__init__.py
18
18
  src/toolsos/huisstijl/colors.py
19
19
  src/toolsos/huisstijl/graphs/__init__.py
@@ -23,5 +23,6 @@ src/toolsos/huisstijl/graphs/linegraph.py
23
23
  src/toolsos/huisstijl/graphs/piegraph.py
24
24
  src/toolsos/huisstijl/graphs/styler.py
25
25
  src/toolsos/huisstijl/tables/__init__.py
26
+ src/toolsos/huisstijl/tables/table_helpers.py
26
27
  src/toolsos/huisstijl/tables/table_styles.py
27
28
  src/toolsos/huisstijl/tables/tables.py
File without changes
File without changes
File without changes
File without changes