wuti-py-util 0.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 WUTI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,34 @@
1
+ Metadata-Version: 2.4
2
+ Name: wuti-py-util
3
+ Version: 0.6.1
4
+ Summary: wuti python utility libraries
5
+ Project-URL: Homepage, https://github.com/wuti-labs
6
+ Author-email: Alex Qin <qinqinghai@hotmail.com>
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Keywords: csv,excel,validation
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Programming Language :: Python :: 3.14
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Requires-Dist: jsonschema>=4.25
23
+ Requires-Dist: pyyaml>=6.0
24
+ Provides-Extra: all
25
+ Requires-Dist: openpyxl>=3.1.5; extra == 'all'
26
+ Requires-Dist: pandas>=2.3; extra == 'all'
27
+ Requires-Dist: tabulate>=0.10.0; extra == 'all'
28
+ Provides-Extra: excel
29
+ Requires-Dist: openpyxl>=3.1.5; extra == 'excel'
30
+ Requires-Dist: pandas>=2.3; extra == 'excel'
31
+ Requires-Dist: tabulate>=0.10.0; extra == 'excel'
32
+ Provides-Extra: pandas
33
+ Requires-Dist: pandas>=2.3; extra == 'pandas'
34
+ Requires-Dist: tabulate>=0.10.0; extra == 'pandas'
@@ -0,0 +1,102 @@
1
# PEP 517 build configuration: the package is built with hatchling.
[build-system]
build-backend = "hatchling.build"
requires = [
    "hatchling",
]
4
+
5
[project]
name = "wuti-py-util"
# The version is not written here; hatchling reads it from the file named
# in [tool.hatch.version].
dynamic = ["version"]
description = "wuti python utility libraries"
# SPDX license expression (PEP 639). No "License :: ..." trove classifier is
# listed below: those classifiers are deprecated once an SPDX expression is
# declared, and build backends are allowed to reject the combination.
license = "MIT"
requires-python = ">=3.10"
authors = [{ name = "Alex Qin", email = "qinqinghai@hotmail.com" }]
keywords = ["csv", "excel", "validation"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: 3.14",
    "Topic :: Software Development :: Libraries :: Python Modules",
]
# Runtime requirements that are always installed.
dependencies = ["jsonschema>=4.25", "PyYAML>=6.0"]
27
+
28
# Optional extras. "all" and "excel" currently list the same requirements;
# "pandas" is the same set without openpyxl.
[project.optional-dependencies]
all = [
    "pandas>=2.3",
    "tabulate>=0.10.0",
    "openpyxl>=3.1.5",
]
excel = [
    "pandas>=2.3",
    "tabulate>=0.10.0",
    "openpyxl>=3.1.5",
]
pandas = [
    "pandas>=2.3",
    "tabulate>=0.10.0",
]

[project.urls]
Homepage = "https://github.com/wuti-labs"
35
+
36
# Development-only requirements (build, lint, type-check and test tooling,
# plus the optional runtime libraries listed under the extras).
[dependency-groups]
dev = [
    "build",
    "coverage[toml]>=7.13",
    "hatchling",
    "mypy>=1.18",
    "openpyxl>=3.1.5",
    "pandas>=2.3",
    "pytest>=9.0.2",
    "pytest-cov>=7.0",
    "ruff>=0.15",
    "tabulate>=0.10.0",
]
49
+
50
# File that carries the package version string.
[tool.hatch.version]
path = "src/wuti/util/__init__.py"

[tool.hatch.build.targets.wheel]
packages = [
    "src/wuti",
]

[tool.hatch.build.targets.sdist]
ignore-vcs = true
# Only the package sources are selected for the source distribution.
only-include = [
    "src/wuti",
]
exclude = ["/.github", "/data", "/tests", "/README.md"]

# Custom build hook that runs for the sdist target.
[tool.hatch.build.targets.sdist.hooks.custom]
path = "scripts/hatch_sdist_hook.py"
68
+
69
[tool.ruff]
src = [
    "src",
    "tests",
]
target-version = "py310"
line-length = 140

[tool.ruff.lint]
# Rule prefixes that are enabled.
select = [
    "E4",
    "E7",
    "E9",
    "F",
    "B",
]

[tool.ruff.lint.per-file-ignores]
# Rules switched off for everything under tests/.
"tests/**" = [
    "B017",
    "F841",
]
79
+
80
[tool.mypy]
# Check the package sources against the lowest Python version the project
# declares in requires-python.
files = [
    "src",
]
python_version = "3.10"
allow_redefinition = true
check_untyped_defs = true
ignore_missing_imports = true
warn_unused_configs = true
87
+
88
[tool.pytest.ini_options]
testpaths = [
    "tests",
]
pythonpath = [
    "src",
]
# Verbose run with branch coverage of src, reported as HTML and on the terminal.
addopts = "-v --cov=src --cov-branch --cov-report=html --cov-report=term --cov-report=term-missing"
filterwarnings = [
    "ignore:unclosed database:ResourceWarning",
]
93
+
94
[tool.coverage.run]
branch = true
relative_files = true
source = [
    "src",
]
omit = [
    "*/tests/*",
]

[tool.coverage.report]
# The run fails unless total coverage reaches 100%.
fail_under = 100
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
]
@@ -0,0 +1,46 @@
1
+ """Utility package for common tasks.
2
+
3
+ Submodules are imported lazily (PEP 562) so that `import wuti.util` stays
4
+ cheap and heavy optional dependencies (pandas, openpyxl, ...) are only
5
+ loaded when the submodules that need them are actually accessed.
6
+ """
7
+
8
+ import importlib
9
+
10
+ __version__ = "0.6.1"
11
+ __author__ = "Alex Qin"
12
+
13
+ _SUBMODULES = frozenset(
14
+ {
15
+ "check",
16
+ "conversion_stats",
17
+ "csv",
18
+ "dataclass_utils",
19
+ "database",
20
+ "datetime",
21
+ "environment",
22
+ "excel",
23
+ "exception",
24
+ "filehash",
25
+ "filelist",
26
+ "filesystem",
27
+ "log",
28
+ "serialization",
29
+ "string",
30
+ "util",
31
+ "validation",
32
+ "web",
33
+ }
34
+ )
35
+
36
+
37
def __getattr__(name):
    """Import a known submodule the first time it is accessed as an attribute.

    Args:
        name: Attribute name that was not found in the module namespace.

    Returns:
        The imported submodule when ``name`` is listed in ``_SUBMODULES``.

    Raises:
        AttributeError: If ``name`` is not one of the known submodules.
    """
    if name not in _SUBMODULES:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    loaded = importlib.import_module(f".{name}", __name__)
    # Store the module in the namespace so later lookups find it directly.
    globals()[name] = loaded
    return loaded
43
+
44
+
45
def __dir__():
    """Return the sorted module namespace names together with every lazy submodule name."""
    names = set(globals())
    names.update(_SUBMODULES)
    return sorted(names)
@@ -0,0 +1,82 @@
1
+ """Shared helpers for tabular readers (CsvReader / ExcelReader).
2
+
3
+ Internal module: column-name normalization/comparison and type-conversion
4
+ logic shared by csv.py and excel.py.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any, Optional, Sequence, Tuple
10
+
11
+ import pandas as pd
12
+
13
+ from .conversion_stats import DataConversionStats
14
+
15
+ # Accepted pandas downcast names that may be passed directly as a "type"
16
+ _DOWNCAST_NAMES = ("integer", "signed", "unsigned", "float")
17
+
18
+
19
def normalize_column_name(s: str) -> str:
    """Return a canonical form of a column name.

    The BOM character (U+FEFF) is removed, the text is lower-cased, and every
    run of whitespace is collapsed to a single space with no leading or
    trailing whitespace.
    """
    without_bom = s.replace("\ufeff", "")
    # str.split() with no separator drops leading/trailing whitespace and
    # splits on whitespace runs, which covers both the strip and the collapse.
    words = without_bom.lower().split()
    return " ".join(words)
22
+
23
+
24
def columns_equal(col1: str, col2: str) -> bool:
    """Report whether two column names refer to the same column.

    Names match when they are identical, or when they are equal after
    ``normalize_column_name`` has been applied to both.
    """
    if col1 == col2:
        # Identical names need no normalization.
        return True
    return normalize_column_name(col1) == normalize_column_name(col2)
27
+
28
+
29
def tuples_equal(tup1: Tuple[str, ...], tup2: Tuple[str, ...]) -> bool:
    """Report whether two column tuples (e.g. MultiIndex keys) match element by element.

    Tuples of different lengths never match; otherwise every pair of
    elements must satisfy ``columns_equal``.
    """
    if len(tup1) != len(tup2):
        return False
    for left, right in zip(tup1, tup2, strict=False):
        if not columns_equal(left, right):
            return False
    return True
32
+
33
+
34
+ def apply_type_conversions(
35
+ df: pd.DataFrame,
36
+ columns: Sequence[Any],
37
+ types: Sequence[Any],
38
+ errors: str,
39
+ conversion_stats: Optional[DataConversionStats] = None,
40
+ show_failed_values: bool = False,
41
+ ) -> None:
42
+ """Apply type conversions in-place on the given DataFrame columns.
43
+
44
+ Args:
45
+ df: DataFrame to modify in-place.
46
+ columns: Column names to convert (parallel to types).
47
+ types: Target types: str, int, float, None (skip), or a pandas
48
+ downcast name ('integer', 'signed', 'unsigned', 'float').
49
+ errors: How to handle conversion errors ('raise' or 'coerce').
50
+ conversion_stats: Optional DataConversionStats used to track conversions.
51
+ show_failed_values: Forwarded to convert_to_numeric when stats are used.
52
+
53
+ Raises:
54
+ TypeError: If an unsupported target type is given.
55
+ """
56
+ # Length mismatch is validated by callers before reaching here
57
+ for col, target_type in zip(columns, types, strict=False):
58
+ if target_type is None:
59
+ continue
60
+
61
+ if target_type is str:
62
+ if conversion_stats:
63
+ df[col], _ = conversion_stats.convert_to_string(df[col], strip=True)
64
+ else:
65
+ df[col] = df[col].astype(str).str.strip()
66
+ continue
67
+
68
+ if target_type is int:
69
+ downcast = "integer"
70
+ elif target_type is float:
71
+ downcast = "float"
72
+ elif target_type in _DOWNCAST_NAMES:
73
+ downcast = target_type
74
+ else:
75
+ raise TypeError(f"Unsupported type: {target_type}")
76
+
77
+ if conversion_stats:
78
+ df[col], _ = conversion_stats.convert_to_numeric(
79
+ df[col], downcast=downcast, errors=errors, show_failed_values=show_failed_values
80
+ )
81
+ else:
82
+ df[col] = pd.to_numeric(df[col], errors=errors, downcast=downcast)
@@ -0,0 +1,141 @@
1
+ """Module to check the current system and machine."""
2
+
3
+ import platform
4
+ from enum import Enum
5
+ from typing import Optional
6
+
7
+
8
class OS(Enum):
    """Operating systems recognized by this module.

    Each member's value is the string the helper functions compare against
    ``platform.system()``.
    """

    LINUX = "Linux"
    WINDOWS = "Windows"
    # macOS is matched through the "Darwin" system name.
    MACOS = "Darwin"
14
+
15
+
16
+ class Machine(Enum):
17
+ """Enum for machine architectures."""
18
+
19
+ X86_64 = "x86_64"
20
+ I386 = "i386"
21
+ ARM = "arm"
22
+ ARM64 = "arm64"
23
+ AARCH64 = "aarch64"
24
+
25
+
26
+ def _normalize_machine(machine: str) -> str:
27
+ """Map platform.machine() aliases to canonical Machine values."""
28
+ if machine == "AMD64":
29
+ return Machine.X86_64.value
30
+ return machine
31
+
32
+
33
+ # check the system
34
+
35
+
36
def is_linux():
    """Return True when ``platform.system()`` reports the Linux system name."""
    current_system = platform.system()
    return current_system == OS.LINUX.value
39
+
40
+
41
def is_windows():
    """Return True when ``platform.system()`` reports the Windows system name."""
    current_system = platform.system()
    return current_system == OS.WINDOWS.value
44
+
45
+
46
def is_mac():
    """Return True when ``platform.system()`` reports the macOS ("Darwin") system name."""
    current_system = platform.system()
    return current_system == OS.MACOS.value
49
+
50
+
51
+ # check the machine
52
def is_x86_64():
    """Return True when the normalized machine name is x86_64."""
    # Normalization folds known aliases of the architecture name first.
    architecture = _normalize_machine(platform.machine())
    return architecture == Machine.X86_64.value
55
+
56
+
57
def is_arm64():
    """Function to check if the current machine is an ARM machine.

    Returns True when ``platform.machine()`` names one of ``Machine.ARM``,
    ``Machine.ARM64`` or ``Machine.AARCH64``. The comparison ignores letter
    case so that an upper-case spelling such as "ARM64" is recognized as well.
    """
    arm_names = {Machine.ARM.value, Machine.ARM64.value, Machine.AARCH64.value}
    # All Machine values are lower-case, so lower-casing the reported name
    # makes the membership test case-insensitive.
    return platform.machine().lower() in arm_names
60
+
61
+
62
+ # check the macOS version
63
def get_macos_version() -> Optional[int]:
    """Return the first component of the macOS version as an int.

    Returns:
        The leading dotted component of ``platform.mac_ver()[0]`` converted
        to ``int``, or ``None`` when the system is not macOS.
    """
    if not is_mac():
        return None
    version_text = platform.mac_ver()[0]
    leading_component = version_text.split(".")[0]
    return int(leading_component)
68
+
69
+
70
def get_macos_release() -> Optional[int]:
    """Function to get the macOS minor release number.

    This is the second dotted component of ``platform.mac_ver()[0]``
    (for example 15 for "10.15.7").

    Returns:
        The minor release as an ``int``; ``0`` when the version string has no
        second component; ``None`` when the system is not macOS or no version
        string is available.
    """
    if not is_mac():
        return None
    version_text = platform.mac_ver()[0]
    if not version_text:
        # No version could be determined, so there is nothing to parse.
        return None
    components = version_text.split(".")
    if len(components) < 2:
        # A bare major version such as "11" is treated as "11.0".
        return 0
    return int(components[1])
75
+
76
+
77
def is_mac_os_x_catalina():
    """Return True when the macOS version is 10.15 (Catalina).

    Off macOS ``get_macos_version()`` yields ``None``, so the comparison is
    False there without a separate platform check.
    """
    return get_macos_version() == 10 and get_macos_release() == 15
83
+
84
+
85
def is_mac_os_big_sur():
    """Return True when the macOS major version is 11 (Big Sur)."""
    major = get_macos_version()
    return major == 11
88
+
89
+
90
def is_mac_os_monterey():
    """Return True when the macOS major version is 12 (Monterey)."""
    major = get_macos_version()
    return major == 12
93
+
94
+
95
def is_mac_os_ventura():
    """Return True when the macOS major version is 13 (Ventura)."""
    major = get_macos_version()
    return major == 13
98
+
99
+
100
def is_mac_os_sonoma():
    """Return True when the macOS major version is 14 (Sonoma)."""
    major = get_macos_version()
    return major == 14
103
+
104
+
105
def is_mac_os_sequoia():
    """Return True when the macOS major version is 15 (Sequoia)."""
    major = get_macos_version()
    return major == 15
108
+
109
+
110
def is_mac_os_tahoe():
    """Return True when the macOS major version is 26 (Tahoe)."""
    major = get_macos_version()
    return major == 26
113
+
114
+
115
def is_mac_os_golden_gate():
    """Return True when the macOS major version is 27 (the release this module calls Golden Gate)."""
    major = get_macos_version()
    return major == 27
118
+
119
+
120
def get_release_name():
    """Return the release string of the current system.

    On macOS this is the version string from ``platform.mac_ver()``; on
    Linux, Windows and any other system it is ``platform.release()``.
    """
    if is_mac():
        return platform.mac_ver()[0]
    # Every non-macOS system reports the same value.
    return platform.release()
130
+
131
+
132
def is_supported_platform() -> bool:
    """Return True when both the system and the machine are known to this module.

    The system must be one of the ``OS`` values and the normalized machine
    name must be one of the ``Machine`` values.
    """
    known_systems = {member.value for member in OS}
    known_machines = {member.value for member in Machine}
    if platform.system() not in known_systems:
        return False
    return _normalize_machine(platform.machine()) in known_machines
137
+
138
+
139
def get_uname():
    """Return the result of ``platform.uname()`` for the current platform."""
    uname_info = platform.uname()
    return uname_info