tk-normalizer 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Terakeet
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,231 @@
1
+ Metadata-Version: 2.4
2
+ Name: tk-normalizer
3
+ Version: 1.0.0
4
+ Summary: URL normalization library for consistent URL representation
5
+ Author-email: Terakeet <engineering@terakeet.com>
6
+ Maintainer-email: Terakeet <engineering@terakeet.com>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/terakeet/tk-normalizer
9
+ Project-URL: Repository, https://github.com/terakeet/tk-normalizer.git
10
+ Project-URL: Issues, https://github.com/terakeet/tk-normalizer/issues
11
+ Project-URL: Documentation, https://github.com/terakeet/tk-normalizer/blob/main/docs/ARCHITECTURE.md
12
+ Keywords: url,normalization,canonicalization,web,utilities
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Internet :: WWW/HTTP
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Classifier: Topic :: Text Processing :: Filters
23
+ Classifier: Operating System :: OS Independent
24
+ Requires-Python: >=3.11
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
29
+ Requires-Dist: pytest-mock>=3.0.0; extra == "dev"
30
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
31
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
32
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
33
+ Requires-Dist: build>=0.10.0; extra == "dev"
34
+ Requires-Dist: twine>=4.0.0; extra == "dev"
35
+ Dynamic: license-file
36
+
37
+ # tk-normalizer
38
+
39
+ [![Python](https://img.shields.io/pypi/pyversions/tk-normalizer.svg)](https://pypi.org/project/tk-normalizer/)
40
+ [![PyPI](https://img.shields.io/pypi/v/tk-normalizer.svg)](https://pypi.org/project/tk-normalizer/)
41
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
42
+
43
+ URL normalization library for creating consistent URL representations.
44
+
45
+ ## Purpose
46
+
47
+ The URL normalization process provides equivalence between URLs that differ in case, scheme, subdomains, and query parameter ordering. This library helps create normalized representations of URLs for consistent storage, comparison, and analysis.
48
+
49
+ ## Installation
50
+
51
+ ```bash
52
+ pip install tk-normalizer
53
+ ```
54
+
55
+ ## Quick Start
56
+
57
+ ```python
58
+ from tk_normalizer import normalize_url
59
+
60
+ # Simple usage with the convenience function
61
+ normalized = normalize_url("http://www.Example.com/path?b=2&a=1&utm_source=test")
62
+ print(normalized) # Output: example.com/path?a=1&b=2
63
+
64
+ # Using the class directly for more control
65
+ from tk_normalizer import TkNormalizer
66
+
67
+ normalizer = TkNormalizer("http://www.Example.com/path?b=2&a=1&utm_source=test")
68
+ print(normalizer.normalized_url) # example.com/path?a=1&b=2
69
+ print(normalizer.get_normalized_url()) # Full details including hashes
70
+ ```
71
+
72
+ ## Features
73
+
74
+ ### URL Normalization
75
+
76
+ The following URLs all normalize to the same form:
77
+
78
+ ```
79
+ https://example.com/
80
+ http://www.example.com/
81
+ http://www.example.com
82
+ http://www.example.com/#my_search_engine_is_great
83
+ https://www.example.com/?utm_campaign=SomeGoogleCampaign
84
+ https://www.example.com/?utm_source=because&utm_campaign=SomeGoogleCampaign
85
+ ```
86
+
87
+ All normalize to: `example.com`
88
+
89
+ ### Normalization Process
90
+
91
+ URLs are normalized through the following steps:
92
+
93
+ - ✅ Protocol and www subdomains removed
94
+ - ✅ Lowercased
95
+ - ✅ Trailing slashes removed
96
+ - ✅ Query parameters reordered alphabetically by key
97
+ - ✅ Duplicate query parameter key/value pairs removed
98
+ - ✅ Common tracking parameters removed (utm_*, gclid, fbclid, etc.)
99
+ - ✅ Non-HTTP(S) protocols rejected
100
+ - ✅ Localhost URLs rejected
101
+
102
+ ### Tracking Parameters Removed
103
+
104
+ The following tracking parameters are automatically removed during normalization:
105
+
106
+ - `utm_*` (all utm parameters)
107
+ - `gclid`, `fbclid`, `dclid` (click identifiers)
108
+ - `_ga`, `_gid`, `_fbp`, `_hjid` (analytics cookies)
109
+ - `msclkid` (Microsoft Ads)
110
+ - `aff_id`, `affid` (affiliate tracking)
111
+ - `referrer`, `adgroupid`, `srsltid`
112
+
113
+ ## Advanced Usage
114
+
115
+ ### Getting Full Normalization Details
116
+
117
+ ```python
118
+ from tk_normalizer import TkNormalizer
119
+
120
+ normalizer = TkNormalizer("http://blog.example.com/page?b=2&a=1")
121
+ result = normalizer.get_normalized_url()
122
+
123
+ print(result)
124
+ # {
125
+ # 'normalized_url': 'blog.example.com/page?a=1&b=2',
126
+ # 'parent_normal_url': 'blog.example.com',
127
+ # 'root_normal_url': 'example.com',
128
+ # 'normalized_url_hash': '...',
129
+ # 'parent_normal_url_hash': '...',
130
+ # 'root_normal_url_hash': '...'
131
+ # }
132
+ ```
133
+
134
+ ### Error Handling
135
+
136
+ ```python
137
+ from tk_normalizer import normalize_url, InvalidUrlException
138
+
139
+ try:
140
+ normalized = normalize_url("not a valid url")
141
+ except InvalidUrlException as e:
142
+ print(f"Invalid URL: {e}")
143
+ ```
144
+
145
+ ### Accessing Individual Components
146
+
147
+ ```python
148
+ from tk_normalizer import TkNormalizer
149
+
150
+ normalizer = TkNormalizer("https://blog.example.com/path?a=1")
151
+
152
+ # Access individual normalized components
153
+ print(normalizer.normalized_url) # blog.example.com/path?a=1
154
+ print(normalizer.parent_normal_url) # blog.example.com
155
+ print(normalizer.root_normal_url) # example.com
156
+ ```
157
+
158
+ ## Hashing
159
+
160
+ For efficient storage and comparison, SHA-256 hashes are computed for:
161
+ - The normalized URL
162
+ - The parent normal URL (domain without path)
163
+ - The root normal URL (root domain without subdomains)
164
+
165
+ This provides fixed-length representations suitable for database indexing.
166
+
167
+ ## Important Caveats
168
+
169
+ While this normalization process works well for most use cases, there are some limitations:
170
+
171
+ 1. **www subdomain removal**: Technically, `www.example.com` and `example.com` could serve different content, though this is rare in practice.
172
+
173
+ 2. **Case sensitivity**: URLs are lowercased, but some servers are case-sensitive for paths.
174
+
175
+ 3. **Tracking parameters**: New tracking parameters emerge over time and may not be in the removal list.
176
+
177
+ 4. **Fragment removal**: URL fragments (#anchors) are removed, which may affect single-page applications.
178
+
179
+ ## Development
180
+
181
+ ### Setting Up Development Environment
182
+
183
+ ```bash
184
+ # Clone the repository
185
+ git clone https://github.com/terakeet/tk-normalizer.git
186
+ cd tk-normalizer
187
+
188
+ # Install development dependencies
189
+ pip install -e ".[dev]"
190
+
191
+ # Run tests
192
+ pytest
193
+
194
+ # Run tests with coverage
195
+ pytest --cov=tk_normalizer
196
+
197
+ # Run linting
198
+ ruff check src tests
199
+ ```
200
+
201
+ ### Running Tests
202
+
203
+ ```bash
204
+ # Run all tests
205
+ pytest
206
+
207
+ # Run with verbose output
208
+ pytest -v
209
+
210
+ # Run specific test file
211
+ pytest tests/test_normalizer.py
212
+
213
+ # Run with coverage report
214
+ pytest --cov=tk_normalizer --cov-report=html
215
+ ```
216
+
217
+ ## Contributing
218
+
219
+ Contributions are welcome! Please feel free to submit a Pull Request.
220
+
221
+ ## License
222
+
223
+ This project is licensed under the MIT License - see the LICENSE file for details.
224
+
225
+ ## Support
226
+
227
+ For issues and questions, please use the [GitHub issue tracker](https://github.com/terakeet/tk-normalizer/issues).
228
+
229
+ ## Credits
230
+
231
+ Based on the URL normalization functionality from [tk-core](https://github.com/terakeet/tk-core), extracted and packaged for standalone use.
@@ -0,0 +1,195 @@
1
+ # tk-normalizer
2
+
3
+ [![Python](https://img.shields.io/pypi/pyversions/tk-normalizer.svg)](https://pypi.org/project/tk-normalizer/)
4
+ [![PyPI](https://img.shields.io/pypi/v/tk-normalizer.svg)](https://pypi.org/project/tk-normalizer/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
6
+
7
+ URL normalization library for creating consistent URL representations.
8
+
9
+ ## Purpose
10
+
11
+ The URL normalization process provides equivalence between URLs that differ in case, scheme, subdomains, and query parameter ordering. This library helps create normalized representations of URLs for consistent storage, comparison, and analysis.
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pip install tk-normalizer
17
+ ```
18
+
19
+ ## Quick Start
20
+
21
+ ```python
22
+ from tk_normalizer import normalize_url
23
+
24
+ # Simple usage with the convenience function
25
+ normalized = normalize_url("http://www.Example.com/path?b=2&a=1&utm_source=test")
26
+ print(normalized) # Output: example.com/path?a=1&b=2
27
+
28
+ # Using the class directly for more control
29
+ from tk_normalizer import TkNormalizer
30
+
31
+ normalizer = TkNormalizer("http://www.Example.com/path?b=2&a=1&utm_source=test")
32
+ print(normalizer.normalized_url) # example.com/path?a=1&b=2
33
+ print(normalizer.get_normalized_url()) # Full details including hashes
34
+ ```
35
+
36
+ ## Features
37
+
38
+ ### URL Normalization
39
+
40
+ The following URLs all normalize to the same form:
41
+
42
+ ```
43
+ https://example.com/
44
+ http://www.example.com/
45
+ http://www.example.com
46
+ http://www.example.com/#my_search_engine_is_great
47
+ https://www.example.com/?utm_campaign=SomeGoogleCampaign
48
+ https://www.example.com/?utm_source=because&utm_campaign=SomeGoogleCampaign
49
+ ```
50
+
51
+ All normalize to: `example.com`
52
+
53
+ ### Normalization Process
54
+
55
+ URLs are normalized through the following steps:
56
+
57
+ - ✅ Protocol and www subdomains removed
58
+ - ✅ Lowercased
59
+ - ✅ Trailing slashes removed
60
+ - ✅ Query parameters reordered alphabetically by key
61
+ - ✅ Duplicate query parameter key/value pairs removed
62
+ - ✅ Common tracking parameters removed (utm_*, gclid, fbclid, etc.)
63
+ - ✅ Non-HTTP(S) protocols rejected
64
+ - ✅ Localhost URLs rejected
65
+
66
+ ### Tracking Parameters Removed
67
+
68
+ The following tracking parameters are automatically removed during normalization:
69
+
70
+ - `utm_*` (all utm parameters)
71
+ - `gclid`, `fbclid`, `dclid` (click identifiers)
72
+ - `_ga`, `_gid`, `_fbp`, `_hjid` (analytics cookies)
73
+ - `msclkid` (Microsoft Ads)
74
+ - `aff_id`, `affid` (affiliate tracking)
75
+ - `referrer`, `adgroupid`, `srsltid`
76
+
77
+ ## Advanced Usage
78
+
79
+ ### Getting Full Normalization Details
80
+
81
+ ```python
82
+ from tk_normalizer import TkNormalizer
83
+
84
+ normalizer = TkNormalizer("http://blog.example.com/page?b=2&a=1")
85
+ result = normalizer.get_normalized_url()
86
+
87
+ print(result)
88
+ # {
89
+ # 'normalized_url': 'blog.example.com/page?a=1&b=2',
90
+ # 'parent_normal_url': 'blog.example.com',
91
+ # 'root_normal_url': 'example.com',
92
+ # 'normalized_url_hash': '...',
93
+ # 'parent_normal_url_hash': '...',
94
+ # 'root_normal_url_hash': '...'
95
+ # }
96
+ ```
97
+
98
+ ### Error Handling
99
+
100
+ ```python
101
+ from tk_normalizer import normalize_url, InvalidUrlException
102
+
103
+ try:
104
+ normalized = normalize_url("not a valid url")
105
+ except InvalidUrlException as e:
106
+ print(f"Invalid URL: {e}")
107
+ ```
108
+
109
+ ### Accessing Individual Components
110
+
111
+ ```python
112
+ from tk_normalizer import TkNormalizer
113
+
114
+ normalizer = TkNormalizer("https://blog.example.com/path?a=1")
115
+
116
+ # Access individual normalized components
117
+ print(normalizer.normalized_url) # blog.example.com/path?a=1
118
+ print(normalizer.parent_normal_url) # blog.example.com
119
+ print(normalizer.root_normal_url) # example.com
120
+ ```
121
+
122
+ ## Hashing
123
+
124
+ For efficient storage and comparison, SHA-256 hashes are computed for:
125
+ - The normalized URL
126
+ - The parent normal URL (domain without path)
127
+ - The root normal URL (root domain without subdomains)
128
+
129
+ This provides fixed-length representations suitable for database indexing.
130
+
131
+ ## Important Caveats
132
+
133
+ While this normalization process works well for most use cases, there are some limitations:
134
+
135
+ 1. **www subdomain removal**: Technically, `www.example.com` and `example.com` could serve different content, though this is rare in practice.
136
+
137
+ 2. **Case sensitivity**: URLs are lowercased, but some servers are case-sensitive for paths.
138
+
139
+ 3. **Tracking parameters**: New tracking parameters emerge over time and may not be in the removal list.
140
+
141
+ 4. **Fragment removal**: URL fragments (#anchors) are removed, which may affect single-page applications.
142
+
143
+ ## Development
144
+
145
+ ### Setting Up Development Environment
146
+
147
+ ```bash
148
+ # Clone the repository
149
+ git clone https://github.com/terakeet/tk-normalizer.git
150
+ cd tk-normalizer
151
+
152
+ # Install development dependencies
153
+ pip install -e ".[dev]"
154
+
155
+ # Run tests
156
+ pytest
157
+
158
+ # Run tests with coverage
159
+ pytest --cov=tk_normalizer
160
+
161
+ # Run linting
162
+ ruff check src tests
163
+ ```
164
+
165
+ ### Running Tests
166
+
167
+ ```bash
168
+ # Run all tests
169
+ pytest
170
+
171
+ # Run with verbose output
172
+ pytest -v
173
+
174
+ # Run specific test file
175
+ pytest tests/test_normalizer.py
176
+
177
+ # Run with coverage report
178
+ pytest --cov=tk_normalizer --cov-report=html
179
+ ```
180
+
181
+ ## Contributing
182
+
183
+ Contributions are welcome! Please feel free to submit a Pull Request.
184
+
185
+ ## License
186
+
187
+ This project is licensed under the MIT License - see the LICENSE file for details.
188
+
189
+ ## Support
190
+
191
+ For issues and questions, please use the [GitHub issue tracker](https://github.com/terakeet/tk-normalizer/issues).
192
+
193
+ ## Credits
194
+
195
+ Based on the URL normalization functionality from [tk-core](https://github.com/terakeet/tk-core), extracted and packaged for standalone use.
@@ -0,0 +1,162 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "tk-normalizer"
7
+ version = "1.0.0"
8
+ description = "URL normalization library for consistent URL representation"
9
+ readme = "README.md"
10
+ requires-python = ">=3.11"
11
+ license = {text = "MIT"}
12
+ authors = [
13
+ {name = "Terakeet", email = "engineering@terakeet.com"}
14
+ ]
15
+ maintainers = [
16
+ {name = "Terakeet", email = "engineering@terakeet.com"}
17
+ ]
18
+ keywords = ["url", "normalization", "canonicalization", "web", "utilities"]
19
+ classifiers = [
20
+ "Development Status :: 4 - Beta",
21
+ "Intended Audience :: Developers",
22
+ "License :: OSI Approved :: MIT License",
23
+ "Programming Language :: Python :: 3",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ "Programming Language :: Python :: 3.13",
27
+ "Topic :: Internet :: WWW/HTTP",
28
+ "Topic :: Software Development :: Libraries :: Python Modules",
29
+ "Topic :: Text Processing :: Filters",
30
+ "Operating System :: OS Independent",
31
+ ]
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/terakeet/tk-normalizer"
35
+ Repository = "https://github.com/terakeet/tk-normalizer.git"
36
+ Issues = "https://github.com/terakeet/tk-normalizer/issues"
37
+ Documentation = "https://github.com/terakeet/tk-normalizer/blob/main/docs/ARCHITECTURE.md"
38
+
39
+ [project.optional-dependencies]
40
+ dev = [
41
+ "pytest>=7.0.0",
42
+ "pytest-mock>=3.0.0",
43
+ "pytest-cov>=4.0.0",
44
+ "ruff>=0.1.0",
45
+ "pre-commit>=3.0.0",
46
+ "build>=0.10.0",
47
+ "twine>=4.0.0",
48
+ ]
49
+
50
+ [tool.setuptools.packages.find]
51
+ where = ["src"]
52
+
53
+ [tool.setuptools.package-data]
54
+ "*" = ["py.typed"]
55
+
56
+ [tool.pytest.ini_options]
57
+ pythonpath = ["src"]
58
+ testpaths = ["tests"]
59
+ python_files = ["test_*.py", "*_test.py"]
60
+ python_classes = ["Test*"]
61
+ python_functions = ["test_*"]
62
+ addopts = [
63
+ "-ra",
64
+ "--strict-markers",
65
+ "--strict-config",
66
+ "--cov=tk_normalizer",
67
+ "--cov-report=term-missing",
68
+ "--cov-branch",
69
+ "-vv",
70
+ ]
71
+ log_cli = true
72
+ log_cli_level = "INFO"
73
+ log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)"
74
+ log_cli_date_format = "%Y-%m-%d %H:%M:%S"
75
+ filterwarnings = [
76
+ "ignore::DeprecationWarning",
77
+ "ignore::PendingDeprecationWarning",
78
+ ]
79
+ markers = [
80
+ "unit: Unit tests",
81
+ "integration: Integration tests",
82
+ "slow: Slow tests",
83
+ ]
84
+
85
+ [tool.ruff]
86
+ line-length = 120
87
+ target-version = "py311"
88
+ exclude = [
89
+ ".git",
90
+ ".venv",
91
+ "venv",
92
+ "__pycache__",
93
+ ".ruff_cache",
94
+ "build",
95
+ "dist",
96
+ "*.egg-info",
97
+ "htmlcov",
98
+ ]
99
+
100
+ [tool.ruff.lint]
101
+ select = [
102
+ "E", # pycodestyle errors
103
+ "W", # pycodestyle warnings
104
+ "F", # pyflakes
105
+ "I", # isort
106
+ "B", # flake8-bugbear
107
+ "C4", # flake8-comprehensions
108
+ "UP", # pyupgrade
109
+ "ARG", # flake8-unused-arguments
110
+ "SIM", # flake8-simplify
111
+ "S", # flake8-bandit
112
+ "ANN", # annotations
113
+ ]
114
+ ignore = [
115
+ "E501", # line too long (handled by formatter)
116
+ "B008", # do not perform function calls in argument defaults
117
+ "S101", # use of assert in tests is fine
118
+ "ANN002", # Args type hint skip
119
+ "ANN003", # Kwargs type hint skip
120
+ "ANN204", # Missing return type annotation for special method `__init__`
121
+ "ANN401", # Allow Any type declaration
122
+ "SIM108", # ternary operator required
123
+ ]
124
+
125
+ [tool.ruff.lint.per-file-ignores]
126
+ "tests/*" = ["S101", "ARG001", "ARG002", "S105", "S106", "ANN201"]
127
+ "test_*.py" = ["S101", "ARG001", "ARG002", "S105", "S106", "ANN201"]
128
+
129
+ [tool.ruff.format]
130
+ quote-style = "double"
131
+ indent-style = "space"
132
+ skip-magic-trailing-comma = false
133
+ line-ending = "lf"
134
+
135
+ [tool.coverage.run]
136
+ branch = true
137
+ source = ["src/tk_normalizer"]
138
+ omit = [
139
+ "tests/*",
140
+ "test_*.py",
141
+ ".venv/*",
142
+ "venv/*",
143
+ "*/site-packages/*",
144
+ "*/__init__.py",
145
+ ]
146
+
147
+ [tool.coverage.report]
148
+ exclude_lines = [
149
+ "pragma: no cover",
150
+ "def __repr__",
151
+ "if self.debug:",
152
+ "if __name__ == .__main__.:",
153
+ "raise AssertionError",
154
+ "raise NotImplementedError",
155
+ "pass",
156
+ "except ImportError:",
157
+ "if TYPE_CHECKING:",
158
+ ]
159
+ show_missing = true
160
+ skip_covered = false
161
+ precision = 2
162
+ fail_under = 0
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,37 @@
"""
tk-normalizer: URL normalization library for consistent URL representation.

This library provides URL normalization functionality to create normalized
representations of URLs, handling variations in protocols, subdomains,
query parameters, and more.
"""

from .normalizer import InvalidUrlException, TkNormalizer

# NOTE: must match the package version declared in pyproject.toml (1.0.0).
# The previous value ("0.1.0") was out of sync with the released metadata.
__version__ = "1.0.0"
__all__ = ["TkNormalizer", "InvalidUrlException", "normalize_url"]


def normalize_url(url: str) -> str:
    """
    Normalize a URL to its normalized form.

    This is a convenience function that creates a TkNormalizer instance
    and returns the normalized URL string.

    Args:
        url: The URL string to normalize.

    Returns:
        The normalized URL string.

    Raises:
        InvalidUrlException: If the URL is invalid or cannot be normalized.

    Example:
        >>> from tk_normalizer import normalize_url
        >>> normalize_url("http://www.Example.com/path?b=2&a=1&utm_source=test")
        'example.com/path?a=1&b=2'
    """
    normalizer = TkNormalizer(url)
    return normalizer.normalized_url