structly-whois 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- structly_whois-1.0.0/.gitignore +208 -0
- structly_whois-1.0.0/CHANGELOG.md +47 -0
- structly_whois-1.0.0/LICENSE +21 -0
- structly_whois-1.0.0/PKG-INFO +264 -0
- structly_whois-1.0.0/README.md +203 -0
- structly_whois-1.0.0/benchmarks/README.md +81 -0
- structly_whois-1.0.0/pyproject.toml +94 -0
- structly_whois-1.0.0/src/structly_whois/__about__.py +4 -0
- structly_whois-1.0.0/src/structly_whois/__init__.py +33 -0
- structly_whois-1.0.0/src/structly_whois/cli.py +69 -0
- structly_whois-1.0.0/src/structly_whois/config.py +794 -0
- structly_whois-1.0.0/src/structly_whois/domain_inference.py +123 -0
- structly_whois-1.0.0/src/structly_whois/normalization.py +180 -0
- structly_whois-1.0.0/src/structly_whois/parser.py +259 -0
- structly_whois-1.0.0/src/structly_whois/py.typed +0 -0
- structly_whois-1.0.0/src/structly_whois/records/__init__.py +31 -0
- structly_whois-1.0.0/src/structly_whois/records/builder.py +139 -0
- structly_whois-1.0.0/src/structly_whois/records/models.py +111 -0
- structly_whois-1.0.0/src/structly_whois/records/utils.py +186 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
#poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
#pdm.lock
|
|
116
|
+
#pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
#pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.envrc
|
|
140
|
+
.venv
|
|
141
|
+
env/
|
|
142
|
+
venv/
|
|
143
|
+
ENV/
|
|
144
|
+
env.bak/
|
|
145
|
+
venv.bak/
|
|
146
|
+
|
|
147
|
+
# Spyder project settings
|
|
148
|
+
.spyderproject
|
|
149
|
+
.spyproject
|
|
150
|
+
|
|
151
|
+
# Rope project settings
|
|
152
|
+
.ropeproject
|
|
153
|
+
|
|
154
|
+
# mkdocs documentation
|
|
155
|
+
/site
|
|
156
|
+
|
|
157
|
+
# mypy
|
|
158
|
+
.mypy_cache/
|
|
159
|
+
.dmypy.json
|
|
160
|
+
dmypy.json
|
|
161
|
+
|
|
162
|
+
# Pyre type checker
|
|
163
|
+
.pyre/
|
|
164
|
+
|
|
165
|
+
# pytype static type analyzer
|
|
166
|
+
.pytype/
|
|
167
|
+
|
|
168
|
+
# Cython debug symbols
|
|
169
|
+
cython_debug/
|
|
170
|
+
|
|
171
|
+
# PyCharm
|
|
172
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
173
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
174
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
175
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
176
|
+
#.idea/
|
|
177
|
+
|
|
178
|
+
# Abstra
|
|
179
|
+
# Abstra is an AI-powered process automation framework.
|
|
180
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
181
|
+
# Learn more at https://abstra.io/docs
|
|
182
|
+
.abstra/
|
|
183
|
+
|
|
184
|
+
# Visual Studio Code
|
|
185
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
186
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
187
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
188
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
189
|
+
# .vscode/
|
|
190
|
+
|
|
191
|
+
# Ruff stuff:
|
|
192
|
+
.ruff_cache/
|
|
193
|
+
.ruff.log
|
|
194
|
+
|
|
195
|
+
# PyPI configuration file
|
|
196
|
+
.pypirc
|
|
197
|
+
|
|
198
|
+
# Cursor
|
|
199
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
200
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
201
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
202
|
+
.cursorignore
|
|
203
|
+
.cursorindexingignore
|
|
204
|
+
|
|
205
|
+
# Marimo
|
|
206
|
+
marimo/_static/
|
|
207
|
+
marimo/_lsp/
|
|
208
|
+
__marimo__/
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented here. This project adheres to
|
|
4
|
+
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
5
|
+
|
|
6
|
+
## [1.0.0] - 2025-02-14
|
|
7
|
+
|
|
8
|
+
### Added
|
|
9
|
+
|
|
10
|
+
- Fallback guard for `.info` domains so callers who pass the domain explicitly always get it back even when registries return `INFO` as the payload's domain label.
|
|
11
|
+
- Targeted unit tests for CLI defaults, domain inference helpers, normalization edge cases, and `.info` parsing paths, plus docs describing the Kafka benchmark batching strategy.
|
|
12
|
+
|
|
13
|
+
### Changed
|
|
14
|
+
|
|
15
|
+
- Reorganized the test suite into `tests/unit`, `tests/integration`, and `tests/common` with helper scripts under `tests/scripts`, making it clearer which suites hit fixtures vs. fast-running modules.
|
|
16
|
+
- Updated README/docs examples to reference the new helper module path (`tests.common.helpers`) and use the improved benchmark documentation.
|
|
17
|
+
|
|
18
|
+
### Fixed
|
|
19
|
+
|
|
20
|
+
- Addressed Ruff's SIM102 warning in the domain inference registry and ensured `.info` expected fixtures behave consistently across sample-driven tests.
|
|
21
|
+
|
|
22
|
+
## [0.2.4] - 2025-12-10
|
|
23
|
+
|
|
24
|
+
### Added
|
|
25
|
+
|
|
26
|
+
- Ship `tests/report_coverage.py` so maintainers can generate the per-TLD field coverage numbers that back `coverage_report.txt`, making it easier to decide which registries deserve new fixtures next.
|
|
27
|
+
- Expand the WHOIS fixture corpus (Belgium, EU, Sweden, Ukraine, etc.) and expected outputs so regressions in tricky registries are caught by CI instead of surfacing in production.
|
|
28
|
+
|
|
29
|
+
### Changed
|
|
30
|
+
|
|
31
|
+
- Domain inference now refreshes itself from every `domain_name` pattern defined in the Structly base fields and overrides, so newly registered TLD configs immediately influence CLI/domain auto-detection without extra plumbing.
|
|
32
|
+
|
|
33
|
+
### Fixed
|
|
34
|
+
|
|
35
|
+
- Harden the `.be` override to drop stray single-token statuses (no more `["NOT", "NOT AVAILABLE"]`) and capture registrar/registrant metadata that DNS Belgium hides behind multi-line blocks.
|
|
36
|
+
|
|
37
|
+
## [0.2.0] - 2024-06-01
|
|
38
|
+
|
|
39
|
+
- Rename the package from `structly_whois_parser` to `structly_whois` (distribution: `structly-whois`) and expose `__version__` from `__about__.py`.
|
|
40
|
+
- Introduce optional `date_parser: Callable[[str], datetime]` hooks across `WhoisParser` and `build_whois_record`.
|
|
41
|
+
- Add pytest suite (fixtures + Hypothesis), CLI entry point, Ruff tooling, Makefile, and GitHub Actions pipeline (lint → test → build → publish).
|
|
42
|
+
- Provide benchmark harness + marketing-grade docs/README demonstrating throughput vs `whois-parser` and `python-whois`.
|
|
43
|
+
- Document SemVer/tagging strategy and include `py.typed` for downstream type checking.
|
|
44
|
+
|
|
45
|
+
## [0.1.0] - 2023-xx-xx
|
|
46
|
+
|
|
47
|
+
- Initial `structly_whois_parser` release (legacy name).
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Nikola Stankovic
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: structly-whois
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Fast WHOIS parsing core with a Pythonic surface area.
|
|
5
|
+
Project-URL: Homepage, https://gitlab.com/example/structly-whois
|
|
6
|
+
Project-URL: Repository, https://gitlab.com/example/structly-whois
|
|
7
|
+
Project-URL: Issues, https://gitlab.com/example/structly-whois/-/issues
|
|
8
|
+
Author-email: Nikola Stankovic <nstankovic.bg@proton.me>
|
|
9
|
+
License: MIT License
|
|
10
|
+
|
|
11
|
+
Copyright (c) 2025 Nikola Stankovic
|
|
12
|
+
|
|
13
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
14
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
15
|
+
in the Software without restriction, including without limitation the rights
|
|
16
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
17
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
18
|
+
furnished to do so, subject to the following conditions:
|
|
19
|
+
|
|
20
|
+
The above copyright notice and this permission notice shall be included in all
|
|
21
|
+
copies or substantial portions of the Software.
|
|
22
|
+
|
|
23
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
24
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
26
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
27
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
28
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
29
|
+
SOFTWARE.
|
|
30
|
+
License-File: LICENSE
|
|
31
|
+
Keywords: domain,parser,structly,whois
|
|
32
|
+
Classifier: Development Status :: 4 - Beta
|
|
33
|
+
Classifier: Intended Audience :: Developers
|
|
34
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
35
|
+
Classifier: Operating System :: OS Independent
|
|
36
|
+
Classifier: Programming Language :: Python :: 3
|
|
37
|
+
Classifier: Topic :: Internet :: Name Service (DNS)
|
|
38
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
39
|
+
Requires-Python: >=3.9
|
|
40
|
+
Requires-Dist: msgspec>=0.18
|
|
41
|
+
Requires-Dist: structly>=1.0.1
|
|
42
|
+
Provides-Extra: benchmarks
|
|
43
|
+
Requires-Dist: confluent-kafka>=2.3; extra == 'benchmarks'
|
|
44
|
+
Requires-Dist: orjson>=3.9; extra == 'benchmarks'
|
|
45
|
+
Requires-Dist: python-snappy>=0.6; extra == 'benchmarks'
|
|
46
|
+
Provides-Extra: dev
|
|
47
|
+
Requires-Dist: build>=1.2; extra == 'dev'
|
|
48
|
+
Requires-Dist: confluent-kafka>=2.3; extra == 'dev'
|
|
49
|
+
Requires-Dist: hypothesis>=6.100; extra == 'dev'
|
|
50
|
+
Requires-Dist: maturin>=1.5.1; extra == 'dev'
|
|
51
|
+
Requires-Dist: orjson>=3.9; extra == 'dev'
|
|
52
|
+
Requires-Dist: psutil>=5.9; extra == 'dev'
|
|
53
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
54
|
+
Requires-Dist: pytest>=8.2; extra == 'dev'
|
|
55
|
+
Requires-Dist: python-snappy>=0.6; extra == 'dev'
|
|
56
|
+
Requires-Dist: python-whois>=0.9; extra == 'dev'
|
|
57
|
+
Requires-Dist: ruff>=0.5; extra == 'dev'
|
|
58
|
+
Requires-Dist: tabulate>=0.9; extra == 'dev'
|
|
59
|
+
Requires-Dist: whois-parser; extra == 'dev'
|
|
60
|
+
Description-Content-Type: text/markdown
|
|
61
|
+
|
|
62
|
+
<p align="center">
|
|
63
|
+
<picture>
|
|
64
|
+
<source media="(prefers-color-scheme: dark)" srcset="https://github.com/bytevader/structly-whois-parser/raw/main/docs/structly_whois.svg">
|
|
65
|
+
<img src="https://github.com/bytevader/structly-whois-parser/raw/main/docs/structly_whois.svg" alt="structly_whois" width="320">
|
|
66
|
+
</picture>
|
|
67
|
+
</p>
|
|
68
|
+
<p align="center">
|
|
69
|
+
<em>Structly-powered WHOIS parsing.</em>
|
|
70
|
+
</p>
|
|
71
|
+
<p align="center">
|
|
72
|
+
<a href="https://github.com/bytevader/structly-whois-parser/actions/workflows/ci.yml?query=branch%3Amain" target="_blank">
|
|
73
|
+
<img src="https://github.com/bytevader/structly-whois-parser/actions/workflows/ci.yml/badge.svg?branch=main" alt="Main CI">
|
|
74
|
+
</a>
|
|
75
|
+
<a href="https://coverage-badge.samuelcolvin.workers.dev/bytevader/structly-whois-parser.svg?branch=main" target="_blank">
|
|
76
|
+
<img src="https://coverage-badge.samuelcolvin.workers.dev/bytevader/structly-whois-parser.svg?branch=main" alt="Coverage">
|
|
77
|
+
</a>
|
|
78
|
+
<a href="https://pypi.org/project/structly-whois" target="_blank">
|
|
79
|
+
<img src="https://img.shields.io/pypi/v/structly-whois?color=%2334D058&label=pypi%20package" alt="PyPI">
|
|
80
|
+
</a>
|
|
81
|
+
</p>
|
|
82
|
+
|
|
83
|
+
> Fast WHOIS parser powered by [structly](https://pypi.org/project/structly/) and [msgspec](https://pypi.org/project/msgspec/).
|
|
84
|
+
|
|
85
|
+
**structly_whois** wraps Structly's compiled parsers with a modern Python API so you can normalize noisy WHOIS payloads, auto-detect TLD-specific overrides, and emit JSON-ready records without hauling heavy regex DSLs or dateparser into your hot path.
|
|
86
|
+
|
|
87
|
+
## Highlights
|
|
88
|
+
|
|
89
|
+
- **Structly speed** – Per-TLD configurations are compiled by Structly, keeping parsing under a millisecond/record even on commodity hardware.
|
|
90
|
+
- **Typed surface** – msgspec-based `WhoisRecord` structs, `py.typed` wheels, and a CLI entrypoint (`structly-whois`) for quick inspection.
|
|
91
|
+
- **Configurable** – Inject your own Structly configs, register TLD overrides at runtime, or extend the base field definitions without forking.
|
|
92
|
+
- **Lean dependencies** – No `dateparser` or other heavy date-parsing library is required by default. Plug in a `date_parser` callable only when locale-aware coercion is truly needed.
|
|
93
|
+
- **Batched & streaming friendly** – `parse_many` and `parse_chunks` let you process millions of payloads from queues, tarballs, or S3 archives without buffering everything in memory.
|
|
94
|
+
|
|
95
|
+
## Installation
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
pip install structly-whois # end users
|
|
99
|
+
pip install -e '.[dev]' # contributors (installs Ruff, pytest, etc.)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Python 3.9+ is supported. Wheels ship `py.typed` markers for static analyzers.
|
|
103
|
+
|
|
104
|
+
## Quickstart
|
|
105
|
+
|
|
106
|
+
```python
|
|
107
|
+
from structly_whois import WhoisParser
|
|
108
|
+
|
|
109
|
+
parser = WhoisParser()
|
|
110
|
+
payload = """\
|
|
111
|
+
Domain Name: example.com
|
|
112
|
+
Registrar: Example Registrar LLC
|
|
113
|
+
Creation Date: 2020-01-01T12:00:00Z
|
|
114
|
+
Registry Expiry Date: 2030-01-01T12:00:00Z
|
|
115
|
+
Name Server: NS1.EXAMPLE.COM
|
|
116
|
+
Name Server: NS2.EXAMPLE.COM
|
|
117
|
+
Status: clientTransferProhibited https://icann.org/epp#clientTransferProhibited
|
|
118
|
+
Registrant Name: Example DNS
|
|
119
|
+
"""
|
|
120
|
+
|
|
121
|
+
record = parser.parse_record(payload, domain="example.com")
|
|
122
|
+
print(record.domain)
|
|
123
|
+
print(record.statuses)
|
|
124
|
+
print(record.registered_at)
|
|
125
|
+
print(record.to_dict())
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
If you omit `domain`, structly_whois inspects the payload to infer the domain/TLD and automatically picks the right Structly configuration.
|
|
129
|
+
|
|
130
|
+
## CLI usage
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
structly-whois tests/samples/whois/google.com.txt \
|
|
134
|
+
--domain google.com \
|
|
135
|
+
--record --json \
|
|
136
|
+
--date-parser tests.common.helpers:iso_to_datetime
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
The CLI mirrors the Python API: pass `--record` to emit a structured `WhoisRecord`, `--lowercase` to normalize strings, and `--date-parser module:callable` when you want custom date coercion.
|
|
140
|
+
|
|
141
|
+
## Advanced usage
|
|
142
|
+
|
|
143
|
+
### Batched parsing
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
parser = WhoisParser()
|
|
147
|
+
payloads: list[str] = fetch_from_queue()
|
|
148
|
+
records = parser.parse_many(payloads, to_records=True, lowercase=True)
|
|
149
|
+
for record in records:
|
|
150
|
+
ingest(record) # bulk insert, emit to Kafka, etc.
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Optional date parser hook
|
|
154
|
+
|
|
155
|
+
`structly_whois` intentionally avoids bundling `dateparser`. If you need locale-specific conversions, pass a callable either when constructing the parser or per method:
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from datetime import datetime
|
|
159
|
+
|
|
160
|
+
def date_hook(value: str) -> datetime:
|
|
161
|
+
return datetime.fromisoformat(value.replace("Z", "+00:00"))
|
|
162
|
+
|
|
163
|
+
parser = WhoisParser(date_parser=date_hook)
|
|
164
|
+
record = parser.parse_record(raw_whois, domain="example.dev", date_parser=date_hook)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
For multilingual registries, the simplest plug-in is [`dateparser.parse`](https://pypi.org/project/dateparser/).
|
|
168
|
+
|
|
169
|
+
NOTE: It can cut throughput by more than half.
|
|
170
|
+
|
|
171
|
+
### Streaming from S3
|
|
172
|
+
|
|
173
|
+
```python
|
|
174
|
+
import boto3
|
|
175
|
+
import gzip
|
|
176
|
+
import tarfile
|
|
177
|
+
from structly_whois import WhoisParser
|
|
178
|
+
|
|
179
|
+
def iter_whois_payloads(bucket: str, key: str):
|
|
180
|
+
"""Stream WHOIS samples from an S3-hosted tar.gz without touching disk."""
|
|
181
|
+
s3 = boto3.client("s3")
|
|
182
|
+
obj = s3.get_object(Bucket=bucket, Key=key)
|
|
183
|
+
with gzip.GzipFile(fileobj=obj["Body"]) as gz:
|
|
184
|
+
with tarfile.open(fileobj=gz, mode="r:") as tar:
|
|
185
|
+
for member in tar:
|
|
186
|
+
if not member.isfile():
|
|
187
|
+
continue
|
|
188
|
+
raw = tar.extractfile(member).read().decode("utf-8", errors="ignore")
|
|
189
|
+
yield raw
|
|
190
|
+
|
|
191
|
+
parser = WhoisParser()
|
|
192
|
+
payloads = iter_whois_payloads("whois-dumps", "2024-12.tar.gz")
|
|
193
|
+
|
|
194
|
+
for chunk in parser.parse_chunks(payloads, chunk_size=512):
|
|
195
|
+
process(chunk) # bulk insert, publish, etc.
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### Custom Structly Config overrides
|
|
199
|
+
|
|
200
|
+
`structly_whois` is built for easy extensibility—you can extend the bundled Structly configs or replace
|
|
201
|
+
them entirely, so parser behavior stays configurable without forking.
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
from structly import FieldPattern
|
|
205
|
+
from structly_whois import StructlyConfigFactory, WhoisParser
|
|
206
|
+
|
|
207
|
+
factory = StructlyConfigFactory(
|
|
208
|
+
base_field_definitions={
|
|
209
|
+
"domain_name": {"patterns": [FieldPattern.regex(r"^dn:\s*(?P<val>[a-z0-9.-]+)$")]},
|
|
210
|
+
},
|
|
211
|
+
tld_overrides={},
|
|
212
|
+
)
|
|
213
|
+
parser = WhoisParser(preload_tlds=("dev",), config_factory=factory)
|
|
214
|
+
parser.register_tld(
|
|
215
|
+
"app",
|
|
216
|
+
{
|
|
217
|
+
"domain_name": {
|
|
218
|
+
"extend_patterns": [FieldPattern.starts_with("App Domain:")],
|
|
219
|
+
}
|
|
220
|
+
},
|
|
221
|
+
)
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
## API overview
|
|
225
|
+
|
|
226
|
+
| Component | Description |
|
|
227
|
+
| --------- | ----------- |
|
|
228
|
+
| `structly_whois.WhoisParser` | High-level parser with batching, record conversion, and optional CLI integration. |
|
|
229
|
+
| `structly_whois.StructlyConfigFactory` | Factory that builds Structly configs with base fields + TLD overrides. |
|
|
230
|
+
| `structly_whois.records.WhoisRecord` | Typed msgspec struct with `to_dict()` for JSON serialization. |
|
|
231
|
+
| `structly_whois.normalize_raw_text` | Fast trimming of noise, privacy banners, and multiline headers. |
|
|
232
|
+
| `structly_whois.cli` | Argparse-powered CLI that mirrors the Python API. |
|
|
233
|
+
|
|
234
|
+
## Benchmarks
|
|
235
|
+
|
|
236
|
+
`make bench` runs `benchmarks/run_benchmarks.py`, comparing structly_whois against `whois-parser` and `python-whois`.
|
|
237
|
+
Default settings parse all 105 fixtures × 100 iterations (10,500 records per backend) on a MacBook Pro (M4, Python 3.14):
|
|
238
|
+
|
|
239
|
+
| backend | records | records/s | avg latency (ms) |
|
|
240
|
+
| ------------------------- | ------- | --------- | ---------------- |
|
|
241
|
+
| structly-whois | 10,500 | 7,779 | 0.129 |
|
|
242
|
+
| structly-whois + dateutil | 10,500 | 3,236 | 0.309 |
|
|
243
|
+
| structly-whois + dateparser | 10,500 | 996 | 1.004 |
|
|
244
|
+
| python-whois | 10,500 | 196 | 5.096 |
|
|
245
|
+
| whois-parser | 10,500 | 17 | 58.229 |
|
|
246
|
+
|
|
247
|
+
“dateutil” uses `date_parser=dateutil.parser.parse`; “dateparser” uses `date_parser=dateparser.parse`. Both illustrate how heavier date coercion affects throughput.
|
|
248
|
+
|
|
249
|
+
## Development
|
|
250
|
+
|
|
251
|
+
```bash
|
|
252
|
+
make lint # Ruff (E/F/W/I/UP/B/SIM)
|
|
253
|
+
make fmt # Ruff formatter across src/tests/benchmarks
|
|
254
|
+
make test # pytest + coverage (Hypothesis fixtures)
|
|
255
|
+
make cov # coverage xml/report (≥90%)
|
|
256
|
+
make bench # compare structly_whois vs whois-parser/python-whois
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
See [CONTRIBUTING.md](CONTRIBUTING.md) for versioning, release, and pull-request guidelines.
|
|
260
|
+
CI (GitHub Actions) runs lint/test/build on every push; pushes to `dev` publish wheels to TestPyPI and tags `vX.Y.Z` publish to PyPI.
|
|
261
|
+
|
|
262
|
+
## License
|
|
263
|
+
|
|
264
|
+
MIT © Nikola Stankovic.
|