wfh-wordlist 2.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. wfh_wordlist-2.1.2/CODE_OF_CONDUCT.md +79 -0
  2. wfh_wordlist-2.1.2/CONTRIBUTING.md +90 -0
  3. wfh_wordlist-2.1.2/LICENSE +21 -0
  4. wfh_wordlist-2.1.2/MANIFEST.in +9 -0
  5. wfh_wordlist-2.1.2/PKG-INFO +268 -0
  6. wfh_wordlist-2.1.2/README.md +215 -0
  7. wfh_wordlist-2.1.2/README.pt-BR.md +247 -0
  8. wfh_wordlist-2.1.2/SECURITY.md +78 -0
  9. wfh_wordlist-2.1.2/data/behavior_patterns.json +868 -0
  10. wfh_wordlist-2.1.2/data/corp_prefix_patterns.json +452 -0
  11. wfh_wordlist-2.1.2/pyproject.toml +85 -0
  12. wfh_wordlist-2.1.2/requirements.txt +26 -0
  13. wfh_wordlist-2.1.2/setup.cfg +4 -0
  14. wfh_wordlist-2.1.2/wfh.py +2189 -0
  15. wfh_wordlist-2.1.2/wfh_modules/__init__.py +1 -0
  16. wfh_wordlist-2.1.2/wfh_modules/analyzer.py +447 -0
  17. wfh_wordlist-2.1.2/wfh_modules/charset_gen.py +531 -0
  18. wfh_wordlist-2.1.2/wfh_modules/compute_backend.py +382 -0
  19. wfh_wordlist-2.1.2/wfh_modules/corp_prefixes.py +402 -0
  20. wfh_wordlist-2.1.2/wfh_modules/corp_profiler.py +350 -0
  21. wfh_wordlist-2.1.2/wfh_modules/data/__init__.py +2 -0
  22. wfh_wordlist-2.1.2/wfh_modules/data/behavior_patterns.json +868 -0
  23. wfh_wordlist-2.1.2/wfh_modules/data/corp_prefix_patterns.json +452 -0
  24. wfh_wordlist-2.1.2/wfh_modules/dns_wordlist.py +348 -0
  25. wfh_wordlist-2.1.2/wfh_modules/domain_users.py +1333 -0
  26. wfh_wordlist-2.1.2/wfh_modules/file_extractor.py +237 -0
  27. wfh_wordlist-2.1.2/wfh_modules/hw_profiler.py +331 -0
  28. wfh_wordlist-2.1.2/wfh_modules/leet_permuter.py +278 -0
  29. wfh_wordlist-2.1.2/wfh_modules/linkedin_search.py +401 -0
  30. wfh_wordlist-2.1.2/wfh_modules/mangler.py +166 -0
  31. wfh_wordlist-2.1.2/wfh_modules/merger.py +146 -0
  32. wfh_wordlist-2.1.2/wfh_modules/ml_patterns.py +858 -0
  33. wfh_wordlist-2.1.2/wfh_modules/ocr_extractor.py +151 -0
  34. wfh_wordlist-2.1.2/wfh_modules/pattern_engine.py +287 -0
  35. wfh_wordlist-2.1.2/wfh_modules/phone_gen.py +362 -0
  36. wfh_wordlist-2.1.2/wfh_modules/profiler.py +1122 -0
  37. wfh_wordlist-2.1.2/wfh_modules/sanitizer.py +251 -0
  38. wfh_wordlist-2.1.2/wfh_modules/thread_pool.py +345 -0
  39. wfh_wordlist-2.1.2/wfh_modules/web_scraper.py +336 -0
  40. wfh_wordlist-2.1.2/wfh_modules/xor_crypto.py +167 -0
  41. wfh_wordlist-2.1.2/wfh_wordlist.egg-info/PKG-INFO +268 -0
  42. wfh_wordlist-2.1.2/wfh_wordlist.egg-info/SOURCES.txt +44 -0
  43. wfh_wordlist-2.1.2/wfh_wordlist.egg-info/dependency_links.txt +1 -0
  44. wfh_wordlist-2.1.2/wfh_wordlist.egg-info/entry_points.txt +2 -0
  45. wfh_wordlist-2.1.2/wfh_wordlist.egg-info/requires.txt +20 -0
  46. wfh_wordlist-2.1.2/wfh_wordlist.egg-info/top_level.txt +2 -0
@@ -0,0 +1,79 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, caste, color, religion, or sexual
10
+ identity and orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our standards
16
+
17
+ Examples of behavior that contributes to a positive environment for our
18
+ community include:
19
+
20
+ - Demonstrating empathy and kindness toward other people
21
+ - Being respectful of differing opinions, viewpoints, and experiences
22
+ - Giving and gracefully accepting constructive feedback
23
+ - Accepting responsibility and apologizing to those affected by our mistakes,
24
+ and learning from the experience
25
+ - Focusing on what is best not just for us as individuals, but for the overall
26
+ community
27
+
28
+ Examples of unacceptable behavior include:
29
+
30
+ - The use of sexualized language or imagery, and sexual attention or advances of
31
+ any kind
32
+ - Trolling, insulting or derogatory comments, and personal or political attacks
33
+ - Public or private harassment
34
+ - Publishing others' private information, such as a physical or email address,
35
+ without their explicit permission
36
+ - Other conduct which could reasonably be considered inappropriate in a
37
+ professional setting
38
+
39
+ ## Enforcement responsibilities
40
+
41
+ Project maintainers are responsible for clarifying and enforcing our standards of
42
+ acceptable behavior and will take appropriate and fair corrective action in
43
+ response to any behavior that they deem inappropriate, threatening, offensive,
44
+ or harmful.
45
+
46
+ Project maintainers have the right and responsibility to remove, edit, or reject
47
+ comments, commits, code, wiki edits, issues, and other contributions that are
48
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
49
+ decisions when appropriate.
50
+
51
+ ## Scope
52
+
53
+ This Code of Conduct applies within all community spaces, and also applies when
54
+ an individual is officially representing the community in public spaces.
55
+ Examples of representing our community include using an official e-mail address,
56
+ posting via an official social media account, or acting as an appointed
57
+ representative at an online or offline event.
58
+
59
+ ## Enforcement
60
+
61
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
62
+ reported to the project maintainers responsible for enforcement at the contact
63
+ address listed in the repository README or organization profile.
64
+
65
+ All complaints will be reviewed and investigated promptly and fairly.
66
+
67
+ All project maintainers are obligated to respect the privacy and security of the
68
+ reporter of any incident.
69
+
70
+ ## Attribution
71
+
72
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
73
+ version 2.1, available at
74
+ https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.
75
+
76
+ [homepage]: https://www.contributor-covenant.org
77
+
78
+ For answers to common questions about this code of conduct, see
79
+ https://www.contributor-covenant.org/faq
@@ -0,0 +1,90 @@
1
+ # Contributing
2
+
3
+ Thank you for your interest in improving **WordListsForHacking**. This guide
4
+ explains how to contribute effectively while preserving attribution and quality.
5
+
6
+ ## Ground Rules
7
+
8
+ - Use this project **only** for authorized security research, education, and
9
+ contracted penetration testing.
10
+ - Do not submit content intended to facilitate unauthorized access.
11
+ - Follow the [Code of Conduct](CODE_OF_CONDUCT.md) in all interactions.
12
+ - Never include real PII, real credentials, or identifiable company data in
13
+ wordlists, patterns, or code.
14
+
15
+ ## How to Contribute
16
+
17
+ 1. **Open an issue first** for substantial changes (new features, large
18
+ refactors, new modules).
19
+ 2. **Fork** the repository and create a **feature branch** from `main`.
20
+ 3. **Keep commits focused** — one logical change per commit.
21
+ 4. **Test locally** before opening a pull request.
22
+ 5. **Open a pull request** with a clear description.
23
+
24
+ ## Code Style
25
+
26
+ - **Type hints** on all function signatures.
27
+ - **Docstrings** in Google style for every class and public function.
28
+ - **Logging** via the `logging` module — never `print()` in library code.
29
+ - **No hardcoded sensitive data** — use `.env` and configuration files.
30
+ - **No unnecessary imports** — keep modules lean.
31
+ - Follow existing module structure in `wfh_modules/`.
32
+
33
+ ## Adding a New Module
34
+
35
+ 1. Create `wfh_modules/your_module.py` following the existing pattern.
36
+ 2. Add the CLI subcommand in `wfh.py` → `build_parser()`.
37
+ 3. Add the handler function `cmd_your_module()` in `wfh.py`.
38
+ 4. Add the interactive menu entry if appropriate.
39
+ 5. Update `requirements.txt` if new dependencies are needed.
40
+ 6. Update the README with documentation and examples.
41
+
42
+ ## Wordlist Contributions
43
+
44
+ - **No real PII** — no real names tied to real companies.
45
+ - **No real credentials** — no actual passwords from breaches.
46
+ - **Structural patterns only** — abstract shapes, not raw data.
47
+ - Deduplicate entries before submitting.
48
+ - Follow existing file naming conventions (`*.lst`).
49
+
50
+ ## Pull Request Template
51
+
52
+ When opening a PR, include:
53
+
54
+ ```
55
+ ## What changed
56
+ <Brief description>
57
+
58
+ ## Why
59
+ <Motivation / issue reference>
60
+
61
+ ## How to test
62
+ <Steps to verify the change>
63
+
64
+ ## Checklist
65
+ - [ ] Type hints on all new functions
66
+ - [ ] Docstrings on all public functions
67
+ - [ ] No hardcoded sensitive data
68
+ - [ ] Tested locally
69
+ - [ ] README updated (if applicable)
70
+ ```
71
+
72
+ ## Versioning
73
+
74
+ This project follows [Semantic Versioning](https://semver.org/):
75
+ `MAJOR.MINOR.PATCH` (e.g., `1.7.0`).
76
+
77
+ - **MAJOR**: Breaking changes to CLI interface or module API.
78
+ - **MINOR**: New features, new modules, new patterns.
79
+ - **PATCH**: Bug fixes, documentation updates, minor improvements.
80
+
81
+ ## Attribution
82
+
83
+ All contributions are attributed to **André Henrique**
84
+ ([@mrhenrike](https://github.com/mrhenrike)) as the project maintainer.
85
+ Contributors are acknowledged in release notes and the GitHub contributors list.
86
+
87
+ ## License
88
+
89
+ By contributing, you agree that your contributions will be licensed under the
90
+ [MIT License](LICENSE).
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 André Henrique (https://github.com/mrhenrike)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,9 @@
1
+ include LICENSE
2
+ include README.md
3
+ include README.pt-BR.md
4
+ include requirements.txt
5
+ include SECURITY.md
6
+ include CONTRIBUTING.md
7
+ include CODE_OF_CONDUCT.md
8
+ recursive-include data *.json
9
+ recursive-include wfh_modules *.py *.json
@@ -0,0 +1,268 @@
1
+ Metadata-Version: 2.4
2
+ Name: wfh-wordlist
3
+ Version: 2.1.2
4
+ Summary: WordList For Hacking — Unified wordlist generation toolkit for pentest and red team operations
5
+ Author-email: André Henrique <contact@safelabs.com.br>
6
+ Maintainer-email: André Henrique <contact@safelabs.com.br>
7
+ License-Expression: MIT
8
+ Project-URL: Homepage, https://github.com/mrhenrike/WordListsForHacking
9
+ Project-URL: Documentation, https://github.com/mrhenrike/WordListsForHacking/wiki
10
+ Project-URL: Repository, https://github.com/mrhenrike/WordListsForHacking
11
+ Project-URL: Issues, https://github.com/mrhenrike/WordListsForHacking/issues
12
+ Project-URL: Changelog, https://github.com/mrhenrike/WordListsForHacking/releases
13
+ Keywords: wordlist,password,pentest,red-team,security,brute-force,dictionary,hacking,cybersecurity,osint,credential,offensive-security
14
+ Classifier: Development Status :: 5 - Production/Stable
15
+ Classifier: Environment :: Console
16
+ Classifier: Intended Audience :: Information Technology
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: Intended Audience :: System Administrators
19
+ Classifier: Operating System :: OS Independent
20
+ Classifier: Operating System :: Microsoft :: Windows
21
+ Classifier: Operating System :: POSIX :: Linux
22
+ Classifier: Operating System :: MacOS
23
+ Classifier: Programming Language :: Python :: 3
24
+ Classifier: Programming Language :: Python :: 3.8
25
+ Classifier: Programming Language :: Python :: 3.9
26
+ Classifier: Programming Language :: Python :: 3.10
27
+ Classifier: Programming Language :: Python :: 3.11
28
+ Classifier: Programming Language :: Python :: 3.12
29
+ Classifier: Programming Language :: Python :: 3.13
30
+ Classifier: Topic :: Security
31
+ Classifier: Topic :: Utilities
32
+ Requires-Python: >=3.8
33
+ Description-Content-Type: text/markdown
34
+ License-File: LICENSE
35
+ Requires-Dist: colorama>=0.4.6
36
+ Requires-Dist: tqdm>=4.66.0
37
+ Requires-Dist: requests>=2.31.0
38
+ Requires-Dist: beautifulsoup4>=4.12.0
39
+ Requires-Dist: lxml>=4.9.0
40
+ Requires-Dist: chardet>=5.2.0
41
+ Requires-Dist: unidecode>=1.3.6
42
+ Provides-Extra: ocr
43
+ Requires-Dist: easyocr>=1.7.0; extra == "ocr"
44
+ Requires-Dist: Pillow>=10.0.0; extra == "ocr"
45
+ Provides-Extra: docs
46
+ Requires-Dist: openpyxl>=3.1.0; extra == "docs"
47
+ Requires-Dist: pdfplumber>=0.10.0; extra == "docs"
48
+ Requires-Dist: python-docx>=1.1.0; extra == "docs"
49
+ Requires-Dist: striprtf>=0.0.26; extra == "docs"
50
+ Provides-Extra: full
51
+ Requires-Dist: wfh-wordlist[docs,ocr]; extra == "full"
52
+ Dynamic: license-file
53
+
54
+ # WordListsForHacking (WFH)
55
+
56
+ <p align="center">
57
+ <img src="https://img.shields.io/github/stars/mrhenrike/WordListsForHacking?style=flat-square" alt="GitHub Stars">
58
+ <img src="https://img.shields.io/github/license/mrhenrike/WordListsForHacking?style=flat-square" alt="License">
59
+ <img src="https://img.shields.io/badge/version-2.1.2-blue?style=flat-square" alt="Version">
60
+ <img src="https://img.shields.io/badge/python-3.8%2B-blue?style=flat-square&logo=python&logoColor=white" alt="Python 3.8+">
61
+ <img src="https://img.shields.io/pypi/v/wfh-wordlist?style=flat-square&logo=pypi&logoColor=white&color=green" alt="PyPI">
62
+ </p>
63
+
64
+ **Unified wordlist generation toolkit for pentest and red team operations.** Combines charset generation, target profiling, web scraping, OCR extraction, leet speak, DNS fuzzing, phone number generation, corporate user enumeration, ML-based ranking, and statistical analysis — all in a single CLI tool.
65
+
66
+ > **Full documentation:** [Wiki](https://github.com/mrhenrike/WordListsForHacking/wiki)
67
+
68
+ ---
69
+
70
+ > **DISCLAIMER:** This tool is intended **exclusively for authorized security testing, penetration testing, and educational purposes**. Unauthorized use against systems you do not own or have explicit written permission to test is **illegal** and unethical. The author assumes no liability for misuse.
71
+
72
+ ---
73
+
74
+ ## Quick Start
75
+
76
+ ### Install via pip (recommended)
77
+
78
+ ```bash
79
+ pip install wfh-wordlist # core
80
+ pip install "wfh-wordlist[full]" # all extras (OCR, document parsing)
81
+ ```
82
+
83
+ ### Or clone from source
84
+
85
+ ```bash
86
+ git clone https://github.com/mrhenrike/WordListsForHacking.git
87
+ cd WordListsForHacking
88
+
89
+ # Linux / macOS / Termux
90
+ chmod +x setup_venv.sh && ./setup_venv.sh && source .venv/bin/activate
91
+
92
+ # Windows PowerShell
93
+ .\setup_venv.ps1; .\.venv\Scripts\Activate.ps1
94
+ ```
95
+
96
+ ### Run
97
+
98
+ ```bash
99
+ wfh # interactive menu (pip install)
100
+ python wfh.py # interactive menu (from source)
101
+ python wfh.py --help # full CLI help
102
+ ```
103
+
104
+ > **OS prerequisites (OCR only):** see the [Installation wiki page](https://github.com/mrhenrike/WordListsForHacking/wiki/Installation).
105
+
106
+ ---
107
+
108
+ ## Subcommands
109
+
110
+ | # | Command | Description |
111
+ |---|---------|-------------|
112
+ | 1 | `charset` | Charset/mask generation (crunch-style + hashcat masks) |
113
+ | 2 | `pattern` | Template-based generation with variables |
114
+ | 3 | `profile` | Personal target profiling (CUPP-style) |
115
+ | 4 | `corp` | Corporate target profiling |
116
+ | 5 | `corp-users` | Corporate domain user/password generation (50+ patterns) |
117
+ | 6 | `phone` | Phone number wordlists (BR, US, UK) |
118
+ | 7 | `scrape` | Web scraping (CeWL-style) |
119
+ | 8 | `ocr` | OCR text extraction from images |
120
+ | 9 | `extract` | Extract words from PDF/XLSX/DOCX |
121
+ | 10 | `leet` | Leet speak permutations |
122
+ | 11 | `xor` | XOR encrypt/decrypt/brute-force |
123
+ | 12 | `analyze` | Statistical analysis (pipal-style) |
124
+ | 13 | `merge` | Merge & deduplicate wordlists |
125
+ | 14 | `dns` | DNS/subdomain fuzzing (alterx-style) |
126
+ | 15 | `pharma` | Healthcare/pharmacy credential patterns |
127
+ | 16 | `sanitize` | Clean & normalize wordlists |
128
+ | 17 | `reverse` | Reverse line order |
129
+ | 18 | `corp-prefixes` | Corporate prefix usernames (MSP/SOC/DevOps) |
130
+ | 19 | `train` | Train ML pattern model |
131
+ | 20 | `sysinfo` | Hardware & compute info |
132
+
133
+ > **Detailed syntax and examples for each subcommand:** [Wiki — Subcommands](https://github.com/mrhenrike/WordListsForHacking/wiki)
134
+
135
+ ### Global Flags
136
+
137
+ ```bash
138
+ python wfh.py --threads 20 --compute cuda --no-ml <subcommand>
139
+ ```
140
+
141
+ | Flag | Default | Description |
142
+ |------|---------|-------------|
143
+ | `--threads N` | `5` | Thread count (1–300) |
144
+ | `--compute MODE` | `auto` | `auto` / `cpu` / `gpu` / `cuda` / `rocm` / `mps` / `hybrid` |
145
+ | `--no-ml` | off | Disable ML ranking |
146
+ | `-v` | off | Verbose logging |
147
+
148
+ ---
149
+
150
+ ## Common Usage Examples
151
+
152
+ ### Corporate pentest — generate users + passwords
153
+
154
+ ```bash
155
+ python wfh.py corp-users --domain acme.com.br --file employees.txt --passwords --combo -o acme_combo.lst
156
+ ```
157
+
158
+ ### Personal target profiling
159
+
160
+ ```bash
161
+ python wfh.py profile --name "João Silva" --nick joao --birth 15/03/1990 --leet aggressive -o target.lst
162
+ ```
163
+
164
+ ### Charset with hashcat mask
165
+
166
+ ```bash
167
+ python wfh.py charset 8 8 --mask "?u?l?l?l?d?d?d?s" -o passwords.lst
168
+ ```
169
+
170
+ ### Template-based patterns
171
+
172
+ ```bash
173
+ python wfh.py pattern -t "{company}{year}!" --vars company=acme,globex year=2020-2026 -o patterns.lst
174
+ ```
175
+
176
+ ### DNS subdomain fuzzing
177
+
178
+ ```bash
179
+ python wfh.py dns -d acme.com.br --words dev staging api admin portal -o subdomains.lst
180
+ ```
181
+
182
+ ### Analyze an existing wordlist
183
+
184
+ ```bash
185
+ python wfh.py analyze passwords.lst --top 30 --masks --format json -o analysis.json
186
+ ```
187
+
188
+ ### Merge & sanitize
189
+
190
+ ```bash
191
+ python wfh.py merge list1.lst list2.lst --min-len 6 --sort -o merged.lst
192
+ python wfh.py sanitize merged.lst --inplace
193
+ ```
194
+
195
+ > **More examples and scenarios:** [Wiki — Quick Start](https://github.com/mrhenrike/WordListsForHacking/wiki/Quick-Start)
196
+
197
+ ---
198
+
199
+ ## Wordlists
200
+
201
+ | File | Description | Entries |
202
+ |------|-------------|---------|
203
+ | `passwords/wlist_brasil.lst` | Brazilian password corpus — cultural word banks, corporate patterns, leet speak, keyboard walks. Company names and CNPJs are public OSINT data. | ~3.88M |
204
+ | `passwords/default-creds-combo.lst` | Default credential user:password combos | ~2.4K |
205
+ | `usernames/username_br.lst` | Brazilian + global username patterns | ~1.6K |
206
+ | `labs/*.lst` | Workshop & training wordlists | — |
207
+
208
+ > **Details:** [Wiki — Brazilian Wordlist](https://github.com/mrhenrike/WordListsForHacking/wiki/Brazilian-Wordlist)
209
+
210
+ ---
211
+
212
+ ## Is My Password in This List?
213
+
214
+ ```bash
215
+ # Linux/macOS
216
+ grep -qxF 'YourPassword' passwords/wlist_brasil.lst && echo "FOUND!" || echo "Not found"
217
+
218
+ # Windows PowerShell
219
+ Select-String -Path passwords\wlist_brasil.lst -Pattern ('^' + [regex]::Escape('YourPassword') + '$') -CaseSensitive -Quiet
220
+ ```
221
+
222
+ If found: **change it immediately**, enable MFA/2FA, use a password manager, and never reuse passwords.
223
+
224
+ > **Full guide:** [Wiki — Password Check](https://github.com/mrhenrike/WordListsForHacking/wiki/Password-Check)
225
+
226
+ ---
227
+
228
+ ## ML Model
229
+
230
+ WFH includes a lightweight ML model that ranks generated candidates by structural pattern probability. Train it with `python wfh.py train --auto`. The model stores **only structural patterns** — no PII, passwords, or company names.
231
+
232
+ > **Details:** [Wiki — ML Model](https://github.com/mrhenrike/WordListsForHacking/wiki/ML-Model)
233
+
234
+ ---
235
+
236
+ ## Credits & Inspiration
237
+
238
+ | Project | Inspiration |
239
+ |---------|-------------|
240
+ | [CUPP](https://github.com/Mebus/cupp) | Personal target profiling |
241
+ | [Crunch](https://github.com/jim3ma/crunch) | Charset-based generation |
242
+ | [CeWL](https://github.com/digininja/CeWL) | Web scraping for wordlists |
243
+ | [alterx](https://github.com/projectdiscovery/alterx) | DNS/subdomain fuzzing |
244
+ | [pipal](https://github.com/digininja/pipal) | Statistical analysis |
245
+ | [SecLists](https://github.com/danielmiessler/SecLists) | Curated security lists |
246
+ | [elpscrk](https://github.com/D4Vinci/elpscrk) | Permutation-based generation |
247
+ | [BEWGor](https://github.com/berzerk0/BEWGor) | Biographical wordlist generator |
248
+ | [pnwgen](https://github.com/toxydose/pnwgen) | Phone number generation |
249
+
250
+ ---
251
+
252
+ ## Contributing
253
+
254
+ Contributions welcome. See [CONTRIBUTING.md](CONTRIBUTING.md).
255
+
256
+ ## License
257
+
258
+ [MIT License](LICENSE) — Copyright (c) 2026 André Henrique ([@mrhenrike](https://github.com/mrhenrike))
259
+
260
+ ---
261
+
262
+ <p align="center">
263
+ Created by <a href="https://github.com/mrhenrike">André Henrique (@mrhenrike)</a> — <a href="https://github.com/Uniao-Geek">União Geek</a>
264
+ </p>
265
+
266
+ <p align="center">
267
+ <a href="README.pt-BR.md">Leia em Português</a> · <a href="https://github.com/mrhenrike/WordListsForHacking/wiki">Full Documentation (Wiki)</a>
268
+ </p>
@@ -0,0 +1,215 @@
1
+ # WordListsForHacking (WFH)
2
+
3
+ <p align="center">
4
+ <img src="https://img.shields.io/github/stars/mrhenrike/WordListsForHacking?style=flat-square" alt="GitHub Stars">
5
+ <img src="https://img.shields.io/github/license/mrhenrike/WordListsForHacking?style=flat-square" alt="License">
6
+ <img src="https://img.shields.io/badge/version-2.1.2-blue?style=flat-square" alt="Version">
7
+ <img src="https://img.shields.io/badge/python-3.8%2B-blue?style=flat-square&logo=python&logoColor=white" alt="Python 3.8+">
8
+ <img src="https://img.shields.io/pypi/v/wfh-wordlist?style=flat-square&logo=pypi&logoColor=white&color=green" alt="PyPI">
9
+ </p>
10
+
11
+ **Unified wordlist generation toolkit for pentest and red team operations.** Combines charset generation, target profiling, web scraping, OCR extraction, leet speak, DNS fuzzing, phone number generation, corporate user enumeration, ML-based ranking, and statistical analysis — all in a single CLI tool.
12
+
13
+ > **Full documentation:** [Wiki](https://github.com/mrhenrike/WordListsForHacking/wiki)
14
+
15
+ ---
16
+
17
+ > **DISCLAIMER:** This tool is intended **exclusively for authorized security testing, penetration testing, and educational purposes**. Unauthorized use against systems you do not own or have explicit written permission to test is **illegal** and unethical. The author assumes no liability for misuse.
18
+
19
+ ---
20
+
21
+ ## Quick Start
22
+
23
+ ### Install via pip (recommended)
24
+
25
+ ```bash
26
+ pip install wfh-wordlist # core
27
+ pip install "wfh-wordlist[full]" # all extras (OCR, document parsing)
28
+ ```
29
+
30
+ ### Or clone from source
31
+
32
+ ```bash
33
+ git clone https://github.com/mrhenrike/WordListsForHacking.git
34
+ cd WordListsForHacking
35
+
36
+ # Linux / macOS / Termux
37
+ chmod +x setup_venv.sh && ./setup_venv.sh && source .venv/bin/activate
38
+
39
+ # Windows PowerShell
40
+ .\setup_venv.ps1; .\.venv\Scripts\Activate.ps1
41
+ ```
42
+
43
+ ### Run
44
+
45
+ ```bash
46
+ wfh # interactive menu (pip install)
47
+ python wfh.py # interactive menu (from source)
48
+ python wfh.py --help # full CLI help
49
+ ```
50
+
51
+ > **OS prerequisites (OCR only):** see the [Installation wiki page](https://github.com/mrhenrike/WordListsForHacking/wiki/Installation).
52
+
53
+ ---
54
+
55
+ ## Subcommands
56
+
57
+ | # | Command | Description |
58
+ |---|---------|-------------|
59
+ | 1 | `charset` | Charset/mask generation (crunch-style + hashcat masks) |
60
+ | 2 | `pattern` | Template-based generation with variables |
61
+ | 3 | `profile` | Personal target profiling (CUPP-style) |
62
+ | 4 | `corp` | Corporate target profiling |
63
+ | 5 | `corp-users` | Corporate domain user/password generation (50+ patterns) |
64
+ | 6 | `phone` | Phone number wordlists (BR, US, UK) |
65
+ | 7 | `scrape` | Web scraping (CeWL-style) |
66
+ | 8 | `ocr` | OCR text extraction from images |
67
+ | 9 | `extract` | Extract words from PDF/XLSX/DOCX |
68
+ | 10 | `leet` | Leet speak permutations |
69
+ | 11 | `xor` | XOR encrypt/decrypt/brute-force |
70
+ | 12 | `analyze` | Statistical analysis (pipal-style) |
71
+ | 13 | `merge` | Merge & deduplicate wordlists |
72
+ | 14 | `dns` | DNS/subdomain fuzzing (alterx-style) |
73
+ | 15 | `pharma` | Healthcare/pharmacy credential patterns |
74
+ | 16 | `sanitize` | Clean & normalize wordlists |
75
+ | 17 | `reverse` | Reverse line order |
76
+ | 18 | `corp-prefixes` | Corporate prefix usernames (MSP/SOC/DevOps) |
77
+ | 19 | `train` | Train ML pattern model |
78
+ | 20 | `sysinfo` | Hardware & compute info |
79
+
80
+ > **Detailed syntax and examples for each subcommand:** [Wiki — Subcommands](https://github.com/mrhenrike/WordListsForHacking/wiki)
81
+
82
+ ### Global Flags
83
+
84
+ ```bash
85
+ python wfh.py --threads 20 --compute cuda --no-ml <subcommand>
86
+ ```
87
+
88
+ | Flag | Default | Description |
89
+ |------|---------|-------------|
90
+ | `--threads N` | `5` | Thread count (1–300) |
91
+ | `--compute MODE` | `auto` | `auto` / `cpu` / `gpu` / `cuda` / `rocm` / `mps` / `hybrid` |
92
+ | `--no-ml` | off | Disable ML ranking |
93
+ | `-v` | off | Verbose logging |
94
+
95
+ ---
96
+
97
+ ## Common Usage Examples
98
+
99
+ ### Corporate pentest — generate users + passwords
100
+
101
+ ```bash
102
+ python wfh.py corp-users --domain acme.com.br --file employees.txt --passwords --combo -o acme_combo.lst
103
+ ```
104
+
105
+ ### Personal target profiling
106
+
107
+ ```bash
108
+ python wfh.py profile --name "João Silva" --nick joao --birth 15/03/1990 --leet aggressive -o target.lst
109
+ ```
110
+
111
+ ### Charset with hashcat mask
112
+
113
+ ```bash
114
+ python wfh.py charset 8 8 --mask "?u?l?l?l?d?d?d?s" -o passwords.lst
115
+ ```
116
+
117
+ ### Template-based patterns
118
+
119
+ ```bash
120
+ python wfh.py pattern -t "{company}{year}!" --vars company=acme,globex year=2020-2026 -o patterns.lst
121
+ ```
122
+
123
+ ### DNS subdomain fuzzing
124
+
125
+ ```bash
126
+ python wfh.py dns -d acme.com.br --words dev staging api admin portal -o subdomains.lst
127
+ ```
128
+
129
+ ### Analyze an existing wordlist
130
+
131
+ ```bash
132
+ python wfh.py analyze passwords.lst --top 30 --masks --format json -o analysis.json
133
+ ```
134
+
135
+ ### Merge & sanitize
136
+
137
+ ```bash
138
+ python wfh.py merge list1.lst list2.lst --min-len 6 --sort -o merged.lst
139
+ python wfh.py sanitize merged.lst --inplace
140
+ ```
141
+
142
+ > **More examples and scenarios:** [Wiki — Quick Start](https://github.com/mrhenrike/WordListsForHacking/wiki/Quick-Start)
143
+
144
+ ---
145
+
146
+ ## Wordlists
147
+
148
+ | File | Description | Entries |
149
+ |------|-------------|---------|
150
+ | `passwords/wlist_brasil.lst` | Brazilian password corpus — cultural word banks, corporate patterns, leet speak, keyboard walks. Company names and CNPJs are public OSINT data. | ~3.88M |
151
+ | `passwords/default-creds-combo.lst` | Default credential user:password combos | ~2.4K |
152
+ | `usernames/username_br.lst` | Brazilian + global username patterns | ~1.6K |
153
+ | `labs/*.lst` | Workshop & training wordlists | — |
154
+
155
+ > **Details:** [Wiki — Brazilian Wordlist](https://github.com/mrhenrike/WordListsForHacking/wiki/Brazilian-Wordlist)
156
+
157
+ ---
158
+
159
+ ## Is My Password in This List?
160
+
161
+ ```bash
162
+ # Linux/macOS
163
+ grep -qxF 'YourPassword' passwords/wlist_brasil.lst && echo "FOUND!" || echo "Not found"
164
+
165
+ # Windows PowerShell
166
+ Select-String -Path passwords\wlist_brasil.lst -Pattern ('^' + [regex]::Escape('YourPassword') + '$') -CaseSensitive -Quiet
167
+ ```
168
+
169
+ If found: **change it immediately**, enable MFA/2FA, use a password manager, and never reuse passwords.
170
+
171
+ > **Full guide:** [Wiki — Password Check](https://github.com/mrhenrike/WordListsForHacking/wiki/Password-Check)
172
+
173
+ ---
174
+
175
+ ## ML Model
176
+
177
+ WFH includes a lightweight ML model that ranks generated candidates by structural pattern probability. Train it with `python wfh.py train --auto`. The model stores **only structural patterns** — no PII, passwords, or company names.
178
+
179
+ > **Details:** [Wiki — ML Model](https://github.com/mrhenrike/WordListsForHacking/wiki/ML-Model)
180
+
181
+ ---
182
+
183
+ ## Credits & Inspiration
184
+
185
+ | Project | Inspiration |
186
+ |---------|-------------|
187
+ | [CUPP](https://github.com/Mebus/cupp) | Personal target profiling |
188
+ | [Crunch](https://github.com/jim3ma/crunch) | Charset-based generation |
189
+ | [CeWL](https://github.com/digininja/CeWL) | Web scraping for wordlists |
190
+ | [alterx](https://github.com/projectdiscovery/alterx) | DNS/subdomain fuzzing |
191
+ | [pipal](https://github.com/digininja/pipal) | Statistical analysis |
192
+ | [SecLists](https://github.com/danielmiessler/SecLists) | Curated security lists |
193
+ | [elpscrk](https://github.com/D4Vinci/elpscrk) | Permutation-based generation |
194
+ | [BEWGor](https://github.com/berzerk0/BEWGor) | Biographical wordlist generator |
195
+ | [pnwgen](https://github.com/toxydose/pnwgen) | Phone number generation |
196
+
197
+ ---
198
+
199
+ ## Contributing
200
+
201
+ Contributions welcome. See [CONTRIBUTING.md](CONTRIBUTING.md).
202
+
203
+ ## License
204
+
205
+ [MIT License](LICENSE) — Copyright (c) 2026 André Henrique ([@mrhenrike](https://github.com/mrhenrike))
206
+
207
+ ---
208
+
209
+ <p align="center">
210
+ Created by <a href="https://github.com/mrhenrike">André Henrique (@mrhenrike)</a> — <a href="https://github.com/Uniao-Geek">União Geek</a>
211
+ </p>
212
+
213
+ <p align="center">
214
+ <a href="README.pt-BR.md">Leia em Português</a> · <a href="https://github.com/mrhenrike/WordListsForHacking/wiki">Full Documentation (Wiki)</a>
215
+ </p>