wordlesmith 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. wordlesmith-0.1.0/.gitignore +42 -0
  2. wordlesmith-0.1.0/CHANGELOG.md +47 -0
  3. wordlesmith-0.1.0/LICENSE.txt +21 -0
  4. wordlesmith-0.1.0/PKG-INFO +300 -0
  5. wordlesmith-0.1.0/README.md +247 -0
  6. wordlesmith-0.1.0/benchmarks/results/official/curated_all.csv +4 -0
  7. wordlesmith-0.1.0/benchmarks/results/official/curated_all.md +5 -0
  8. wordlesmith-0.1.0/benchmarks/results/official/curated_answers.csv +6 -0
  9. wordlesmith-0.1.0/benchmarks/results/official/curated_answers.md +7 -0
  10. wordlesmith-0.1.0/benchmarks/results/official/distribution_curated.png +0 -0
  11. wordlesmith-0.1.0/benchmarks/results/official/distribution_curated_all.png +0 -0
  12. wordlesmith-0.1.0/benchmarks/results/official/distribution_valid.png +0 -0
  13. wordlesmith-0.1.0/benchmarks/results/official/meta.txt +4 -0
  14. wordlesmith-0.1.0/benchmarks/results/official/per_word_curated.csv +2316 -0
  15. wordlesmith-0.1.0/benchmarks/results/official/per_word_valid.csv +14856 -0
  16. wordlesmith-0.1.0/benchmarks/results/official/primary_valid.csv +6 -0
  17. wordlesmith-0.1.0/benchmarks/results/official/primary_valid.md +7 -0
  18. wordlesmith-0.1.0/docs/demo.gif +0 -0
  19. wordlesmith-0.1.0/docs/demo.tape +34 -0
  20. wordlesmith-0.1.0/docs/solve.gif +0 -0
  21. wordlesmith-0.1.0/docs/solve.tape +30 -0
  22. wordlesmith-0.1.0/docs/strategies.md +214 -0
  23. wordlesmith-0.1.0/examples/demo.ipynb +192 -0
  24. wordlesmith-0.1.0/pyproject.toml +102 -0
  25. wordlesmith-0.1.0/scripts/bench_one.py +39 -0
  26. wordlesmith-0.1.0/scripts/gen_openings.py +95 -0
  27. wordlesmith-0.1.0/scripts/run_official_benchmark.py +85 -0
  28. wordlesmith-0.1.0/src/wordlesmith/__init__.py +41 -0
  29. wordlesmith-0.1.0/src/wordlesmith/benchmark.py +243 -0
  30. wordlesmith-0.1.0/src/wordlesmith/cli.py +303 -0
  31. wordlesmith-0.1.0/src/wordlesmith/data/curated_answers.txt +2318 -0
  32. wordlesmith-0.1.0/src/wordlesmith/data/openings.json +10 -0
  33. wordlesmith-0.1.0/src/wordlesmith/data/valid_words.txt +14859 -0
  34. wordlesmith-0.1.0/src/wordlesmith/feedback.py +87 -0
  35. wordlesmith-0.1.0/src/wordlesmith/game.py +92 -0
  36. wordlesmith-0.1.0/src/wordlesmith/py.typed +0 -0
  37. wordlesmith-0.1.0/src/wordlesmith/strategies/__init__.py +49 -0
  38. wordlesmith-0.1.0/src/wordlesmith/strategies/base.py +128 -0
  39. wordlesmith-0.1.0/src/wordlesmith/strategies/entropy.py +24 -0
  40. wordlesmith-0.1.0/src/wordlesmith/strategies/expected_size.py +19 -0
  41. wordlesmith-0.1.0/src/wordlesmith/strategies/frequency.py +42 -0
  42. wordlesmith-0.1.0/src/wordlesmith/strategies/minimax.py +18 -0
  43. wordlesmith-0.1.0/src/wordlesmith/strategies/random_guess.py +20 -0
  44. wordlesmith-0.1.0/src/wordlesmith/words.py +51 -0
  45. wordlesmith-0.1.0/tests/test_benchmark.py +112 -0
  46. wordlesmith-0.1.0/tests/test_cli.py +93 -0
  47. wordlesmith-0.1.0/tests/test_feedback.py +90 -0
  48. wordlesmith-0.1.0/tests/test_game.py +60 -0
  49. wordlesmith-0.1.0/tests/test_strategies.py +111 -0
  50. wordlesmith-0.1.0/tests/test_words.py +45 -0
@@ -0,0 +1,42 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+ .venv/
9
+ venv/
10
+ env/
11
+
12
+ # Tooling caches
13
+ .pytest_cache/
14
+ .mypy_cache/
15
+ .ruff_cache/
16
+ .coverage
17
+ htmlcov/
18
+ coverage.xml
19
+
20
+ # Benchmark output. Only the official committed run is tracked, and within it
21
+ # only the compact artifacts (CSV / Markdown / PNG / per-word). The large
22
+ # combined JSON and the raw per-result checkpoints are regenerable, so ignore
23
+ # them: `python scripts/run_official_benchmark.py` rebuilds everything.
24
+ benchmarks/results/*
25
+ !benchmarks/results/official/
26
+ benchmarks/results/official/raw/
27
+ benchmarks/results/official/*.json
28
+
29
+ # OS / editor
30
+ .DS_Store
31
+ .idea/
32
+ .vscode/
33
+ *.swp
34
+
35
+ # Local planning / writing notes
36
+ plan.html
37
+ docs/blog-post.md
38
+ docs/paper-outline.md
39
+ docs/pypi-plan.md
40
+
41
+ # Jupyter
42
+ .ipynb_checkpoints/
@@ -0,0 +1,47 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project are documented here. The format is based on
4
+ [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project
5
+ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [0.1.0] - Unreleased
8
+
9
+ First packaged release. Rewrites the original exploratory notebook into an
10
+ installable, tested Python package with a CLI and a strategy-comparison
11
+ benchmark suite.
12
+
13
+ ### Added
14
+ - `wordlesmith` package with a zero-dependency core (pure standard library).
15
+ - Exact Wordle feedback scoring with correct duplicate-letter handling, encoded
16
+ as base-3 patterns (`feedback`, `pattern_to_string`, `pattern_from_string`).
17
+ - `GameState` / `simulate` engine that filters candidates purely by
18
+ pattern-consistency.
19
+ - Five strategies behind a common interface: `frequency` (the original
20
+ positional-frequency baseline), `entropy`, `expected-size`, `minimax`, and a
21
+ `random` control. Scoring strategies support both `answers` and `all` guess
22
+ pools.
23
+ - Packaged word lists: the full 14,855-word valid-guess list (the default
24
+ answer and guess pool) and the original 2,315-word curated answer set, plus a
25
+ precomputed opening-guess table for fast first moves.
26
+ - `--curated` flag (and `load_curated_answers`) to run against the original
27
+ 2,315-word solution set instead of the full valid list.
28
+ - `wordlesmith` CLI with `play`, `solve`, `benchmark`, and `compare`
29
+ subcommands (argparse, standard library only).
30
+ - Benchmark runner with average/median/max/fail metrics, guess-distribution
31
+ histograms, and CSV/JSON/Markdown output; optional plots via the `bench`
32
+ extra.
33
+ - Test suite (pytest) covering duplicate-letter scoring edge cases,
34
+ pattern-consistency filtering, strategy selection, and the CLI.
35
+ - GitHub Actions CI (lint, type-check, test matrix on Python 3.10-3.13, build).
36
+
37
+ ### Changed
38
+ - The engine no longer tracks greens/yellows/grays with ad-hoc bookkeeping. The
39
+ pattern-consistency filter fixes latent duplicate-letter bugs in the original
40
+ notebook, which slightly improves the frequency baseline (about 3.64 vs the
41
+ previously reported 3.67 average).
42
+ - The solver now considers every valid word a possible answer by default,
43
+ instead of only the original 2,315 solutions. The NYT has revised the answer
44
+ set over time (for example MAVEN, a real answer, is not in the original list),
45
+ so the old default could dead-end on a legitimate puzzle. The full-valid pool
46
+ never does, at the cost of a somewhat higher average; the classic numbers are
47
+ still available with `--curated`.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Aditya Mehrotra
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,300 @@
1
+ Metadata-Version: 2.4
2
+ Name: wordlesmith
3
+ Version: 0.1.0
4
+ Summary: A Wordle solver with pluggable strategies and a strategy-comparison benchmark suite.
5
+ Project-URL: Homepage, https://github.com/adityakmehrotra/wordlesmith
6
+ Project-URL: Repository, https://github.com/adityakmehrotra/wordlesmith
7
+ Project-URL: Issues, https://github.com/adityakmehrotra/wordlesmith/issues
8
+ Author-email: Aditya Mehrotra <adi1.mehrotra@gmail.com>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 Aditya Mehrotra
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE.txt
31
+ Keywords: benchmark,entropy,game,puzzle,solver,wordle
32
+ Classifier: Development Status :: 4 - Beta
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Programming Language :: Python :: 3.10
37
+ Classifier: Programming Language :: Python :: 3.11
38
+ Classifier: Programming Language :: Python :: 3.12
39
+ Classifier: Programming Language :: Python :: 3.13
40
+ Classifier: Topic :: Games/Entertainment :: Puzzle Games
41
+ Classifier: Typing :: Typed
42
+ Requires-Python: >=3.10
43
+ Provides-Extra: bench
44
+ Requires-Dist: matplotlib>=3.6; extra == 'bench'
45
+ Provides-Extra: dev
46
+ Requires-Dist: build>=1.0; extra == 'dev'
47
+ Requires-Dist: mypy>=1.8; extra == 'dev'
48
+ Requires-Dist: pytest-cov>=4.1; extra == 'dev'
49
+ Requires-Dist: pytest>=7.4; extra == 'dev'
50
+ Requires-Dist: ruff>=0.5; extra == 'dev'
51
+ Requires-Dist: twine>=5.0; extra == 'dev'
52
+ Description-Content-Type: text/markdown
53
+
54
+ <a id="readme-top"></a>
55
+
56
+ <div align="center">
57
+
58
+ [![CI][ci-shield]][ci-url]
59
+ [![Python 3.10+][python-shield]][python-url]
60
+ [![MIT License][license-shield]][license-url]
61
+
62
+ # wordlesmith
63
+
64
+ A Wordle solver with pluggable strategies and a benchmark suite for comparing them.
65
+
66
+ Considers every valid word a possible answer, so it never dead-ends on a real puzzle (entropy
67
+ averages 4.52 guesses over all 14,855 valid words, and 3.60 on the classic 2,315-answer set). The
68
+ core is pure standard library.
69
+
70
+ <img src="https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/docs/demo.gif" alt="wordlesmith playing along with a Wordle" width="760">
71
+
72
+ </div>
73
+
74
+ ---
75
+
76
+ ## Contents
77
+
78
+ - [What it does](#what-it-does)
79
+ - [Install](#install)
80
+ - [Quickstart](#quickstart)
81
+ - [Benchmark](#benchmark)
82
+ - [How it works](#how-it-works)
83
+ - [Strategies](#strategies) ([in-depth](docs/strategies.md))
84
+ - [Development](#development)
85
+ - [License &amp; contact](#license--contact)
86
+
87
+ ## What it does
88
+
89
+ `wordlesmith` is a command-line and library Wordle solver. It ships:
90
+
91
+ - A Wordle scoring engine that handles duplicate letters correctly, which is where most
92
+ solvers have subtle bugs.
93
+ - Five strategies behind one interface: positional frequency, entropy, expected remaining
94
+ size, minimax, and a random control.
95
+ - A benchmark framework that plays every valid word and reports the full guess distribution.
96
+ - The full 14,855-word valid-guess list (the default answer pool, so it never dead-ends on a
97
+ real puzzle) and the original 2,315-word answer set, packaged with a precomputed
98
+ opening-guess table so the first move is instant.
99
+
100
+ The core has no third-party dependencies. Plotting is the only extra.
101
+
102
+ ## Install
103
+
104
+ ```bash
105
+ # From GitHub
106
+ pip install "git+https://github.com/adityakmehrotra/wordlesmith"
107
+
108
+ # For development (tests, lint, plots)
109
+ git clone https://github.com/adityakmehrotra/wordlesmith
110
+ cd wordlesmith
111
+ pip install -e ".[dev,bench]"
112
+ ```
113
+
114
+ Requires Python 3.10+.
115
+
116
+ ## Quickstart
117
+
118
+ ### Command line
119
+
120
+ Auto-solve a known word:
121
+
122
+ <img src="https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/docs/solve.gif" alt="wordlesmith solve maven, then solve crane --curated" width="720">
123
+
124
+ (`maven` is a real NYT answer that isn't in the original 2,315-word list, so a solver built only
125
+ on that list would never find it. The default pool is every valid word, so this just works.)
126
+
127
+ Play along with a real puzzle: it suggests a guess, you type the colors back
128
+ (`g`=green, `y`=yellow, `x`=gray):
129
+
130
+ ```console
131
+ $ wordlesmith play --strategy entropy
132
+ Turn 1 suggestion: TARES (14855 candidates)
133
+ Enter feedback: xgxgx
134
+ Turn 2 suggestion: LADEN (150 candidates)
135
+ Enter feedback: ...
136
+ ```
137
+
138
+ Benchmark one strategy, or compare several:
139
+
140
+ ```console
141
+ $ wordlesmith benchmark --strategy entropy --sample 300
142
+ $ wordlesmith compare --strategies frequency,entropy,minimax --markdown
143
+ $ wordlesmith compare --curated --markdown # the classic 2,315-answer set
144
+ ```
145
+
146
+ Run `wordlesmith --help` (or `wordlesmith <command> --help`) for all options,
147
+ including `--curated`, `--guess-pool all`, `--jobs` for parallel benchmarks, and
148
+ `--answers`/`--allowed` for custom word lists.
149
+
150
+ ### Python API
151
+
152
+ ```python
153
+ from wordlesmith import get_strategy, simulate, feedback, pattern_to_string
154
+
155
+ # Score a guess against a target (base-3 pattern; g/y/x string for humans)
156
+ print(pattern_to_string(feedback("speed", "abide"))) # -> xxyxy
157
+
158
+ # Auto-play a word
159
+ result = simulate("maven", get_strategy("entropy"))
160
+ print(result.turns, result.guesses) # -> 3 ['tares', 'laden', 'maven']
161
+ ```
162
+
163
+ ## Benchmark
164
+
165
+ Lower average is better; `max` is the worst game; `fail%` is games not solved within six
166
+ guesses.
167
+
168
+ ### Primary: every valid word (the default)
169
+
170
+ Each strategy plays all 14,855 valid words, guessing from the words still consistent with the
171
+ feedback. This is how the solver actually runs, so it never dead-ends on a real puzzle:
172
+
173
+ | strategy | pool | avg | max | fail% |
174
+ | --- | --- | --- | --- | --- |
175
+ | random | answers | 5.061 | >6 | 16.68 |
176
+ | frequency | answers | 4.922 | >6 | 14.57 |
177
+ | minimax | answers | 4.658 | >6 | 11.29 |
178
+ | expected-size | answers | 4.585 | >6 | 10.57 |
179
+ | entropy | answers | 4.523 | >6 | 9.47 |
180
+
181
+ The averages are higher than on the curated set and the failure rate is non-trivial (about 9% even for entropy) because the
182
+ full valid list is packed with near-identical clusters (`match`/`batch`/`catch`/`hatch`/..., the
183
+ `-ound` and `-ight` families, plus many obscure words) that simply cannot be separated in six
184
+ guesses. Those hard words are almost never real NYT answers, so for actual daily play the curated
185
+ number below is the realistic one; this table is the pessimistic "solve literally any valid word"
186
+ figure.
187
+
188
+ ![Guess distribution by strategy](https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/benchmarks/results/official/distribution_valid.png)
189
+
190
+ ### Secondary: the classic 2,315-answer set (`--curated`)
191
+
192
+ Restricted to the original Wordle solution set, the problem is easier and the numbers are
193
+ comparable to published solvers. The `all` pool (guessing any word for information) gets close
194
+ to the known optimum of about 3.421:
195
+
196
+ | strategy | pool | avg | max | fail% |
197
+ | --- | --- | --- | --- | --- |
198
+ | random | answers | 4.039 | >6 | 0.82 |
199
+ | frequency | answers | 3.640 | >6 | 0.60 |
200
+ | expected-size | answers | 3.623 | >6 | 0.60 |
201
+ | minimax | answers | 3.677 | >6 | 0.65 |
202
+ | entropy | answers | 3.598 | >6 | 0.48 |
203
+ | entropy | all | 3.465 | 6 | 0.00 |
204
+ | expected-size | all | 3.481 | 5 | 0.00 |
205
+ | minimax | all | 3.573 | 6 | 0.00 |
206
+
207
+ A concrete example of what the smart strategies buy you: solving `mound` on the curated set, the
208
+ frequency baseline burns turns cycling through lookalikes (`slate`, `crony`, `bound`, `found`,
209
+ `hound`, `mound`) while entropy picks a splitting guess and finishes in three (`raise`, `mulch`,
210
+ `mound`).
211
+
212
+ <sub>Methodology: a game is a failure if unsolved in 6 guesses (counted as 7 in the mean).
213
+ Deterministic strategies are reproducible; `random` uses a fixed seed. Full results and per-word
214
+ data are in [`benchmarks/results/official/`](https://github.com/adityakmehrotra/wordlesmith/tree/main/benchmarks/results/official); regenerate the primary
215
+ with `python scripts/run_official_benchmark.py`. The primary `answers`-pool run takes about 10
216
+ minutes per strategy on 9 cores; the curated `all`-pool run scores every valid word each turn and
217
+ takes far longer, which is why it stays on the smaller curated set. Use `--sample N` for a quick
218
+ estimate.</sub>
219
+
220
+ ## How it works
221
+
222
+ Scoring: Wordle feedback is computed in two passes. Greens are assigned first and each
223
+ consumes its letter in the target; yellows are then assigned left to right, each consuming
224
+ a remaining occurrence. A guess letter with no occurrence left is gray. This is why the
225
+ second `E` in `SPEED` is gray against `ABIDE`, which has only one `E`.
226
+
227
+ Filtering: after each guess the solver keeps a word `w` only if `feedback(guess, w)`
228
+ equals the pattern actually observed. This single rule handles every duplicate-letter case
229
+ correctly, so there is no separate (and bug-prone) tracking of which letters are "in" or
230
+ "out".
231
+
232
+ Word lists: by default every valid Wordle word is treated as a possible answer. The original
233
+ Wordle solution set was only 2,315 words, but the NYT has revised it over time, so a solver
234
+ built on that list can dead-end on a legitimate answer it never considered (`maven`, for
235
+ instance). Using the full valid list avoids that, at the cost of a somewhat higher average
236
+ since there are more words to tell apart. Pass `--curated` to fall back to the original
237
+ 2,315-answer set (faster, and the numbers become comparable to published solvers).
238
+
239
+ ## Strategies
240
+
241
+ | name | idea | good for |
242
+ | --- | --- | --- |
243
+ | `frequency` | Sum of per-position letter frequencies among candidates (the original baseline). | A strong, cheap heuristic. |
244
+ | `entropy` | Maximize expected information (Shannon entropy of the feedback-bucket distribution). | Best average guess count. |
245
+ | `expected-size` | Minimize the expected number of remaining candidates. | Simple, nearly as strong as entropy. |
246
+ | `minimax` | Minimize the largest feedback bucket (worst case). | Smallest worst-case bucket per guess (not always the fewest worst-case turns). |
247
+ | `random` | Guess a random consistent word. | A control / baseline to beat. |
248
+
249
+ The entropy, expected-size, and minimax strategies accept a `--guess-pool` of `answers`
250
+ (guess from remaining candidates) or `all` (guess from the full allowed list).
251
+
252
+ See [`docs/strategies.md`](docs/strategies.md) for an in-depth explanation of each strategy:
253
+ the scoring formulas, the bucket-splitting idea the information-theoretic strategies share
254
+ (with a worked example), the guess-pool trade-off, and how to add your own strategy.
255
+
256
+ ## Limitations
257
+
258
+ - **Pure Python is slow for the `all` guess pool.** Scoring every valid word each turn takes
259
+ minutes per benchmark, which is why the committed `all`-pool numbers stay on the curated set.
260
+ For a single interactive `solve`/`play` it's fine (the opening is precomputed).
261
+ - **The word list is a snapshot.** `valid_words.txt` is the NYT valid-guess list as of mid-2025.
262
+ If the NYT adds words later, refresh it and regenerate the opening table.
263
+ - **Six-guess failures are expected.** Over the full valid list even entropy fails about 9% of
264
+ games, because clusters like `match`/`batch`/`catch`/`hatch` or the `-ound`/`-ight` families
265
+ can't be separated in six turns. Those words are rarely real answers, so `--curated` is the
266
+ realistic daily-play figure.
267
+ - **The strategies are greedy.** They optimize the current guess, not the whole game tree, so
268
+ even the best is a step behind the known optimal decision tree (about 3.421 on the curated set).
269
+ - **English five-letter Wordle only.** No hard mode and no other word lengths (the engine assumes
270
+ five letters), though `--answers`/`--allowed` accept custom five-letter word lists.
271
+
272
+ ## Development
273
+
274
+ ```bash
275
+ pip install -e ".[dev,bench]"
276
+ pytest --cov=wordlesmith # tests + coverage
277
+ ruff check . && ruff format --check .
278
+ mypy src/
279
+ python -m build && twine check dist/*
280
+ ```
281
+
282
+ Contributions welcome. A natural extension is adding a new strategy: implement `Strategy`,
283
+ register it, and it shows up in `compare` automatically. Please open an
284
+ [issue](https://github.com/adityakmehrotra/wordlesmith/issues) or PR.
285
+
286
+ ## License &amp; contact
287
+
288
+ Distributed under the MIT License. See [`LICENSE.txt`](LICENSE.txt).
289
+
290
+ Aditya Mehrotra. Reach me at `adi1.mehrotra@gmail.com` or on
291
+ [LinkedIn](https://www.linkedin.com/in/aditya-mehrotra-).
292
+
293
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
294
+
295
+ [ci-shield]: https://github.com/adityakmehrotra/wordlesmith/actions/workflows/ci.yml/badge.svg
296
+ [ci-url]: https://github.com/adityakmehrotra/wordlesmith/actions/workflows/ci.yml
297
+ [python-shield]: https://img.shields.io/badge/python-3.10%2B-blue
298
+ [python-url]: https://www.python.org/downloads/
299
+ [license-shield]: https://img.shields.io/badge/license-MIT-green
300
+ [license-url]: https://github.com/adityakmehrotra/wordlesmith/blob/main/LICENSE.txt
@@ -0,0 +1,247 @@
1
+ <a id="readme-top"></a>
2
+
3
+ <div align="center">
4
+
5
+ [![CI][ci-shield]][ci-url]
6
+ [![Python 3.10+][python-shield]][python-url]
7
+ [![MIT License][license-shield]][license-url]
8
+
9
+ # wordlesmith
10
+
11
+ A Wordle solver with pluggable strategies and a benchmark suite for comparing them.
12
+
13
+ Considers every valid word a possible answer, so it never dead-ends on a real puzzle (entropy
14
+ averages 4.52 guesses over all 14,855 valid words, and 3.60 on the classic 2,315-answer set). The
15
+ core is pure standard library.
16
+
17
+ <img src="https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/docs/demo.gif" alt="wordlesmith playing along with a Wordle" width="760">
18
+
19
+ </div>
20
+
21
+ ---
22
+
23
+ ## Contents
24
+
25
+ - [What it does](#what-it-does)
26
+ - [Install](#install)
27
+ - [Quickstart](#quickstart)
28
+ - [Benchmark](#benchmark)
29
+ - [How it works](#how-it-works)
30
+ - [Strategies](#strategies) ([in-depth](docs/strategies.md))
31
+ - [Development](#development)
32
+ - [License &amp; contact](#license--contact)
33
+
34
+ ## What it does
35
+
36
+ `wordlesmith` is a command-line and library Wordle solver. It ships:
37
+
38
+ - A Wordle scoring engine that handles duplicate letters correctly, which is where most
39
+ solvers have subtle bugs.
40
+ - Five strategies behind one interface: positional frequency, entropy, expected remaining
41
+ size, minimax, and a random control.
42
+ - A benchmark framework that plays every valid word and reports the full guess distribution.
43
+ - The full 14,855-word valid-guess list (the default answer pool, so it never dead-ends on a
44
+ real puzzle) and the original 2,315-word answer set, packaged with a precomputed
45
+ opening-guess table so the first move is instant.
46
+
47
+ The core has no third-party dependencies. Plotting is the only extra.
48
+
49
+ ## Install
50
+
51
+ ```bash
52
+ # From GitHub
53
+ pip install "git+https://github.com/adityakmehrotra/wordlesmith"
54
+
55
+ # For development (tests, lint, plots)
56
+ git clone https://github.com/adityakmehrotra/wordlesmith
57
+ cd wordlesmith
58
+ pip install -e ".[dev,bench]"
59
+ ```
60
+
61
+ Requires Python 3.10+.
62
+
63
+ ## Quickstart
64
+
65
+ ### Command line
66
+
67
+ Auto-solve a known word:
68
+
69
+ <img src="https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/docs/solve.gif" alt="wordlesmith solve maven, then solve crane --curated" width="720">
70
+
71
+ (`maven` is a real NYT answer that isn't in the original 2,315-word list, so a solver built only
72
+ on that list would never find it. The default pool is every valid word, so this just works.)
73
+
74
+ Play along with a real puzzle: it suggests a guess, you type the colors back
75
+ (`g`=green, `y`=yellow, `x`=gray):
76
+
77
+ ```console
78
+ $ wordlesmith play --strategy entropy
79
+ Turn 1 suggestion: TARES (14855 candidates)
80
+ Enter feedback: xgxgx
81
+ Turn 2 suggestion: LADEN (150 candidates)
82
+ Enter feedback: ...
83
+ ```
84
+
85
+ Benchmark one strategy, or compare several:
86
+
87
+ ```console
88
+ $ wordlesmith benchmark --strategy entropy --sample 300
89
+ $ wordlesmith compare --strategies frequency,entropy,minimax --markdown
90
+ $ wordlesmith compare --curated --markdown # the classic 2,315-answer set
91
+ ```
92
+
93
+ Run `wordlesmith --help` (or `wordlesmith <command> --help`) for all options,
94
+ including `--curated`, `--guess-pool all`, `--jobs` for parallel benchmarks, and
95
+ `--answers`/`--allowed` for custom word lists.
96
+
97
+ ### Python API
98
+
99
+ ```python
100
+ from wordlesmith import get_strategy, simulate, feedback, pattern_to_string
101
+
102
+ # Score a guess against a target (base-3 pattern; g/y/x string for humans)
103
+ print(pattern_to_string(feedback("speed", "abide"))) # -> xxyxy
104
+
105
+ # Auto-play a word
106
+ result = simulate("maven", get_strategy("entropy"))
107
+ print(result.turns, result.guesses) # -> 3 ['tares', 'laden', 'maven']
108
+ ```
109
+
110
+ ## Benchmark
111
+
112
+ Lower average is better; `max` is the worst game; `fail%` is games not solved within six
113
+ guesses.
114
+
115
+ ### Primary: every valid word (the default)
116
+
117
+ Each strategy plays all 14,855 valid words, guessing from the words still consistent with the
118
+ feedback. This is how the solver actually runs, so it never dead-ends on a real puzzle:
119
+
120
+ | strategy | pool | avg | max | fail% |
121
+ | --- | --- | --- | --- | --- |
122
+ | random | answers | 5.061 | >6 | 16.68 |
123
+ | frequency | answers | 4.922 | >6 | 14.57 |
124
+ | minimax | answers | 4.658 | >6 | 11.29 |
125
+ | expected-size | answers | 4.585 | >6 | 10.57 |
126
+ | entropy | answers | 4.523 | >6 | 9.47 |
127
+
128
+ The averages are higher than on the curated set and the failure rate is non-trivial (about 9% even for entropy) because the
129
+ full valid list is packed with near-identical clusters (`match`/`batch`/`catch`/`hatch`/..., the
130
+ `-ound` and `-ight` families, plus many obscure words) that simply cannot be separated in six
131
+ guesses. Those hard words are almost never real NYT answers, so for actual daily play the curated
132
+ number below is the realistic one; this table is the pessimistic "solve literally any valid word"
133
+ figure.
134
+
135
+ ![Guess distribution by strategy](https://raw.githubusercontent.com/adityakmehrotra/wordlesmith/main/benchmarks/results/official/distribution_valid.png)
136
+
137
+ ### Secondary: the classic 2,315-answer set (`--curated`)
138
+
139
+ Restricted to the original Wordle solution set, the problem is easier and the numbers are
140
+ comparable to published solvers. The `all` pool (guessing any word for information) gets close
141
+ to the known optimum of about 3.421:
142
+
143
+ | strategy | pool | avg | max | fail% |
144
+ | --- | --- | --- | --- | --- |
145
+ | random | answers | 4.039 | >6 | 0.82 |
146
+ | frequency | answers | 3.640 | >6 | 0.60 |
147
+ | expected-size | answers | 3.623 | >6 | 0.60 |
148
+ | minimax | answers | 3.677 | >6 | 0.65 |
149
+ | entropy | answers | 3.598 | >6 | 0.48 |
150
+ | entropy | all | 3.465 | 6 | 0.00 |
151
+ | expected-size | all | 3.481 | 5 | 0.00 |
152
+ | minimax | all | 3.573 | 6 | 0.00 |
153
+
154
+ A concrete example of what the smart strategies buy you: solving `mound` on the curated set, the
155
+ frequency baseline burns turns cycling through lookalikes (`slate`, `crony`, `bound`, `found`,
156
+ `hound`, `mound`) while entropy picks a splitting guess and finishes in three (`raise`, `mulch`,
157
+ `mound`).
158
+
159
+ <sub>Methodology: a game is a failure if unsolved in 6 guesses (counted as 7 in the mean).
160
+ Deterministic strategies are reproducible; `random` uses a fixed seed. Full results and per-word
161
+ data are in [`benchmarks/results/official/`](https://github.com/adityakmehrotra/wordlesmith/tree/main/benchmarks/results/official); regenerate the primary
162
+ with `python scripts/run_official_benchmark.py`. The primary `answers`-pool run takes about 10
163
+ minutes per strategy on 9 cores; the curated `all`-pool run scores every valid word each turn and
164
+ takes far longer, which is why it stays on the smaller curated set. Use `--sample N` for a quick
165
+ estimate.</sub>
166
+
167
+ ## How it works
168
+
169
+ Scoring: Wordle feedback is computed in two passes. Greens are assigned first and each
170
+ consumes its letter in the target; yellows are then assigned left to right, each consuming
171
+ a remaining occurrence. A guess letter with no occurrence left is gray. This is why the
172
+ second `E` in `SPEED` is gray against `ABIDE`, which has only one `E`.
173
+
174
+ Filtering: after each guess the solver keeps a word `w` only if `feedback(guess, w)`
175
+ equals the pattern actually observed. This single rule handles every duplicate-letter case
176
+ correctly, so there is no separate (and bug-prone) tracking of which letters are "in" or
177
+ "out".
178
+
179
+ Word lists: by default every valid Wordle word is treated as a possible answer. The original
180
+ Wordle solution set was only 2,315 words, but the NYT has revised it over time, so a solver
181
+ built on that list can dead-end on a legitimate answer it never considered (`maven`, for
182
+ instance). Using the full valid list avoids that, at the cost of a somewhat higher average
183
+ since there are more words to tell apart. Pass `--curated` to fall back to the original
184
+ 2,315-answer set (faster, and the numbers become comparable to published solvers).
185
+
186
+ ## Strategies
187
+
188
+ | name | idea | good for |
189
+ | --- | --- | --- |
190
+ | `frequency` | Sum of per-position letter frequencies among candidates (the original baseline). | A strong, cheap heuristic. |
191
+ | `entropy` | Maximize expected information (Shannon entropy of the feedback-bucket distribution). | Best average guess count. |
192
+ | `expected-size` | Minimize the expected number of remaining candidates. | Simple, nearly as strong as entropy. |
193
+ | `minimax` | Minimize the largest feedback bucket (worst case). | Smallest worst-case bucket per guess (not always the fewest worst-case turns). |
194
+ | `random` | Guess a random consistent word. | A control / baseline to beat. |
195
+
196
+ The entropy, expected-size, and minimax strategies accept a `--guess-pool` of `answers`
197
+ (guess from remaining candidates) or `all` (guess from the full allowed list).
198
+
199
+ See [`docs/strategies.md`](docs/strategies.md) for an in-depth explanation of each strategy:
200
+ the scoring formulas, the bucket-splitting idea the information-theoretic strategies share
201
+ (with a worked example), the guess-pool trade-off, and how to add your own strategy.
202
+
203
+ ## Limitations
204
+
205
+ - **Pure Python is slow for the `all` guess pool.** Scoring every valid word each turn takes
206
+ minutes per benchmark, which is why the committed `all`-pool numbers stay on the curated set.
207
+ For a single interactive `solve`/`play` it's fine (the opening is precomputed).
208
+ - **The word list is a snapshot.** `valid_words.txt` is the NYT valid-guess list as of mid-2025.
209
+ If the NYT adds words later, refresh it and regenerate the opening table.
210
+ - **Six-guess failures are expected.** Over the full valid list even entropy fails about 9% of
211
+ games, because clusters like `match`/`batch`/`catch`/`hatch` or the `-ound`/`-ight` families
212
+ can't be separated in six turns. Those words are rarely real answers, so `--curated` is the
213
+ realistic daily-play figure.
214
+ - **The strategies are greedy.** They optimize the current guess, not the whole game tree, so
215
+ even the best is a step behind the known optimal decision tree (about 3.421 on the curated set).
216
+ - **English five-letter Wordle only.** No hard mode and no other word lengths (the engine assumes
217
+ five letters), though `--answers`/`--allowed` accept custom five-letter word lists.
218
+
219
+ ## Development
220
+
221
+ ```bash
222
+ pip install -e ".[dev,bench]"
223
+ pytest --cov=wordlesmith # tests + coverage
224
+ ruff check . && ruff format --check .
225
+ mypy src/
226
+ python -m build && twine check dist/*
227
+ ```
228
+
229
+ Contributions welcome. A natural extension is adding a new strategy: implement `Strategy`,
230
+ register it, and it shows up in `compare` automatically. Please open an
231
+ [issue](https://github.com/adityakmehrotra/wordlesmith/issues) or PR.
232
+
233
+ ## License &amp; contact
234
+
235
+ Distributed under the MIT License. See [`LICENSE.txt`](LICENSE.txt).
236
+
237
+ Aditya Mehrotra. Reach me at `adi1.mehrotra@gmail.com` or on
238
+ [LinkedIn](https://www.linkedin.com/in/aditya-mehrotra-).
239
+
240
+ <p align="right">(<a href="#readme-top">back to top</a>)</p>
241
+
242
+ [ci-shield]: https://github.com/adityakmehrotra/wordlesmith/actions/workflows/ci.yml/badge.svg
243
+ [ci-url]: https://github.com/adityakmehrotra/wordlesmith/actions/workflows/ci.yml
244
+ [python-shield]: https://img.shields.io/badge/python-3.10%2B-blue
245
+ [python-url]: https://www.python.org/downloads/
246
+ [license-shield]: https://img.shields.io/badge/license-MIT-green
247
+ [license-url]: https://github.com/adityakmehrotra/wordlesmith/blob/main/LICENSE.txt
@@ -0,0 +1,4 @@
1
+ strategy,guess_pool,num_words,average,median,max,fails,fail_pct,turns_1,turns_2,turns_3,turns_4,turns_5,turns_6,turns_fail,wall_seconds,seed,package_version
2
+ expected-size,all,2315,3.4812,3,5,0,0.0000,0,55,1130,1091,39,0,0,574.09,0,0.1.0
3
+ minimax,all,2315,3.5732,4,6,0,0.0000,1,53,990,1162,107,2,0,530.64,0,0.1.0
4
+ entropy,all,2315,3.4648,3,6,0,0.0000,0,44,1216,991,63,1,0,530.19,0,0.1.0
@@ -0,0 +1,5 @@
1
+ | strategy | pool | avg | median | max | fail% | time(s) |
2
+ | --- | --- | --- | --- | --- | --- | --- |
3
+ | expected-size | all | 3.481 | 3.0 | 5 | 0.00 | 574.1 |
4
+ | minimax | all | 3.573 | 4.0 | 6 | 0.00 | 530.6 |
5
+ | entropy | all | 3.465 | 3.0 | 6 | 0.00 | 530.2 |
@@ -0,0 +1,6 @@
1
+ strategy,guess_pool,num_words,average,median,max,fails,fail_pct,turns_1,turns_2,turns_3,turns_4,turns_5,turns_6,turns_fail,wall_seconds,seed,package_version
2
+ random,answers,2315,4.0393,4,7,19,0.8207,0,98,597,935,505,161,19,11.68,0,0.1.0
3
+ frequency,answers,2315,3.6397,4,7,14,0.6048,1,146,900,975,239,40,14,2.19,0,0.1.0
4
+ expected-size,answers,2315,3.6233,4,7,14,0.6048,1,131,957,946,224,42,14,6.59,0,0.1.0
5
+ minimax,answers,2315,3.6773,4,7,15,0.6479,1,122,880,1009,241,47,15,6.38,0,0.1.0
6
+ entropy,answers,2315,3.5983,4,7,11,0.4752,1,131,999,919,207,47,11,6.24,0,0.1.0